diff --git a/.travis.yml b/.travis.yml
index c74c9cc12..6bcff0d00 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,7 +7,20 @@ python:
   - "3.4"
   - "3.5"
 sudo: false
-script: nosetests test --verbose
+env:
+  - TESTS=complete
+  - TESTS=regression_1-of-7
+  - TESTS=regression_2-of-7
+  - TESTS=regression_3-of-7
+  - TESTS=regression_4-of-7
+  - TESTS=regression_5-of-7
+  - TESTS=regression_6-of-7
+  - TESTS=regression_7-of-7
+matrix:
+  allow_failures:
+    - env: TESTS=complete
+
+script: devscripts/travis.sh
 notifications:
   email:
     - filippo.valsorda@gmail.com
diff --git a/devscripts/regdetect.py b/devscripts/regdetect.py
new file mode 100755
index 000000000..bbbc05036
--- /dev/null
+++ b/devscripts/regdetect.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+
+from __future__ import print_function, unicode_literals
+
+import subprocess
+import sys
+import os
+import time
+
+NOSECOMMAND="nosetests"
+
+
+def process(test):
+    """Parse one verbose nose record, which may span several lines.
+
+    Returns a (test handle, status) tuple where status is "ok", "FAIL",
+    "ERROR" or "WARNING", or (None, None) when the record cannot be parsed.
+    """
+    # Parse nose output to get test statuses
+    # Format: test_<name> (<package>.<module>.<class>) ... [multi-line-error-message?] {ok,ERROR,FAIL}
+    if not test:
+        print("Received empty line")
+        return (None, None)
+    s = test.split()
+    if len(s) < 4:
+        print("Bad line passed, not enough elements:", test)
+        # Stop here like the other malformed cases; indexing s below is not safe
+        return (None, None)
+    # We try to convert this nose output:
+    # test_opengraph (test.test_InfoExtractor.TestInfoExtractor) ... ok
+    # into this test handle:
+    # test.test_InfoExtractor:TestInfoExtractor.test_opengraph
+    testpath = s[1][1:-1].split('.')
+    if len(testpath) != 3:
+        print("Bad testpath passed, not enough elements:", testpath)
+        return (None, None)
+    fulltestname = "%s.%s:%s.%s"%(testpath[0], testpath[1], testpath[2], s[0])
+
+    status = s[-1]
+    if status not in ("ok", "FAIL", "ERROR"):
+        print("Unknown test status", status)
+        return (None, None)
+
+    # we cannot assume that a test failing with a warning is ok (network error)
+    if status == "ok" and test.find("WARNING") != -1:
+        status = "WARNING"
+
+    return (fulltestname, status)
+
+def fill_results(res, results):
+    """Record a parsed (test handle, status) pair in results; skip unparsed ones."""
+    if res[0] is not None and res[1] is not None:
+        results[res[0]] = res[1]
+
+def process_stream(f, verbose):
+    """Parse verbose nose output read from f into a {test handle: status} dict.
+
+    When verbose is true, every line read from f is echoed to stdout.
+    """
+    results = {}
+    buf = None
+    for line in f:
+        if verbose:
+            print(line, end='')
+        if line.startswith(("===========", "--------------")):
+            # this is the end
+            break
+        if line.startswith("test_"): # new test, process previous test
+            if buf is not None:
+                fill_results(process(buf), results)
+            buf = line
+        elif buf: # some tests have multi-line outputs
+            buf += line
+    if verbose: # print the end of the file
+        for line in f: # it might contain interesting info, like tracebacks
+            print(line, end='')
+    if buf is not None: # process last record, if the stream had any
+        fill_results(process(buf), results)
+    return results
+
+
+def launch_nose(args=None, verbose=True):
+    """Run nose in verbose mode with the extra args and return the parsed results."""
+    # None instead of a mutable [] default; an empty list means "run all tests"
+    cmd = [NOSECOMMAND, "-v"] + list(args or [])
+    nose = subprocess.Popen(cmd, stderr=subprocess.PIPE, universal_newlines=True)
+    results = process_stream(nose.stderr, verbose)
+    nose.stderr.close()
+    nose.wait()
+    return results
+
+def filter_bad(results):
+    """Return the list of tests whose status is anything but "ok"."""
+    return [k for k in results if results[k] != "ok"]
+
+def test_stability(refcommit, testcommit, failed_tests):
+    """Run failed_tests at refcommit and return those that pass there.
+
+    The working tree is switched back to testcommit before returning.
+    """
+    git_checkout(refcommit)
+    print("Testing at commit " + refcommit)
+    refresults = launch_nose(failed_tests)
+    stable_tests = []
+    for k in refresults:
+        if refresults[k] != "ok":
+            print("Test %s is unreliable !"%(k))
+        else:
+            stable_tests.append(k)
+    git_checkout(testcommit)
+    print("Back to commit " + testcommit)
+    return stable_tests
+
+def iterate_tests(refcommit, testcommit, testlist=None, iterations=9, cooldown=60):
+    """Re-run the non-ok tests several times to weed out temporary failures.
+
+    Returns the results of the last nose run, or {} when no run happened or
+    when none of the remaining tests is stable at refcommit.
+    """
+    failed_tests = list(testlist or []) # empty means run all tests
+    results = {} # stays empty if the loop below never runs (iterations <= 0)
+    # run tests passed in arguments (or all) and get list of failed tests
+    # keep running those tests a few times to make sure the failure wasn't
+    # temporary (bad connection, site error, ...)
+    for i in range(iterations):
+        if i > 3 and len(failed_tests) < 5:
+            # We have reduced the number of tests, we now test them for stability
+            print("We only have %d tests at iteration %d, testing for reliablity"%(len(failed_tests), i))
+            failed_tests = test_stability(refcommit, testcommit, failed_tests)
+            if len(failed_tests) == 0: # no more stable tests
+                return {}
+            time.sleep(cooldown)
+        results = launch_nose(failed_tests)
+        failed_tests = filter_bad(results)
+        print("Run %d done. Has %d out of %d non-ok tests"%(i, len(failed_tests), len(results.keys())))
+        if len(failed_tests) == 0: # no failure. Awesome !
+            break
+        time.sleep(cooldown)
+    return results # this will return a partial result list. It does not matter since ok-tests aren't that interesting
+
+def git_checkout(arg):
+    """Check out the given commit-ish quietly; raise RuntimeError on failure."""
+    ret = subprocess.call(["git", "checkout", "--quiet", arg])
+    if ret != 0:
+        raise RuntimeError("git checkout failed")
+
+def regressive_tests(refresults, testresults):
+    """Return the tests that are ok in refresults but not in testresults."""
+    regressive = []
+    for k in refresults:
+        assert k in testresults, "New unknown test case"
+        if refresults[k] == "ok" and refresults[k] != testresults[k]: #let's assume FAIL == ERROR
+            regressive.append(k)
+
+    return regressive
+
+def list_nose_tests():
+    """Return the sorted test handles reported by nose with --collect-only."""
+    tests = sorted(launch_nose(["--collect-only"], verbose=False).keys())
+    return tests
+
+def sub_tests():
+    """Return the slice of tests selected by the TESTS environment variable.
+
+    TESTS is expected to look like "regression_2-of-7". Returns None when
+    TESTS is not set.
+    """
+    # See if we need to slice the work and do only one part
+    slice_arg = os.getenv("TESTS")
+    if slice_arg is None:
+        return None
+
+    test_slice = slice_arg.split('_')[1]
+    slice_bounds = test_slice.split('-of-')
+    current_slice = int(slice_bounds[0])
+    nr_slices = int(slice_bounds[1])
+    all_tests = list_nose_tests()
+    length = len(all_tests)
+    start = (current_slice-1)*length // nr_slices
+    end = current_slice*length // nr_slices
+    sub_test_list = all_tests[start:end]
+
+    print("Running slice %d of %d; it has %d out of %d tests"%(current_slice,
+        nr_slices, len(sub_test_list), length))
+
+    return sub_test_list
+
+def bisect(good, bad, test):
+    """Run git bisect between good and bad, with nose on test as the run command."""
+    def git_bisect(args):
+        ret = subprocess.call(["git", "bisect"] + args)
+        if ret != 0:
+            raise RuntimeError("git bisect failed with " + " ".join(args))
+    print("Bisecting %s between %s and %s"%(test, good, bad))
+    git_bisect(["start", bad, good])
+    try:
+        git_bisect(["run", NOSECOMMAND, "--verbose", "--detailed-errors", test])
+    finally:
+        # always leave bisect mode, even when the run step fails
+        git_bisect(["reset"])
+    print("Bisect done")
+
+def main():
+    """Detect tests that regressed between a reference and a tested commit.
+
+    Usage: regdetect.py [testcommit refcommit [nose args...]]
+    Without arguments, the commits come from TRAVIS_COMMIT_RANGE, falling
+    back to master and master^.
+    """
+    testcommit = "master"
+    refcommit = "master^"
+    if len(sys.argv) < 3:
+        commit_range = os.getenv("TRAVIS_COMMIT_RANGE")
+        # the range may be unset, empty or a single commit; keep the defaults then
+        if commit_range and "..." in commit_range:
+            commits = commit_range.split("...")
+            refcommit, testcommit = commits[0], commits[1]
+    else:
+        testcommit = sys.argv[1]
+        refcommit = sys.argv[2]
+
+    print("Testing if commit-ish %s introduced regressions compared to %s"%(testcommit, refcommit))
+
+    git_checkout(testcommit)
+
+    sub_test_list = sub_tests()
+
+    if sub_test_list is not None:
+        args = sub_test_list
+    else:
+        args = sys.argv[3:] # use remaining args to limit test selection (if there are any)
+
+    results = launch_nose(args)
+
+    failed_tests = filter_bad(results)
+    if len(failed_tests) == 0:
+        print("No failure, exiting")
+        sys.exit(0)
+
+    print("%d tests are failing at %s, now testing if they are regression from %s" %
+            (len(failed_tests), testcommit, refcommit))
+
+    git_checkout(refcommit)
+
+    results_ref = launch_nose(failed_tests)
+    print("Second run of %d tests done."%len(failed_tests))
+
+    regressive = regressive_tests(results_ref, results)
+
+    git_checkout(testcommit)
+
+    if len(regressive) == 0:
+        print("There was no detected regression")
+        sys.exit(0)
+
+
+    print("%d test(s) have a potential regression. Retrying them a few times to be sure"%len(regressive))
+
+    results_retry = iterate_tests(refcommit, testcommit, regressive)
+
+    failed_retry = filter_bad(results_retry)
+    if len(failed_retry) == 0:
+        print("All false alarms, exiting")
+        sys.exit(0)
+
+    print("We have %d regressions"%len(failed_retry))
+    for k in failed_retry:
+        print("Test %s was %s in %s, is now %s at %s"%(k, results_ref[k],
+            refcommit, results_retry[k], testcommit))
+        bisect(refcommit, testcommit, k)
+
+    git_checkout(testcommit)
+
+    sys.exit(-1)
+
+if __name__ == "__main__":
+    main()
+
diff --git a/devscripts/travis.sh b/devscripts/travis.sh
new file mode 100755
index 000000000..3c7f1c495
--- /dev/null
+++ b/devscripts/travis.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+if [ "$TESTS" = "complete" ]; then
+    exec nosetests test --verbose
+else
+    exec ./devscripts/regdetect.py
+fi