From bf920ea2002a64f23ded34bff1cd81a4bb7a4637 Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Mon, 8 Jul 2013 16:18:37 -0400 Subject: Add group regex scheduling hint to the test partitioner This commit adds a new optional parameter, group_regex, to TestListingFixture. The parameter group_regex is used to group the test_ids. By passing a regex string with the parameter, the test partitioner will match the regex to the test ids and then group by the result. In the case a test id does not match the regex it will be put in a default group. These groups are then used for scheduling the partitions. Each test group is scheduled together so that all tests in a group are run together on the same partition. For the purposes of scheduling the groups the sum of the run times of all test ids in a group is used to schedule the whole group in a partition. --- COPYING | 1 + testrepository/testcommand.py | 91 +++++++++++++++++++++++++++----- testrepository/tests/test_testcommand.py | 31 +++++++++++ 3 files changed, 110 insertions(+), 13 deletions(-) diff --git a/COPYING b/COPYING index daebc89..632a79e 100644 --- a/COPYING +++ b/COPYING @@ -29,6 +29,7 @@ for distributions such as Debian that wish to list all the copyright holders in their metadata: * Robert Collins , 2009 * Hewlett-Packard Development Company, L.P., 2013 +* IBM Corp., 2013 Code that has been incorporated into Testrepository from other projects will diff --git a/testrepository/testcommand.py b/testrepository/testcommand.py index ef285ab..65a8f61 100644 --- a/testrepository/testcommand.py +++ b/testrepository/testcommand.py @@ -134,7 +134,7 @@ class TestListingFixture(Fixture): def __init__(self, test_ids, cmd_template, listopt, idoption, ui, repository, parallel=True, listpath=None, parser=None, - test_filters=None, instance_source=None): + test_filters=None, instance_source=None, group_regex=None): """Create a TestListingFixture. :param test_ids: The test_ids to use. 
May be None indicating that @@ -167,6 +167,8 @@ class TestListingFixture(Fixture): :param instance_source: A source of test run instances. Must support obtain_instance(max_concurrency) -> id and release_instance(id) calls. + :param group_regex: An optional regular expression string which is used + to provide a grouping hint to the test partitioner """ self.test_ids = test_ids self.template = cmd_template @@ -179,6 +181,7 @@ class TestListingFixture(Fixture): self._parser = parser self.test_filters = test_filters self._instance_source = instance_source + self.group_regex = group_regex def setUp(self): super(TestListingFixture, self).setUp() @@ -327,6 +330,7 @@ class TestListingFixture(Fixture): :return: A list of spawned processes. """ result = [] + group_tags = None test_ids = self.test_ids if self.concurrency == 1 and (test_ids is None or test_ids): # Have to customise cmd here, as instances are allocated @@ -343,8 +347,11 @@ class TestListingFixture(Fixture): return [CallWhenProcFinishes(run_proc, lambda:self._instance_source.release_instance(instance))] else: - return [run_proc] - test_id_groups = self.partition_tests(test_ids, self.concurrency) + return [run_proc] + if self.group_regex: + group_tags = self.filter_test_groups(test_ids, self.group_regex) + test_id_groups = self.partition_tests(test_ids, self.concurrency, + group_tags) for test_ids in test_id_groups: if not test_ids: # No tests in this partition @@ -356,7 +363,28 @@ class TestListingFixture(Fixture): result.extend(fixture.run_tests()) return result - def partition_tests(self, test_ids, concurrency): + def filter_test_groups(self, test_ids, group_regex): + """Add a group tag based on the regex provided + + :return A dict with the group tags as keys and a list of + test ids that are a member of the group tag as the value + """ + + group_dict = {} + expr = re.compile(group_regex) + for test_id in test_ids: + match = expr.match(test_id) + if match: + group_id = match.group(0) + else: + group_id = None 
+ if group_dict.get(group_id): + group_dict[group_id].append(test_id) + else: + group_dict[group_id] = [test_id] + return group_dict + + def partition_tests(self, test_ids, concurrency, group_tags=None): """Parition test_ids by concurrency. Test durations from the repository are used to get partitions which @@ -367,26 +395,63 @@ class TestListingFixture(Fixture): :return: A list where each element is a distinct subset of test_ids, and the union of all the elements is equal to set(test_ids). """ + partitions = [list() for i in range(concurrency)] timed_partitions = [[0.0, partition] for partition in partitions] time_data = self.repository.get_test_times(test_ids) timed = time_data['known'] unknown = time_data['unknown'] + # Schedule test groups by the sum of execute time for each test that is + # a member of the group + if group_tags: + group_timed = {} + group_unknown = [] + for group_tag in group_tags.keys(): + time = 0.0 + for test_id in group_tags[group_tag]: + # If a test_id is not timed remove the whole group from the + # timed groups dict and + if test_id in unknown: + if group_tag in group_timed.keys(): + group_timed.pop(group_tag, None) + group_unknown.append(group_tag) + break + time = time + timed[test_id] + group_timed[group_tag] = (group_tags[group_tag], time) + + queue = sorted(group_timed.items(), + key=operator.itemgetter(1), + reverse=True) + + # Sort the tests by runtime + for group_tag, test_tuple in queue: + test_ids = test_tuple[0] + duration = test_tuple[1] + timed_partitions[0][0] = timed_partitions[0][0] + duration + # Handle groups larger than a single entry + timed_partitions[0][1].extend(test_ids) + timed_partitions.sort(key=lambda item: (item[0], len(item[1]))) + for partition, group_id in zip(itertools.cycle(partitions), + group_unknown): + partition = partition + group_tags[group_id] + return partitions + # Scheduling is NP complete in general, so we avoid aiming for # perfection. 
A quick approximation that is sufficient for our general # needs: # sort the tests by time # allocate to partitions by putting each test in to the partition with # the current (lowest time, shortest length) - queue = sorted(timed.items(), key=operator.itemgetter(1), reverse=True) - for test_id, duration in queue: - timed_partitions[0][0] = timed_partitions[0][0] + duration - timed_partitions[0][1].append(test_id) - timed_partitions.sort(key=lambda item:(item[0], len(item[1]))) - # Assign tests with unknown times in round robin fashion to the partitions. - for partition, test_id in zip(itertools.cycle(partitions), unknown): - partition.append(test_id) - return partitions + else: + queue = sorted(timed.items(), key=operator.itemgetter(1), reverse=True) + for test_id, duration in queue: + timed_partitions[0][0] = timed_partitions[0][0] + duration + timed_partitions[0][1].append(test_id) + timed_partitions.sort(key=lambda item:(item[0], len(item[1]))) + # Assign tests with unknown times in round robin fashion to the partitions. 
+ for partition, test_id in zip(itertools.cycle(partitions), unknown): + partition.append(test_id) + return partitions def callout_concurrency(self): """Callout for user defined concurrency.""" diff --git a/testrepository/tests/test_testcommand.py b/testrepository/tests/test_testcommand.py index 43be8e6..bbe4ac8 100644 --- a/testrepository/tests/test_testcommand.py +++ b/testrepository/tests/test_testcommand.py @@ -362,6 +362,37 @@ class TestTestCommand(ResourcedTestCase): self.assertEqual(1, len(partitions[0])) self.assertEqual(1, len(partitions[1])) + def test_partition_tests_with_group_regex(self): + repo = memory.RepositoryFactory().initialise('memory:') + result = repo.get_inserter() + result.startTestRun() + run_timed("TestCase1.slow", 3, result) + run_timed("TestCase2.fast1", 1, result) + run_timed("TestCase2.fast2", 1, result) + result.stopTestRun() + ui, command = self.get_test_ui_and_cmd(repository=repo) + self.set_config( + '[DEFAULT]\ntest_command=foo $IDLIST $LISTOPT\n' + 'test_list_option=--list\n') + fixture = self.useFixture(command.get_run_command()) + test_ids = frozenset(['TestCase1.slow', 'TestCase1.fast', + 'TestCase1.fast2', 'TestCase2.fast1', + 'TestCase3.test1', 'TestCase3.test2', + 'TestCase2.fast2', 'TestCase4.test', + 'testdir.testfile.TestCase5.test']) + regex = 'TestCase[0-5]' + group_tags = fixture.filter_test_groups(test_ids, regex) + partitions = fixture.partition_tests(test_ids, 2, group_tags) + self.assertTrue('TestCase1.slow' in partitions[1]) + self.assertTrue('TestCase1.fast' in partitions[1]) + self.assertTrue('TestCase1.fast2' in partitions[1]) + self.assertTrue('TestCase3.test2' in partitions[1]) + self.assertTrue('TestCase3.test1' in partitions[1]) + self.assertTrue('TestCase4.test' in partitions[1]) + self.assertTrue('testdir.testfile.TestCase5.test' in partitions[0]) + self.assertTrue('TestCase2.fast1' in partitions[0]) + self.assertTrue('TestCase2.fast2' in partitions[0]) + def test_run_tests_with_instances(self): # when 
there are instances and no instance_execute, run_tests acts as # normal. -- cgit v1.2.1