From bdedd703c59a40a074fc0929d4abbb8b53efd813 Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Mon, 8 Jul 2024 12:09:33 +0000 Subject: [PATCH] teuthology/schedule: Add "descr" option A convenience option. When a large batch has many failed tests, "descr" makes it easy to schedule just selected ones. It is as simple as copy-pasting the description of a failed test. teuthology-suite -e akupczyk@redhat.com -vv -c aclamk-testing-ceres-2024-07-01-1011 -p 50 \ --descr "rados/singleton-bluestore/{all/cephtool mon_election/connectivity msgr-failures/none \ msgr/async-v2only objectstore/bluestore-comp-lz4 rados supported-random-distro$/{centos_latest}}" Many tests may be rerun in this fashion; "descr" accepts multiple comma-separated test descriptions. Signed-off-by: Adam Kupczyk --- scripts/suite.py | 4 ++ teuthology/suite/run.py | 112 ++++++++++++++++++++++++++++++++-------- 2 files changed, 94 insertions(+), 22 deletions(-) diff --git a/scripts/suite.py b/scripts/suite.py index 77561b7e0..af134f77b 100644 --- a/scripts/suite.py +++ b/scripts/suite.py @@ -9,6 +9,7 @@ usage: teuthology-suite --help teuthology-suite [-v | -vv ] --suite [options] [...] teuthology-suite [-v | -vv ] --rerun [options] [...] + teuthology-suite [-v | -vv ] --descr [options] [...] Run a suite of ceph integration tests. A suite is a directory containing facets. A facet is a directory containing config snippets. Running a suite @@ -126,6 +127,9 @@ 2/ ... -1/ will schedule all jobs in the suite (many more than once). If specified, this value can be found in results.log. + --descr Use to rerun tests. Reschedule test based on their + descriptions. The descr is comma separated list of test + descriptions. 
-p , --priority Job priority (lower is sooner) [default: 1000] diff --git a/teuthology/suite/run.py b/teuthology/suite/run.py index a37887811..4073bd49c 100644 --- a/teuthology/suite/run.py +++ b/teuthology/suite/run.py @@ -27,7 +27,6 @@ log = logging.getLogger(__name__) - class Run(object): WAIT_MAX_JOB_TIME = 30 * 60 WAIT_PAUSE = 5 * 60 @@ -70,7 +69,7 @@ def make_run_name(self): [ self.user, str(self.timestamp), - self.args.suite, + self.args.suite or "rerun", self.args.ceph_branch, self.args.kernel_branch or '-', self.args.flavor, worker @@ -357,8 +356,14 @@ def build_base_config(self): job_config.timestamp = self.timestamp job_config.priority = self.args.priority job_config.seed = self.args.seed + if self.args.subset and self.args.descr: + util.schedule_fail("--subset is not compatible with --descr") + if self.args.suite and self.args.descr: + util.schedule_fail("--suite is not compatible with --descr") if self.args.subset: job_config.subset = '/'.join(str(i) for i in self.args.subset) + if self.args.descr: + job_config.suite = "rerun" if self.args.email: job_config.email = self.args.email if self.args.owner: @@ -564,39 +569,102 @@ def check_num_jobs(self, jobs_to_schedule): if threshold and jobs_to_schedule > threshold: util.schedule_fail(msg, dry_run=self.args.dry_run) - def schedule_suite(self): + def descr_to_yamls(self, descriptions, suites_path): """ - Schedule the suite-run. Returns the number of jobs scheduled. + Function converts description of the run into sequence of .yaml files. 
+ Input "descriptions" is the value of the "--descr" parameter: + a comma-separated list of run descriptions. + Input: + descriptions="rados/objectstore/{backends/objectstore-bluestore-b supported-random-distro$/{centos_latest}}" + suites_path="/cephfs/github.com_ceph_build/qa/suites" + Output (a list with one (description, yaml files) tuple per description): + [('rados/objectstore/{backends/objectstore-bluestore-b supported-random-distro$/{centos_latest}}', + ['/cephfs/github.com_ceph_build/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml', + '/cephfs/github.com_ceph_build/qa/suites/rados/objectstore/supported-random-distro$/centos_latest.yaml'])] """ - name = self.name - if self.args.arch: - arch = self.args.arch - log.debug("Using '%s' as an arch" % arch) - else: - arch = util.get_arch(self.base_config.machine_type) - suite_name = self.base_config.suite - suite_path = os.path.normpath(os.path.join( + def expand_descr(test_yamls, source, prefix, base_pos): + pos = base_pos + while (pos < len(source)): + if source[pos] == '{': + more_prefix=source[base_pos:pos] + pos = expand_descr(test_yamls, source, prefix + more_prefix, pos + 1) + base_pos = pos + elif source[pos] == '}': + if base_pos != pos: + test_yamls.append(suites_path + "/" + prefix + source[base_pos:pos] + ".yaml") + return pos + 1 + elif source[pos] == ' ': + if base_pos != pos: + test_yamls.append(suites_path + "/" + prefix + source[base_pos:pos] + ".yaml") + pos = pos + 1 + base_pos = pos + else: + pos = pos + 1 + result = [] + desc_tab = descriptions.split(',') + for d in desc_tab: + d = d.strip() + test_yamls = [] + expand_descr(test_yamls, d, "", 0) + result.append((d, test_yamls)) + return result + + def prepare_configs(self): + suite_name = self.base_config.suite or "rerun" + suites_path = os.path.normpath(os.path.join( self.suite_repo_path, self.args.suite_relpath, - 'suites', - self.base_config.suite.replace(':', '/'), + 'suites' + )) + suite_path = os.path.normpath(os.path.join( + suites_path, + 
suite_name.replace(':', '/'), )) - log.debug('Suite %s in %s' % (suite_name, suite_path)) - log.debug(f"subset = {self.args.subset}") - log.debug(f"no_nested_subset = {self.args.no_nested_subset}") - configs = build_matrix(suite_path, + log.debug('Suites in %s' % (suites_path)) + if self.args.descr: + log.debug(f'Rerun by description in {suites_path} in %s') + configs = self.descr_to_yamls(self.args.descr, suites_path) + use_suite_name = None + generated = len(configs) + log.info(f'Rerun from description in {suite_path} generated {generated} jobs') + else: + log.debug('Suite %s in %s' % (suite_name, suites_path)) + log.debug(f"subset = {self.args.subset}") + log.debug(f"no_nested_subset = {self.args.no_nested_subset}") + configs = build_matrix(suite_path, subset=self.args.subset, no_nested_subset=self.args.no_nested_subset, seed=self.args.seed) - generated = len(configs) - log.info(f'Suite {suite_name} in {suite_path} generated {generated} jobs (not yet filtered or merged)') + use_suite_name = self.base_config.suite + generated = len(configs) + log.info(f'Suite {suite_name} in {suites_path} generated {generated} jobs (not yet filtered or merged)') configs = list(config_merge(configs, filter_in=self.args.filter_in, filter_out=self.args.filter_out, filter_all=self.args.filter_all, filter_fragments=self.args.filter_fragments, seed=self.args.seed, - suite_name=suite_name)) + suite_name=use_suite_name)) + return configs + + def schedule_suite(self): + """ + Schedule the suite-run. Returns the number of jobs scheduled. 
+ """ + name = self.name + if self.args.arch: + arch = self.args.arch + log.debug("Using '%s' as an arch" % arch) + else: + arch = util.get_arch(self.base_config.machine_type) + suite_name = self.base_config.suite or "rerun" + suites_path = os.path.normpath(os.path.join( + self.suite_repo_path, + self.args.suite_relpath, + 'suites' + )) + configs = self.prepare_configs() + generated = len(configs) if self.args.dry_run: log.debug("Base job config:\n%s" % self.base_config) @@ -696,7 +764,7 @@ def schedule_suite(self): total_count *= self.args.num log.info( 'Suite %s in %s scheduled %d jobs.' % - (suite_name, suite_path, count) + (suite_name, suites_path, count) ) log.info('%d/%d jobs were filtered out.', (generated - count),