Skip to content

Commit

Permalink
teuthology/schedule: Add "descr" option
Browse files Browse the repository at this point in the history
A convenience option.
When a large batch has many failed tests, "descr" makes it easy to schedule
just the selected ones. It is as simple as copying and pasting the description of a failed test.

teuthology-suite -e [email protected] -vv -c aclamk-testing-ceres-2024-07-01-1011 -p 50 \
  --descr "rados/singleton-bluestore/{all/cephtool mon_election/connectivity msgr-failures/none \
  msgr/async-v2only objectstore/bluestore-comp-lz4 rados supported-random-distro$/{centos_latest}}"

Many tests may be rerun in this fashion; "descr" accepts multiple comma-separated test descriptions.

Signed-off-by: Adam Kupczyk <[email protected]>
  • Loading branch information
aclamk committed Jul 19, 2024
1 parent c1b06c2 commit bdedd70
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 22 deletions.
4 changes: 4 additions & 0 deletions scripts/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
usage: teuthology-suite --help
teuthology-suite [-v | -vv ] --suite <suite> [options] [<config_yaml>...]
teuthology-suite [-v | -vv ] --rerun <name> [options] [<config_yaml>...]
teuthology-suite [-v | -vv ] --descr <descr> [options] [<config_yaml>...]
Run a suite of ceph integration tests. A suite is a directory containing
facets. A facet is a directory containing config snippets. Running a suite
Expand Down Expand Up @@ -126,6 +127,9 @@
2/<outof> ... <outof>-1/<outof> will schedule all
jobs in the suite (many more than once). If specified,
this value can be found in results.log.
--descr <descr> Use to rerun tests. Reschedules tests based on their
descriptions. <descr> is a comma-separated list of test
descriptions.
-p <priority>, --priority <priority>
Job priority (lower is sooner)
[default: 1000]
Expand Down
112 changes: 90 additions & 22 deletions teuthology/suite/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

log = logging.getLogger(__name__)


class Run(object):
WAIT_MAX_JOB_TIME = 30 * 60
WAIT_PAUSE = 5 * 60
Expand Down Expand Up @@ -70,7 +69,7 @@ def make_run_name(self):
[
self.user,
str(self.timestamp),
self.args.suite,
self.args.suite or "rerun",
self.args.ceph_branch,
self.args.kernel_branch or '-',
self.args.flavor, worker
Expand Down Expand Up @@ -357,8 +356,14 @@ def build_base_config(self):
job_config.timestamp = self.timestamp
job_config.priority = self.args.priority
job_config.seed = self.args.seed
if self.args.subset and self.args.descr:
util.schedule_fail("--subset is not compatible with --descr")
if self.args.suite and self.args.descr:
util.schedule_fail("--suite is not compatible with --descr")
if self.args.subset:
job_config.subset = '/'.join(str(i) for i in self.args.subset)
if self.args.descr:
job_config.suite = "rerun"
if self.args.email:
job_config.email = self.args.email
if self.args.owner:
Expand Down Expand Up @@ -564,39 +569,102 @@ def check_num_jobs(self, jobs_to_schedule):
if threshold and jobs_to_schedule > threshold:
util.schedule_fail(msg, dry_run=self.args.dry_run)

def schedule_suite(self):
def descr_to_yamls(self, descriptions, suites_path):
"""
Schedule the suite-run. Returns the number of jobs scheduled.
Convert run descriptions into sequences of .yaml files.
"descriptions" is the value of the "--descr" parameter: a comma-separated
list of run descriptions. Returns a list with one
(description, list of .yaml paths) tuple per description.
Input:
descriptions="rados/objectstore/{backends/objectstore-bluestore-b supported-random-distro$/{ubuntu_latest}}"
suites_path="/cephfs/github.com_ceph_build/qa/suites"
Output:
[('rados/objectstore/{backends/objectstore-bluestore-b supported-random-distro$/{ubuntu_latest}}',
['/cephfs/github.com_ceph_build/qa/suites/rados/objectstore/backends/objectstore-bluestore-b.yaml',
'/cephfs/github.com_ceph_build/qa/suites/rados/objectstore/supported-random-distro$/ubuntu_latest.yaml'])]
"""
name = self.name
if self.args.arch:
arch = self.args.arch
log.debug("Using '%s' as an arch" % arch)
else:
arch = util.get_arch(self.base_config.machine_type)
suite_name = self.base_config.suite
suite_path = os.path.normpath(os.path.join(
def expand_descr(test_yamls, source, prefix, base_pos):
pos = base_pos
while (pos < len(source)):
if source[pos] == '{':
more_prefix=source[base_pos:pos]
pos = expand_descr(test_yamls, source, prefix + more_prefix, pos + 1)
base_pos = pos
elif source[pos] == '}':
if base_pos != pos:
test_yamls.append(suites_path + "/" + prefix + source[base_pos:pos] + ".yaml")
return pos + 1
elif source[pos] == ' ':
if base_pos != pos:
test_yamls.append(suites_path + "/" + prefix + source[base_pos:pos] + ".yaml")
pos = pos + 1
base_pos = pos
else:
pos = pos + 1
result = []
desc_tab = descriptions.split(',')
for d in desc_tab:
d = d.strip()
test_yamls = []
expand_descr(test_yamls, d, "", 0)
result.append((d, test_yamls))
return result

def prepare_configs(self):
suite_name = self.base_config.suite or "rerun"
suites_path = os.path.normpath(os.path.join(
self.suite_repo_path,
self.args.suite_relpath,
'suites',
self.base_config.suite.replace(':', '/'),
'suites'
))
suite_path = os.path.normpath(os.path.join(
suites_path,
suite_name.replace(':', '/'),
))
log.debug('Suite %s in %s' % (suite_name, suite_path))
log.debug(f"subset = {self.args.subset}")
log.debug(f"no_nested_subset = {self.args.no_nested_subset}")
configs = build_matrix(suite_path,
log.debug('Suites in %s' % (suites_path))
if self.args.descr:
log.debug(f'Rerun by description in {suites_path} in %s')
configs = self.descr_to_yamls(self.args.descr, suites_path)
use_suite_name = None
generated = len(configs)
log.info(f'Rerun from description in {suite_path} generated {generated} jobs')
else:
log.debug('Suite %s in %s' % (suite_name, suites_path))
log.debug(f"subset = {self.args.subset}")
log.debug(f"no_nested_subset = {self.args.no_nested_subset}")
configs = build_matrix(suite_path,
subset=self.args.subset,
no_nested_subset=self.args.no_nested_subset,
seed=self.args.seed)
generated = len(configs)
log.info(f'Suite {suite_name} in {suite_path} generated {generated} jobs (not yet filtered or merged)')
use_suite_name = self.base_config.suite
generated = len(configs)
log.info(f'Suite {suite_name} in {suites_path} generated {generated} jobs (not yet filtered or merged)')
configs = list(config_merge(configs,
filter_in=self.args.filter_in,
filter_out=self.args.filter_out,
filter_all=self.args.filter_all,
filter_fragments=self.args.filter_fragments,
seed=self.args.seed,
suite_name=suite_name))
suite_name=use_suite_name))
return configs

def schedule_suite(self):
"""
Schedule the suite-run. Returns the number of jobs scheduled.
"""
name = self.name
if self.args.arch:
arch = self.args.arch
log.debug("Using '%s' as an arch" % arch)
else:
arch = util.get_arch(self.base_config.machine_type)
suite_name = self.base_config.suite or "rerun"
suites_path = os.path.normpath(os.path.join(
self.suite_repo_path,
self.args.suite_relpath,
'suites'
))
configs = self.prepare_configs()
generated = len(configs)

if self.args.dry_run:
log.debug("Base job config:\n%s" % self.base_config)
Expand Down Expand Up @@ -696,7 +764,7 @@ def schedule_suite(self):
total_count *= self.args.num
log.info(
'Suite %s in %s scheduled %d jobs.' %
(suite_name, suite_path, count)
(suite_name, suites_path, count)
)
log.info('%d/%d jobs were filtered out.',
(generated - count),
Expand Down

0 comments on commit bdedd70

Please sign in to comment.