Skip to content

Commit

Permalink
teuthology/schedule: Add "descr" option
Browse files Browse the repository at this point in the history
A convenience option.
When a large batch has many failed tests, "descr" makes it easy to reschedule
just the selected ones. It is as simple as copy-pasting the description of a failed test.

teuthology-suite -e [email protected] -vv -c aclamk-testing-ceres-2024-07-01-1011 -p 50 \
  --descr "rados/singleton-bluestore/{all/cephtool mon_election/connectivity msgr-failures/none \
  msgr/async-v2only objectstore/bluestore-comp-lz4 rados supported-random-distro$/{centos_latest}}"

Many tests may be rerun in this fashion; "descr" accepts multiple comma-separated test descriptions.

Signed-off-by: Adam Kupczyk <[email protected]>
  • Loading branch information
aclamk committed Aug 2, 2024
1 parent c1b06c2 commit 36d0c59
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 21 deletions.
4 changes: 4 additions & 0 deletions scripts/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
usage: teuthology-suite --help
teuthology-suite [-v | -vv ] --suite <suite> [options] [<config_yaml>...]
teuthology-suite [-v | -vv ] --rerun <name> [options] [<config_yaml>...]
teuthology-suite [-v | -vv ] --descr <descr> [options] [<config_yaml>...]
Run a suite of ceph integration tests. A suite is a directory containing
facets. A facet is a directory containing config snippets. Running a suite
Expand Down Expand Up @@ -126,6 +127,9 @@
2/<outof> ... <outof>-1/<outof> will schedule all
jobs in the suite (many more than once). If specified,
this value can be found in results.log.
--descr <descr> Use to rerun tests. Reschedules tests based on their
descriptions. <descr> is a comma-separated list of test
descriptions.
-p <priority>, --priority <priority>
Job priority (lower is sooner)
[default: 1000]
Expand Down
110 changes: 89 additions & 21 deletions teuthology/suite/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,45 @@

log = logging.getLogger(__name__)

def descr_to_yamls(descriptions, suites_path):
    """
    Convert job descriptions into the sequences of .yaml files they name.

    A description is a compact spelling of several paths, e.g.
    "rados/standalone/{supported-random-distro$/{ubuntu_latest} workloads/scrub}":
    the text in front of a "{" is a prefix shared by every space-separated
    entry inside the braces, and brace groups may nest.

    :param descriptions: string containing a comma-separated list of job
                         descriptions
    :param suites_path:  posix path to the dir containing all suites
    :returns: list of (description, [yaml paths]) tuples in input order;
              blank descriptions (e.g. from a trailing comma) are skipped
    """
    def add_yaml(test_yamls, prefix, token):
        # An empty token (e.g. right after a nested group closed) names no file.
        if token:
            test_yamls.append(suites_path + "/" + prefix + token + ".yaml")

    def expand_descr(test_yamls, source, prefix, base_pos):
        """
        Expand description using production rules (explanation, not Chomsky
        context-free formalism):
        rule 1: (simplification)
            "A{BX}" => AB A{X}
        rule 2: (termination)
            "A" => "suites_pathA.yaml"

        Scanning starts at base_pos. Returns the position just past the "}"
        that closed this group, or len(source) if the input ended first.
        """
        pos = base_pos
        while pos < len(source):
            ch = source[pos]
            if ch == '{':
                # Everything since the last separator is a prefix for the group.
                more_prefix = source[base_pos:pos]
                pos = expand_descr(test_yamls, source, prefix + more_prefix, pos + 1)
                base_pos = pos
            elif ch == '}':
                add_yaml(test_yamls, prefix, source[base_pos:pos])
                return pos + 1
            elif ch == ' ':
                add_yaml(test_yamls, prefix, source[base_pos:pos])
                pos += 1
                base_pos = pos
            else:
                pos += 1
        # End of input: emit the trailing entry (a description without braces,
        # or an unclosed group) and always hand back a valid position so the
        # caller's loop never sees None.
        add_yaml(test_yamls, prefix, source[base_pos:pos])
        return pos

    result = []
    for raw in descriptions.split(','):
        descr = raw.strip()
        if not descr:
            continue
        test_yamls = []
        expand_descr(test_yamls, descr, "", 0)
        result.append((descr, test_yamls))
    return result

class Run(object):
WAIT_MAX_JOB_TIME = 30 * 60
Expand Down Expand Up @@ -70,7 +109,7 @@ def make_run_name(self):
[
self.user,
str(self.timestamp),
self.args.suite,
self.args.suite or "rerun",
self.args.ceph_branch,
self.args.kernel_branch or '-',
self.args.flavor, worker
Expand Down Expand Up @@ -357,8 +396,14 @@ def build_base_config(self):
job_config.timestamp = self.timestamp
job_config.priority = self.args.priority
job_config.seed = self.args.seed
if self.args.subset and self.args.descr:
util.schedule_fail("--subset is not compatible with --descr")
if self.args.suite and self.args.descr:
util.schedule_fail("--suite is not compatible with --descr")
if self.args.subset:
job_config.subset = '/'.join(str(i) for i in self.args.subset)
if self.args.descr:
job_config.suite = "rerun"
if self.args.email:
job_config.email = self.args.email
if self.args.owner:
Expand Down Expand Up @@ -564,6 +609,44 @@ def check_num_jobs(self, jobs_to_schedule):
if threshold and jobs_to_schedule > threshold:
util.schedule_fail(msg, dry_run=self.args.dry_run)

def prepare_configs(self):
    """
    Build the list of job configs to schedule.

    With --descr the jobs come from descr_to_yamls(), which resolves the
    given descriptions against the top-level 'suites' directory; otherwise
    they come from build_matrix() run on the selected suite's directory.
    Either way the result is passed through config_merge() together with
    the filter arguments and the seed.

    :returns: list built from the config_merge() output
    """
    suite_name = self.base_config.suite or "rerun"
    suites_path = os.path.normpath(os.path.join(
        self.suite_repo_path,
        self.args.suite_relpath,
        'suites'
    ))
    log.debug('Suites in %s' % (suites_path))
    if self.args.descr:
        log.debug(f'Rerun by description in {suites_path}')
        configs = descr_to_yamls(self.args.descr, suites_path)
        # Rerun-by-description passes no suite name on to config_merge().
        use_suite_name = None
        generated = len(configs)
        # Descriptions are resolved relative to suites_path, so that is the
        # directory reported here (not the per-suite path).
        log.info(f'Rerun from description in {suites_path} generated {generated} jobs')
    else:
        # Suite names use ':' as a separator; on disk it is a directory level.
        suite_path = os.path.normpath(os.path.join(
            suites_path,
            suite_name.replace(':', '/'),
        ))
        log.debug('Suite %s in %s' % (suite_name, suites_path))
        log.debug(f"subset = {self.args.subset}")
        log.debug(f"no_nested_subset = {self.args.no_nested_subset}")
        configs = build_matrix(suite_path,
                               subset=self.args.subset,
                               no_nested_subset=self.args.no_nested_subset,
                               seed=self.args.seed)
        use_suite_name = self.base_config.suite
        generated = len(configs)
        log.info(f'Suite {suite_name} in {suites_path} generated {generated} jobs (not yet filtered or merged)')
    configs = list(config_merge(configs,
                                filter_in=self.args.filter_in,
                                filter_out=self.args.filter_out,
                                filter_all=self.args.filter_all,
                                filter_fragments=self.args.filter_fragments,
                                seed=self.args.seed,
                                suite_name=use_suite_name))
    return configs

def schedule_suite(self):
"""
Schedule the suite-run. Returns the number of jobs scheduled.
Expand All @@ -574,29 +657,14 @@ def schedule_suite(self):
log.debug("Using '%s' as an arch" % arch)
else:
arch = util.get_arch(self.base_config.machine_type)
suite_name = self.base_config.suite
suite_path = os.path.normpath(os.path.join(
suite_name = self.base_config.suite or "rerun"
suites_path = os.path.normpath(os.path.join(
self.suite_repo_path,
self.args.suite_relpath,
'suites',
self.base_config.suite.replace(':', '/'),
'suites'
))
log.debug('Suite %s in %s' % (suite_name, suite_path))
log.debug(f"subset = {self.args.subset}")
log.debug(f"no_nested_subset = {self.args.no_nested_subset}")
configs = build_matrix(suite_path,
subset=self.args.subset,
no_nested_subset=self.args.no_nested_subset,
seed=self.args.seed)
configs = self.prepare_configs()
generated = len(configs)
log.info(f'Suite {suite_name} in {suite_path} generated {generated} jobs (not yet filtered or merged)')
configs = list(config_merge(configs,
filter_in=self.args.filter_in,
filter_out=self.args.filter_out,
filter_all=self.args.filter_all,
filter_fragments=self.args.filter_fragments,
seed=self.args.seed,
suite_name=suite_name))

if self.args.dry_run:
log.debug("Base job config:\n%s" % self.base_config)
Expand Down Expand Up @@ -696,7 +764,7 @@ def schedule_suite(self):
total_count *= self.args.num
log.info(
'Suite %s in %s scheduled %d jobs.' %
(suite_name, suite_path, count)
(suite_name, suites_path, count)
)
log.info('%d/%d jobs were filtered out.',
(generated - count),
Expand Down
49 changes: 49 additions & 0 deletions teuthology/suite/test/test_run_.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,52 @@ def test_newest_success(
m_find_git_parents.assert_has_calls(
[call('ceph', 'ceph_sha1', 10)]
)

def test_dupa(
    self
):
    """Check run.descr_to_yamls() on single and comma-separated descriptions."""
    # Case 1: one description, one nested brace group.
    suites = "/cephfs/github.com_ceph_build/qa/suites"
    descr = "rados/objectstore/{backends/objectstore-bluestore-b supported-random-distro$/{ubuntu_latest}}"
    actual = run.descr_to_yamls(descr, suites)
    expected = [
        (descr, [
            suites + '/rados/objectstore/backends/objectstore-bluestore-b.yaml',
            suites + '/rados/objectstore/supported-random-distro$/ubuntu_latest.yaml',
        ]),
    ]
    assert actual == expected

    # Case 2: a bare entry ("rados") between entries that contain slashes.
    suites = "/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_aclamk-testing/qa/suites"
    descr = "rados/singleton-nomsgr/{all/health-warnings mon_election/connectivity rados supported-random-distro$/{ubuntu_latest}}"
    actual = run.descr_to_yamls(descr, suites)
    print(str(actual))
    expected = [
        (descr, [
            suites + '/rados/singleton-nomsgr/all/health-warnings.yaml',
            suites + '/rados/singleton-nomsgr/mon_election/connectivity.yaml',
            suites + '/rados/singleton-nomsgr/rados.yaml',
            suites + '/rados/singleton-nomsgr/supported-random-distro$/ubuntu_latest.yaml',
        ]),
    ]
    assert actual == expected

    # Case 3: two descriptions separated by a comma.
    suites = "/cephfs/home/akupczyk/src/git.ceph.com_ceph-c_squid/qa/suites"
    first = "rados/cephadm/osds/{0-distro/centos_9.stream_runc 0-nvme-loop 1-start 2-ops/rm-zap-flag}"
    second = "rados/standalone/{supported-random-distro$/{ubuntu_latest} workloads/scrub}"
    actual = run.descr_to_yamls(first + "," + second, suites)
    expected = [
        (first, [
            suites + '/rados/cephadm/osds/0-distro/centos_9.stream_runc.yaml',
            suites + '/rados/cephadm/osds/0-nvme-loop.yaml',
            suites + '/rados/cephadm/osds/1-start.yaml',
            suites + '/rados/cephadm/osds/2-ops/rm-zap-flag.yaml',
        ]),
        (second, [
            suites + '/rados/standalone/supported-random-distro$/ubuntu_latest.yaml',
            suites + '/rados/standalone/workloads/scrub.yaml',
        ]),
    ]
    assert actual == expected

0 comments on commit 36d0c59

Please sign in to comment.