Skip to content

Commit

Permalink
Merge branch 'develop' into ci/slurm_multinode
Browse files Browse the repository at this point in the history
  • Loading branch information
teojgo authored Sep 25, 2024
2 parents 297d2e6 + cadbd83 commit c5b54f0
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 3 deletions.
11 changes: 11 additions & 0 deletions docs/manpage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,17 @@ Options controlling ReFrame execution
.. versionchanged:: 3.6.1
Multiple report files are now accepted.


.. option:: --retries-threshold=VALUE[%]

Skip retries (see :option:`--max-retries`) if the number of failed tests exceeds the given threshold.

The threshold can be specified either as an absolute number of failed tests or as a percentage of the total number of test cases using the ``%`` character, e.g., ``--retries-threshold=30%``.
Note that in certain shells the ``%`` character may need to be escaped.

.. versionadded:: 4.7


.. option:: -S, --setvar=[TEST.]VAR=VAL

Set variable ``VAR`` in all tests or optionally only in test ``TEST`` to ``VAL``.
Expand Down
15 changes: 14 additions & 1 deletion reframe/frontend/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,11 @@ def main():
metavar='REPORT',
help='Restore a testing session from REPORT file'
)
run_options.add_argument(
'--retries-threshold', action='store', default='1000%',
metavar='VALUE[%]',
help='Retry tests only if failures do not exceed threshold'
)
run_options.add_argument(
'-S', '--setvar', action='append', metavar='[TEST.]VAR=VAL',
dest='vars', default=[],
Expand Down Expand Up @@ -1564,8 +1569,16 @@ def module_unuse(*paths):
f"{options.reruns}"
)

# Parse retries threshold
if options.retries_threshold[-1] == '%':
ratio = int(options.retries_threshold[:-1]) / 100.
retries_threshold = int(len(testcases)*ratio)
else:
retries_threshold = int(options.retries_threshold)

runner = Runner(exec_policy, printer, options.max_retries,
options.maxfail, options.reruns, options.duration)
options.maxfail, options.reruns, options.duration,
retries_threshold)
try:
time_start = time.time()
runner.runall(testcases, restored_cases)
Expand Down
7 changes: 5 additions & 2 deletions reframe/frontend/executors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,10 +573,12 @@ class Runner:
_timeout = fields.TypedField(typ.Duration, type(None), allow_implicit=True)

def __init__(self, policy, printer=None, max_retries=0,
max_failures=sys.maxsize, reruns=0, timeout=None):
max_failures=sys.maxsize, reruns=0, timeout=None,
retries_threshold=sys.maxsize):
self._policy = policy
self._printer = printer or PrettyPrinter()
self._max_retries = max_retries
self._retries_threshold = retries_threshold
self._num_reruns = reruns
self._timeout = timeout
self._t_init = timeout
Expand Down Expand Up @@ -620,7 +622,8 @@ def runall(self, testcases, restored_cases=None):
self._policy.set_expiry(self._t_init + self._timeout)

self._runall(testcases)
if self._max_retries:
if (self._max_retries and
len(self._stats.failed()) <= self._retries_threshold):
restored_cases = restored_cases or []
self._retry_failed(testcases + restored_cases)

Expand Down
7 changes: 7 additions & 0 deletions unittests/test_policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,13 @@ def test_retries_bad_check(make_runner, make_cases, common_exec_ctx):
assert_runall(runner)
assert runner.max_retries == rt.runtime().current_run
assert 2 == len(runner.stats.failed())
assert 3 == runner.stats.num_runs


def test_retries_threshold(make_runner, make_cases, common_exec_ctx):
    # Up to 2 retries are allowed, but only while the number of failed
    # test cases does not exceed the retries threshold of 1.
    runner = make_runner(max_retries=2, retries_threshold=1)
    bad_checks = [BadSetupCheck(), BadSetupCheckEarly()]
    runner.runall(make_cases(bad_checks))

    # Presumably both checks fail (confirm against their definitions), so
    # the threshold is exceeded; a single recorded run means no retry ran.
    assert runner.stats.num_runs == 1


def test_retries_good_check(make_runner, make_cases, common_exec_ctx):
Expand Down

0 comments on commit c5b54f0

Please sign in to comment.