From cc551467c616efb9d1d6e4141e09da85b7d44846 Mon Sep 17 00:00:00 2001 From: Anda Date: Thu, 19 Sep 2024 12:09:15 -0700 Subject: [PATCH 1/4] fix: fix flaky generic task pause test --- e2e_tests/tests/task/test_generic_tasks.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/e2e_tests/tests/task/test_generic_tasks.py b/e2e_tests/tests/task/test_generic_tasks.py index 3a39a65e817..5cbf438c0bc 100644 --- a/e2e_tests/tests/task/test_generic_tasks.py +++ b/e2e_tests/tests/task/test_generic_tasks.py @@ -1,4 +1,5 @@ import pytest +import time from determined.cli import ntsc from determined.common import util @@ -231,8 +232,16 @@ def test_pause_and_unpause_generic_task() -> None: detproc.check_call(sess, command) - pause_resp = bindings.get_GetTask(sess, taskId=task_resp.taskId) - assert pause_resp.task.taskState == bindings.v1GenericTaskState.PAUSED + # The task may still be PAUSING, retry a few times. + retries = 3 + for i in range(retries): + pause_resp = bindings.get_GetTask(sess, taskId=task_resp.taskId) + if pause_resp.task.taskState == bindings.v1GenericTaskState.PAUSED: + break + time.sleep(1) + else: + pytest.fail(f"Task {task_resp.taskId} did not reach a PAUSED state after {retries} seconds.") + # Unpause task command = ["det", "-m", conf.make_master_url(), "task", "unpause", task_resp.taskId] From eefc4fd4e45662a62c8d4f5de807230c8ee318ee Mon Sep 17 00:00:00 2001 From: Anda Date: Thu, 19 Sep 2024 12:27:39 -0700 Subject: [PATCH 2/4] more retries --- e2e_tests/tests/task/test_generic_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e_tests/tests/task/test_generic_tasks.py b/e2e_tests/tests/task/test_generic_tasks.py index 5cbf438c0bc..83c4e36069c 100644 --- a/e2e_tests/tests/task/test_generic_tasks.py +++ b/e2e_tests/tests/task/test_generic_tasks.py @@ -233,7 +233,7 @@ def test_pause_and_unpause_generic_task() -> None: detproc.check_call(sess, command) # The task may still be PAUSING, retry a few times. - retries = 3 + retries = 5 for i in range(retries): pause_resp = bindings.get_GetTask(sess, taskId=task_resp.taskId) if pause_resp.task.taskState == bindings.v1GenericTaskState.PAUSED: From 8aec5f2b4dff3769d6444206fa61902f5657e841 Mon Sep 17 00:00:00 2001 From: Anda Date: Fri, 20 Sep 2024 07:26:04 -0700 Subject: [PATCH 3/4] wait for task state --- e2e_tests/tests/task/test_generic_tasks.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/e2e_tests/tests/task/test_generic_tasks.py b/e2e_tests/tests/task/test_generic_tasks.py index 83c4e36069c..2ae6fab9321 100644 --- a/e2e_tests/tests/task/test_generic_tasks.py +++ b/e2e_tests/tests/task/test_generic_tasks.py @@ -232,23 +232,12 @@ def test_pause_and_unpause_generic_task() -> None: detproc.check_call(sess, command) - # The task may still be PAUSING, retry a few times. - retries = 5 - for i in range(retries): - pause_resp = bindings.get_GetTask(sess, taskId=task_resp.taskId) - if pause_resp.task.taskState == bindings.v1GenericTaskState.PAUSED: - break - time.sleep(1) - else: - pytest.fail(f"Task {task_resp.taskId} did not reach a PAUSED state after {retries} seconds.") - + task.wait_for_task_state(sess, task_resp.taskId, bindings.v1GenericTaskState.PAUSED) # Unpause task command = ["det", "-m", conf.make_master_url(), "task", "unpause", task_resp.taskId] detproc.check_call(sess, command) - unpause_resp = bindings.get_GetTask(sess, taskId=task_resp.taskId) - assert unpause_resp.task.taskState == bindings.v1GenericTaskState.ACTIVE - + task.wait_for_task_state(sess, task_resp.taskId, bindings.v1GenericTaskState.ACTIVE) task.wait_for_task_state(sess, task_resp.taskId, bindings.v1GenericTaskState.COMPLETED) From 914fce44ee40e1d8fb50392aaaf037980d73b323 Mon Sep 17 00:00:00 2001 From: Anda Date: Fri, 20 Sep 2024 07:26:40 -0700 Subject: [PATCH 4/4] unused import --- e2e_tests/tests/task/test_generic_tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/e2e_tests/tests/task/test_generic_tasks.py b/e2e_tests/tests/task/test_generic_tasks.py index 2ae6fab9321..907a3afbed3 100644 --- a/e2e_tests/tests/task/test_generic_tasks.py +++ b/e2e_tests/tests/task/test_generic_tasks.py @@ -1,5 +1,4 @@ import pytest -import time from determined.cli import ntsc from determined.common import util