CRAYSAT-1706: To stop waiting if BOS session is deleted

IM:CRAYSAT-1706 Reviewer:Ryan
Cray-HPE · Aug 20, 2024 · a8846c9 · a8846c9
1 parent fe8ea55
commit a8846c9
Show file tree

Hide file tree

Showing 4 changed files with 80 additions and 40 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -25,6 +25,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [3.30.2] - 2024-08-14
+
+### Fixed
+- Log an error message and stop waiting on BOS sessions that have been deleted
+  in the `bos-operations` stage of `sat bootsys`.
+
 ## [3.30.1] - 2024-08-13
 
 ### Fixed

diff --git a/sat/apiclient/bos.py b/sat/apiclient/bos.py
@@ -1,7 +1,7 @@
 #
 # MIT License
 #
-# (C) Copyright 2019-2023 Hewlett Packard Enterprise Development LP
+# (C) Copyright 2019-2024 Hewlett Packard Enterprise Development LP
 #
 # Permission is hereby granted, free of charge, to any person obtaining a
 # copy of this software and associated documentation files (the "Software"),
@@ -145,29 +145,6 @@ def get_base_boot_set_data():
             'rootfs_provider_passthrough': 'dvs:api-gw-service-nmn.local:300:nmn0'
         }
 
-    def get_session_status(self, session_id):
-        """Get the status of a BOS session.
-
-        Args:
-            session_id (str): the ID of the session
-
-        Returns:
-            dict: the session status information from BOS
-
-        Raises:
-            APIError: if there is a problem retrieving session status BOS, or if
-                the returned JSON is invalid.
-        """
-        try:
-            return self.get(self.session_path, session_id, 'status').json()
-        except APIError as err:
-            raise APIError(f'Failed to get BOS session status for session {session_id}: '
-                           f'{err}')
-        except ValueError as err:
-            raise APIError(f'Failed to parse JSON in response from BOS when '
-                           f'getting session status for session {session_id}: '
-                           f'{err}')
-
     def get_session(self, session_id):
         """Get information about a given session.
 

diff --git a/sat/cli/bootsys/bos.py b/sat/cli/bootsys/bos.py
@@ -45,7 +45,7 @@
 from sat.config import get_config_value
 from sat.session import SATSession
 from sat.util import pester, prompt_continue
-from sat.waiting import Waiter
+from sat.waiting import Waiter, WaitingFailure
 from sat.xname import XName
 
 LOGGER = logging.getLogger(__name__)
@@ -246,9 +246,23 @@ def has_completed(self):
                 self.bos_session_thread.session_template
             )
 
-            self.session_status = self.bos_client.get_session_status(
-                self.bos_session_thread.session_id
-            )
+            response = self.bos_client.get(self.bos_client.session_path, self.bos_session_thread.session_id, 'status',
+                                           raise_not_ok=False)
+
+            if not response.ok:
+                if response.status_code == 404:
+                    raise WaitingFailure(
+                        f'Failed to query session status: Session {self.bos_session_thread.session_id} does not exist.')
+                else:
+                    LOGGER.warning(
+                        f'Failed to query status of BOS session {self.bos_session_thread.session_id}: '
+                        f'{response.status_code} {response.reason}.')
+                    return False
+
+            try:
+                self.session_status = response.json()
+            except ValueError as err:
+                LOGGER.warning(f'Failed to parse session status JSON: {err}')
 
             if (self.session_status['percent_successful'] != self.pct_successful
                     or self.session_status['percent_failed'] != self.pct_failed):
@@ -276,8 +290,6 @@ def has_completed(self):
 
             return False
 
-        except APIError as err:
-            LOGGER.warning('Failed to query session status: %s', err)
         except KeyError as err:
             LOGGER.warning('BOS session status query response missing key %s', err)
 

diff --git a/tests/cli/bootsys/test_bos.py b/tests/cli/bootsys/test_bos.py
@@ -40,6 +40,7 @@
                                  do_bos_reboots, do_bos_shutdowns,
                                  get_session_templates)
 from tests.common import ExtendedTestCase
+from sat.waiting import WaitingFailure
 
 
 class TestBOAJobSuccessful(ExtendedTestCase):
@@ -233,7 +234,7 @@ def setUp(self):
         self.session_id = 'abcdef-012345-678901-fdecba'
 
         self.mock_bos_client = MagicMock()
-        self.mock_bos_client.get_session_status.return_value = {
+        self.mock_bos_client.get.return_value = MagicMock(ok=True, status_code=200, json=MagicMock(return_value={
             'status': 'complete',
             'managed_components_count': 10,
             'phases': {
@@ -251,7 +252,8 @@ def setUp(self):
                 'end_time': '2022-01-01T00:00:00',
                 'start_time': '2022-01-01T00:01:00',
             },
-        }
+        }))
+
         patch('sat.cli.bootsys.bos.BOSClientCommon.get_bos_client', return_value=self.mock_bos_client).start()
 
         self.mock_session_thread = MagicMock(session_id=self.session_id)
@@ -268,7 +270,7 @@ def test_session_is_completed(self):
 
     def test_session_not_completed(self):
         """Test that the waiter detects a session that has not completed yet"""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'running',
             'phases': {
                 'percent_complete': 50.0,
@@ -282,7 +284,7 @@ def test_session_not_completed(self):
 
     def test_session_failed(self):
         """Test that the waiter detects when a session has failed"""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'running',
             'phases': {
                 'percent_complete': 50.0,
@@ -297,7 +299,7 @@ def test_session_failed(self):
 
     def test_session_superseded(self):
         """Test when the waiter's session has its managed components taken by another session"""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'complete',
             'managed_components_count': 0,
             'phases': {
@@ -314,7 +316,7 @@ def test_session_superseded(self):
 
     def test_staged_session_not_complete(self):
         """Test that a staged session still in pending state is not complete"""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'pending',
             'managed_components_count': 0,
             'phases': {
@@ -333,7 +335,7 @@ def test_staged_session_not_complete(self):
 
     def test_staged_session_running_complete(self):
         """Test that a staged session that has reached running state is complete."""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'running',
             'managed_components_count': 10,
             'phases': {
@@ -352,7 +354,7 @@ def test_staged_session_running_complete(self):
 
     def test_staged_session_empty_complete(self):
         """Test that a staged session with empty components in complete state is complete."""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'complete',
             'managed_components_count': 0,
             'phases': {
@@ -372,7 +374,7 @@ def test_staged_session_empty_complete(self):
 
     def test_percent_complete_above_99(self):
         """Test that the waiter correctly handles percent_complete above 99 but below 100"""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'running',
             'phases': {
                 'percent_complete': 99.6898,
@@ -394,7 +396,7 @@ def test_percent_complete_above_99(self):
 
     def test_percent_complete_below_99(self):
         """Test that the waiter correctly handles percent_complete below 99"""
-        self.mock_bos_client.get_session_status.return_value.update({
+        self.mock_bos_client.get.return_value.json.return_value.update({
             'status': 'running',
             'phases': {
                 'percent_complete': 78.5689,
@@ -414,6 +416,49 @@ def test_percent_complete_below_99(self):
             f'0.00% components failed'
         )
 
+    def test_session_status_404_error(self):
+        """Test that the waiter raises WaitingFailure on a 404 error."""
+        self.mock_bos_client.get.return_value.ok = False
+        self.mock_bos_client.get.return_value.status_code = 404
+
+        with self.assertRaises(WaitingFailure) as context:
+            self.waiter.has_completed()
+
+        self.assertEqual(
+            str(context.exception),
+            f'Failed to query session status: Session {self.session_id} does not exist.'
+        )
+
+    def test_session_status_503_error(self):
+        """Test that the waiter logs a warning and continues on a 503 error."""
+        self.mock_bos_client.get.side_effect = [
+            MagicMock(ok=False, status_code=503, json=MagicMock(return_value={})),
+            MagicMock(ok=True, status_code=200, json=MagicMock(return_value={
+                'status': 'running',
+                'managed_components_count': 10,
+                'phases': {
+                    'percent_complete': 75.0,
+                    'percent_powering_on': 25.0,
+                    'percent_powering_off': 0.0,
+                    'percent_configuring': 50.0,
+                },
+                'percent_successful': 75.0,
+                'percent_failed': 0.0,
+            }))
+        ]
+
+        with self.assertLogs(level='WARNING') as logs_cm:
+            result = self.waiter.has_completed()
+
+        self.assertFalse(result)
+        self.assertEqual(1, len(logs_cm.records))
+        self.assertIn('Failed to query status of BOS session', logs_cm.records[0].message)
+
+        # Ensuring it made the second call
+        self.mock_bos_client.get.assert_called_with(
+            self.mock_bos_client.session_path, self.session_id, 'status', raise_not_ok=False
+        )
+
 
 class TestBOSSessionThread(unittest.TestCase):
     """Test the BOSSessionThread class."""