Skip to content

Commit

Permalink
CRAYSAT-1706: To stop waiting if BOS session is deleted
Browse files Browse the repository at this point in the history
IM:CRAYSAT-1706
Reviewer:Ryan
  • Loading branch information
annapoorna-s-alt committed Aug 20, 2024
1 parent fe8ea55 commit a8846c9
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 40 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.30.2] - 2024-08-14

### Fixed
- Log an error message and stop waiting on BOS sessions that have been deleted
in the `bos-operations` stage of `sat bootsys`.

## [3.30.1] - 2024-08-13

### Fixed
Expand Down
25 changes: 1 addition & 24 deletions sat/apiclient/bos.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2019-2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2019-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -145,29 +145,6 @@ def get_base_boot_set_data():
'rootfs_provider_passthrough': 'dvs:api-gw-service-nmn.local:300:nmn0'
}

def get_session_status(self, session_id):
"""Get the status of a BOS session.
Args:
session_id (str): the ID of the session
Returns:
dict: the session status information from BOS
Raises:
APIError: if there is a problem retrieving session status BOS, or if
the returned JSON is invalid.
"""
try:
return self.get(self.session_path, session_id, 'status').json()
except APIError as err:
raise APIError(f'Failed to get BOS session status for session {session_id}: '
f'{err}')
except ValueError as err:
raise APIError(f'Failed to parse JSON in response from BOS when '
f'getting session status for session {session_id}: '
f'{err}')

def get_session(self, session_id):
"""Get information about a given session.
Expand Down
24 changes: 18 additions & 6 deletions sat/cli/bootsys/bos.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from sat.config import get_config_value
from sat.session import SATSession
from sat.util import pester, prompt_continue
from sat.waiting import Waiter
from sat.waiting import Waiter, WaitingFailure
from sat.xname import XName

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -246,9 +246,23 @@ def has_completed(self):
self.bos_session_thread.session_template
)

self.session_status = self.bos_client.get_session_status(
self.bos_session_thread.session_id
)
response = self.bos_client.get(self.bos_client.session_path, self.bos_session_thread.session_id, 'status',
raise_not_ok=False)

if not response.ok:
if response.status_code == 404:
raise WaitingFailure(
f'Failed to query session status: Session {self.bos_session_thread.session_id} does not exist.')
else:
LOGGER.warning(
f'Failed to query status of BOS session {self.bos_session_thread.session_id}: '
f'{response.status_code} {response.reason}.')
return False

try:
self.session_status = response.json()
except ValueError as err:
LOGGER.warning(f'Failed to parse session status JSON: {err}')

if (self.session_status['percent_successful'] != self.pct_successful
or self.session_status['percent_failed'] != self.pct_failed):
Expand Down Expand Up @@ -276,8 +290,6 @@ def has_completed(self):

return False

except APIError as err:
LOGGER.warning('Failed to query session status: %s', err)
except KeyError as err:
LOGGER.warning('BOS session status query response missing key %s', err)

Expand Down
65 changes: 55 additions & 10 deletions tests/cli/bootsys/test_bos.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
do_bos_reboots, do_bos_shutdowns,
get_session_templates)
from tests.common import ExtendedTestCase
from sat.waiting import WaitingFailure


class TestBOAJobSuccessful(ExtendedTestCase):
Expand Down Expand Up @@ -233,7 +234,7 @@ def setUp(self):
self.session_id = 'abcdef-012345-678901-fdecba'

self.mock_bos_client = MagicMock()
self.mock_bos_client.get_session_status.return_value = {
self.mock_bos_client.get.return_value = MagicMock(ok=True, status_code=200, json=MagicMock(return_value={
'status': 'complete',
'managed_components_count': 10,
'phases': {
Expand All @@ -251,7 +252,8 @@ def setUp(self):
'end_time': '2022-01-01T00:00:00',
'start_time': '2022-01-01T00:01:00',
},
}
}))

patch('sat.cli.bootsys.bos.BOSClientCommon.get_bos_client', return_value=self.mock_bos_client).start()

self.mock_session_thread = MagicMock(session_id=self.session_id)
Expand All @@ -268,7 +270,7 @@ def test_session_is_completed(self):

def test_session_not_completed(self):
"""Test that the waiter detects a session that has not completed yet"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 50.0,
Expand All @@ -282,7 +284,7 @@ def test_session_not_completed(self):

def test_session_failed(self):
"""Test that the waiter detects when a session has failed"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 50.0,
Expand All @@ -297,7 +299,7 @@ def test_session_failed(self):

def test_session_superseded(self):
"""Test when the waiter's session has its managed components taken by another session"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'complete',
'managed_components_count': 0,
'phases': {
Expand All @@ -314,7 +316,7 @@ def test_session_superseded(self):

def test_staged_session_not_complete(self):
"""Test that a staged session still in pending state is not complete"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'pending',
'managed_components_count': 0,
'phases': {
Expand All @@ -333,7 +335,7 @@ def test_staged_session_not_complete(self):

def test_staged_session_running_complete(self):
"""Test that a staged session that has reached running state is complete."""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'managed_components_count': 10,
'phases': {
Expand All @@ -352,7 +354,7 @@ def test_staged_session_running_complete(self):

def test_staged_session_empty_complete(self):
"""Test that a staged session with empty components in complete state is complete."""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'complete',
'managed_components_count': 0,
'phases': {
Expand All @@ -372,7 +374,7 @@ def test_staged_session_empty_complete(self):

def test_percent_complete_above_99(self):
"""Test that the waiter correctly handles percent_complete above 99 but below 100"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 99.6898,
Expand All @@ -394,7 +396,7 @@ def test_percent_complete_above_99(self):

def test_percent_complete_below_99(self):
"""Test that the waiter correctly handles percent_complete below 99"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 78.5689,
Expand All @@ -414,6 +416,49 @@ def test_percent_complete_below_99(self):
f'0.00% components failed'
)

def test_session_status_404_error(self):
"""Test that the waiter raises WaitingFailure on a 404 error."""
self.mock_bos_client.get.return_value.ok = False
self.mock_bos_client.get.return_value.status_code = 404

with self.assertRaises(WaitingFailure) as context:
self.waiter.has_completed()

self.assertEqual(
str(context.exception),
f'Failed to query session status: Session {self.session_id} does not exist.'
)

def test_session_status_503_error(self):
"""Test that the waiter logs a warning and continues on a 503 error."""
self.mock_bos_client.get.side_effect = [
MagicMock(ok=False, status_code=503, json=MagicMock(return_value={})),
MagicMock(ok=True, status_code=200, json=MagicMock(return_value={
'status': 'running',
'managed_components_count': 10,
'phases': {
'percent_complete': 75.0,
'percent_powering_on': 25.0,
'percent_powering_off': 0.0,
'percent_configuring': 50.0,
},
'percent_successful': 75.0,
'percent_failed': 0.0,
}))
]

with self.assertLogs(level='WARNING') as logs_cm:
result = self.waiter.has_completed()

self.assertFalse(result)
self.assertEqual(1, len(logs_cm.records))
self.assertIn('Failed to query status of BOS session', logs_cm.records[0].message)

# Ensuring it made the second call
self.mock_bos_client.get.assert_called_with(
self.mock_bos_client.session_path, self.session_id, 'status', raise_not_ok=False
)


class TestBOSSessionThread(unittest.TestCase):
"""Test the BOSSessionThread class."""
Expand Down

0 comments on commit a8846c9

Please sign in to comment.