Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CRAYSAT-1706: To stop waiting if BOS session is deleted #255

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.30.2] - 2024-08-14

### Fixed
- Log an error message and stop waiting on BOS sessions that have been deleted
in the `bos-operations` stage of `sat bootsys`.

## [3.30.1] - 2024-08-13

### Fixed
Expand Down
25 changes: 1 addition & 24 deletions sat/apiclient/bos.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2019-2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2019-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -145,29 +145,6 @@ def get_base_boot_set_data():
'rootfs_provider_passthrough': 'dvs:api-gw-service-nmn.local:300:nmn0'
}

def get_session_status(self, session_id):
"""Get the status of a BOS session.

Args:
session_id (str): the ID of the session

Returns:
dict: the session status information from BOS

Raises:
APIError: if there is a problem retrieving session status BOS, or if
the returned JSON is invalid.
"""
try:
return self.get(self.session_path, session_id, 'status').json()
except APIError as err:
raise APIError(f'Failed to get BOS session status for session {session_id}: '
f'{err}')
except ValueError as err:
raise APIError(f'Failed to parse JSON in response from BOS when '
f'getting session status for session {session_id}: '
f'{err}')

def get_session(self, session_id):
"""Get information about a given session.

Expand Down
24 changes: 18 additions & 6 deletions sat/cli/bootsys/bos.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from sat.config import get_config_value
from sat.session import SATSession
from sat.util import pester, prompt_continue
from sat.waiting import Waiter
from sat.waiting import Waiter, WaitingFailure
from sat.xname import XName

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -246,9 +246,23 @@ def has_completed(self):
self.bos_session_thread.session_template
)

self.session_status = self.bos_client.get_session_status(
self.bos_session_thread.session_id
)
response = self.bos_client.get(self.bos_client.session_path, self.bos_session_thread.session_id, 'status',
raise_not_ok=False)

if not response.ok:
if response.status_code == 404:
raise WaitingFailure(
f'Failed to query session status: Session {self.bos_session_thread.session_id} does not exist.')
else:
LOGGER.warning(
f'Failed to query status of BOS session {self.bos_session_thread.session_id}: '
f'{response.status_code} {response.reason}.')
return False

try:
self.session_status = response.json()
except ValueError as err:
LOGGER.warning(f'Failed to parse session status JSON: {err}')

if (self.session_status['percent_successful'] != self.pct_successful
or self.session_status['percent_failed'] != self.pct_failed):
Expand Down Expand Up @@ -276,8 +290,6 @@ def has_completed(self):

return False

except APIError as err:
LOGGER.warning('Failed to query session status: %s', err)
except KeyError as err:
LOGGER.warning('BOS session status query response missing key %s', err)

Expand Down
65 changes: 55 additions & 10 deletions tests/cli/bootsys/test_bos.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
do_bos_reboots, do_bos_shutdowns,
get_session_templates)
from tests.common import ExtendedTestCase
from sat.waiting import WaitingFailure


class TestBOAJobSuccessful(ExtendedTestCase):
Expand Down Expand Up @@ -233,7 +234,7 @@ def setUp(self):
self.session_id = 'abcdef-012345-678901-fdecba'

self.mock_bos_client = MagicMock()
self.mock_bos_client.get_session_status.return_value = {
self.mock_bos_client.get.return_value = MagicMock(ok=True, status_code=200, json=MagicMock(return_value={
'status': 'complete',
'managed_components_count': 10,
'phases': {
Expand All @@ -251,7 +252,8 @@ def setUp(self):
'end_time': '2022-01-01T00:00:00',
'start_time': '2022-01-01T00:01:00',
},
}
}))

patch('sat.cli.bootsys.bos.BOSClientCommon.get_bos_client', return_value=self.mock_bos_client).start()

self.mock_session_thread = MagicMock(session_id=self.session_id)
Expand All @@ -268,7 +270,7 @@ def test_session_is_completed(self):

def test_session_not_completed(self):
"""Test that the waiter detects a session that has not completed yet"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 50.0,
Expand All @@ -282,7 +284,7 @@ def test_session_not_completed(self):

def test_session_failed(self):
"""Test that the waiter detects when a session has failed"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 50.0,
Expand All @@ -297,7 +299,7 @@ def test_session_failed(self):

def test_session_superseded(self):
"""Test when the waiter's session has its managed components taken by another session"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'complete',
'managed_components_count': 0,
'phases': {
Expand All @@ -314,7 +316,7 @@ def test_session_superseded(self):

def test_staged_session_not_complete(self):
"""Test that a staged session still in pending state is not complete"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'pending',
'managed_components_count': 0,
'phases': {
Expand All @@ -333,7 +335,7 @@ def test_staged_session_not_complete(self):

def test_staged_session_running_complete(self):
"""Test that a staged session that has reached running state is complete."""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'managed_components_count': 10,
'phases': {
Expand All @@ -352,7 +354,7 @@ def test_staged_session_running_complete(self):

def test_staged_session_empty_complete(self):
"""Test that a staged session with empty components in complete state is complete."""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'complete',
'managed_components_count': 0,
'phases': {
Expand All @@ -372,7 +374,7 @@ def test_staged_session_empty_complete(self):

def test_percent_complete_above_99(self):
"""Test that the waiter correctly handles percent_complete above 99 but below 100"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 99.6898,
Expand All @@ -394,7 +396,7 @@ def test_percent_complete_above_99(self):

def test_percent_complete_below_99(self):
"""Test that the waiter correctly handles percent_complete below 99"""
self.mock_bos_client.get_session_status.return_value.update({
self.mock_bos_client.get.return_value.json.return_value.update({
'status': 'running',
'phases': {
'percent_complete': 78.5689,
Expand All @@ -414,6 +416,49 @@ def test_percent_complete_below_99(self):
f'0.00% components failed'
)

def test_session_status_404_error(self):
"""Test that the waiter raises WaitingFailure on a 404 error."""
self.mock_bos_client.get.return_value.ok = False
self.mock_bos_client.get.return_value.status_code = 404

with self.assertRaises(WaitingFailure) as context:
self.waiter.has_completed()

self.assertEqual(
str(context.exception),
f'Failed to query session status: Session {self.session_id} does not exist.'
)

def test_session_status_503_error(self):
"""Test that the waiter logs a warning and continues on a 503 error."""
self.mock_bos_client.get.side_effect = [
MagicMock(ok=False, status_code=503, json=MagicMock(return_value={})),
MagicMock(ok=True, status_code=200, json=MagicMock(return_value={
'status': 'running',
'managed_components_count': 10,
'phases': {
'percent_complete': 75.0,
'percent_powering_on': 25.0,
'percent_powering_off': 0.0,
'percent_configuring': 50.0,
},
'percent_successful': 75.0,
'percent_failed': 0.0,
}))
]

with self.assertLogs(level='WARNING') as logs_cm:
result = self.waiter.has_completed()

self.assertFalse(result)
self.assertEqual(1, len(logs_cm.records))
self.assertIn('Failed to query status of BOS session', logs_cm.records[0].message)

# Ensuring it made the second call
self.mock_bos_client.get.assert_called_with(
self.mock_bos_client.session_path, self.session_id, 'status', raise_not_ok=False
)


class TestBOSSessionThread(unittest.TestCase):
"""Test the BOSSessionThread class."""
Expand Down
Loading