Skip to content

Commit

Permalink
Merge pull request #309 from Cray-HPE/release-2.17.7
Browse files Browse the repository at this point in the history
Release 2.17.7 for CSM 1.6
  • Loading branch information
mharding-hpe authored May 16, 2024
2 parents 10a82ad + a74e488 commit 8ec8917
Show file tree
Hide file tree
Showing 29 changed files with 249 additions and 116 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [2.17.7] - 2024-05-16
### Changed
- Add more checks to avoid operating on empty lists
- Compact response bodies to single line before logging them
- Improve BOS logging of unexpected errors

## [2.17.6] - 2024-04-19
### Fixed
- Corrected description of `disable_components_on_completion` in API spec.
Expand Down
6 changes: 4 additions & 2 deletions src/bos/common/tenant_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import logging
import hashlib
from requests.exceptions import HTTPError
from bos.common.utils import requests_retry_session, PROTOCOL
from bos.common.utils import exc_type_msg, requests_retry_session, PROTOCOL

LOGGER = logging.getLogger('bos.common.tenant_utils')

Expand Down Expand Up @@ -78,7 +78,7 @@ def get_tenant_data(tenant, session=None):
try:
response.raise_for_status()
except HTTPError as e:
LOGGER.error("Failed getting tenant data from tapms: %s", e)
LOGGER.error("Failed getting tenant data from tapms: %s", exc_type_msg(e))
if response.status_code == 404:
raise InvalidTenantException(f"Data not found for tenant {tenant}") from e
else:
Expand Down Expand Up @@ -110,6 +110,7 @@ def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except InvalidTenantException as e:
LOGGER.debug("Invalid tenant: %s", exc_type_msg(e))
return connexion.problem(
status=400, title='Invalid tenant',
detail=str(e))
Expand All @@ -122,6 +123,7 @@ def reject_invalid_tenant(func):
def wrapper(*args, **kwargs):
tenant = get_tenant_from_header()
if tenant and not validate_tenant_exists(tenant):
LOGGER.debug("The provided tenant does not exist")
return connexion.problem(
status=400, title="Invalid tenant",
detail=str("The provided tenant does not exist"))
Expand Down
22 changes: 21 additions & 1 deletion src/bos/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2022-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand All @@ -23,6 +23,7 @@
#
import datetime
import re
import traceback
from dateutil.parser import parse
import requests
from requests.adapters import HTTPAdapter
Expand Down Expand Up @@ -59,6 +60,7 @@ def duration_to_timedelta(timestamp: str):
seconds = timeval * seconds_table[durationval]
return datetime.timedelta(seconds=seconds)


class TimeoutHTTPAdapter(HTTPAdapter):
"""
An HTTP Adapter that allows a session level timeout for both read and connect attributes. This prevents interruption
Expand Down Expand Up @@ -95,3 +97,21 @@ def requests_retry_session(retries=10, backoff_factor=0.5,
# Mounting to only http will not work!
session.mount("%s://" % protocol, adapter)
return session


def compact_response_text(response_text: str) -> str:
    """
    Often JSON is "pretty printed" in response text, which is undesirable for our logging.
    This function transforms the response text into a single line, stripping leading and
    trailing whitespace from each line, and then returns it.

    Lines that are empty after stripping are dropped, so a body that ends with a newline
    (or contains blank lines) does not produce trailing or repeated spaces in the result.
    A falsy input (empty string, None) is returned as str(response_text).
    """
    if response_text:
        # splitlines() recognizes every line-ending convention (\n, \r\n, \r, ...)
        stripped_lines = (line.strip() for line in response_text.splitlines())
        return ' '.join(line for line in stripped_lines if line)
    return str(response_text)


def exc_type_msg(exc: Exception) -> str:
    """
    Given an exception, returns a string of its type and its text
    (e.g. TypeError: 'int' object is not subscriptable)

    traceback.format_exception_only() terminates every line it produces with a newline.
    The trailing one is removed here so that the result can be embedded in a log message
    without leaving a dangling line break; newlines between the lines of a multi-line
    description are kept.
    """
    return ''.join(traceback.format_exception_only(type(exc), exc)).rstrip('\n')
19 changes: 18 additions & 1 deletion src/bos/operators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import time
from typing import List, NoReturn, Type

from bos.common.utils import exc_type_msg
from bos.common.values import Status
from bos.operators.filters.base import BaseFilter
from bos.operators.utils.clients.bos.options import options
Expand Down Expand Up @@ -140,6 +141,10 @@ def _get_components(self) -> List[dict]:

def _handle_failed_components(self, components: List[dict]) -> List[dict]:
""" Marks components failed if the retry limits are exceeded """
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_handle_failed_components: No components to handle")
return []
failed_components = []
good_components = [] # Any component that isn't determined to be in a failed state
for component in components:
Expand All @@ -163,6 +168,10 @@ def _update_database(self, components: List[dict], additional_fields: dict=None)
Updates the BOS database for all components acted on by the operator
Includes updating the last action, attempt count and error
"""
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_update_database: No components require database updates")
return
data = []
for component in components:
patch = {
Expand Down Expand Up @@ -200,6 +209,10 @@ def _preset_last_action(self, components: List[dict]) -> None:
# e.g. nodes could be powered-on without the correct power-on last action, causing status problems
if not self.name:
return
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_preset_last_action: No components require database updates")
return
data = []
for component in components:
patch = {
Expand All @@ -221,6 +234,10 @@ def _update_database_for_failure(self, components: List[dict]) -> None:
"""
Updates the BOS database for all components the operator believes have failed
"""
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_update_database_for_failure: No components require database updates")
return
data = []
for component in components:
patch = {
Expand Down Expand Up @@ -250,7 +267,7 @@ def _update_log_level() -> None:
LOGGER.log(new_level, 'Logging level changed from {} to {}'.format(
logging.getLevelName(current_level), logging.getLevelName(new_level)))
except Exception as e:
LOGGER.error('Error updating logging level: {}'.format(e))
LOGGER.error('Error updating logging level: %s', exc_type_msg(e))


def _liveliness_heartbeat() -> NoReturn:
Expand Down
5 changes: 3 additions & 2 deletions src/bos/operators/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# MIT License
#
# (C) Copyright 2022 Hewlett Packard Enterprise Development LP
# (C) Copyright 2022, 2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -57,7 +57,8 @@ def filters(self):
]

def _act(self, components):
set_cfs(components, enabled=True)
if components:
set_cfs(components, enabled=True)
return components


Expand Down
7 changes: 4 additions & 3 deletions src/bos/operators/power_off_forceful.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# MIT License
#
# (C) Copyright 2021-2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2021-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -57,8 +57,9 @@ def filters(self):
]

def _act(self, components):
component_ids = [component['id'] for component in components]
pcs.force_off(nodes=component_ids)
if components:
component_ids = [component['id'] for component in components]
pcs.force_off(nodes=component_ids)
return components


Expand Down
7 changes: 4 additions & 3 deletions src/bos/operators/power_off_graceful.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# MIT License
#
# (C) Copyright 2021-2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2021-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -53,8 +53,9 @@ def filters(self):
]

def _act(self, components):
component_ids = [component['id'] for component in components]
pcs.soft_off(component_ids)
if components:
component_ids = [component['id'] for component in components]
pcs.soft_off(component_ids)
return components


Expand Down
19 changes: 14 additions & 5 deletions src/bos/operators/power_on.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import logging
from requests import HTTPError

from bos.common.utils import exc_type_msg
from bos.common.values import Action, Status
import bos.operators.utils.clients.bss as bss
import bos.operators.utils.clients.pcs as pcs
Expand Down Expand Up @@ -59,6 +60,8 @@ def filters(self):
]

def _act(self, components):
if not components:
return components
self._preset_last_action(components)
try:
self._set_bss(components)
Expand All @@ -85,6 +88,10 @@ def _set_bss(self, components, retries=5):
Because the connection to the BSS tokens database can be lost due to
infrequent use, retry up to retries number of times.
"""
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_set_bss: No components to act on")
return
parameters = defaultdict(set)
sessions = {}
for component in components:
Expand All @@ -106,9 +113,9 @@ def _set_bss(self, components, retries=5):
resp = bss.set_bss(node_set=nodes, kernel_params=kernel_parameters,
kernel=kernel, initrd=initrd)
resp.raise_for_status()
except HTTPError:
LOGGER.error(f"Failed to set BSS for boot artifacts: {key} for"
"nodes: {nodes}. Error: {err}")
except HTTPError as err:
LOGGER.error("Failed to set BSS for boot artifacts: %s for nodes: %s. Error: %s",
key, nodes, exc_type_msg(err))
else:
token = resp.headers['bss-referral-token']
attempts = 0
Expand All @@ -118,7 +125,8 @@ def _set_bss(self, components, retries=5):
break
except Exception as err:
attempts += 1
LOGGER.error(f"An error occurred attempting to record the BSS token: {err}")
LOGGER.error("An error occurred attempting to record the BSS token: %s",
exc_type_msg(err))
if attempts > retries:
raise
LOGGER.info("Retrying to record the BSS token.")
Expand All @@ -128,6 +136,8 @@ def _set_bss(self, components, retries=5):
"desired_state": {"bss_token": token},
"session": sessions[node]})
LOGGER.info('Found %d components that require BSS token updates', len(bss_tokens))
if not bss_tokens:
return
redacted_component_updates = [
{ "id": comp["id"],
"session": comp["session"]
Expand All @@ -136,7 +146,6 @@ def _set_bss(self, components, retries=5):
LOGGER.debug('Updated components (minus desired_state data): {}'.format(redacted_component_updates))
self.bos_client.components.update_components(bss_tokens)


if __name__ == '__main__':
main(PowerOnOperator)

Expand Down
55 changes: 46 additions & 9 deletions src/bos/operators/session_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from bos.operators.utils.clients.bos.options import options
from bos.operators.utils.rootfs.factory import ProviderFactory
from bos.operators.session_completion import SessionCompletionOperator
from bos.common.utils import exc_type_msg
from bos.common.values import Action, EMPTY_ACTUAL_STATE, EMPTY_DESIRED_STATE, EMPTY_STAGED_STATE
from bos.common.tenant_utils import get_tenant_component_set, InvalidTenantException

Expand Down Expand Up @@ -135,7 +136,7 @@ def _setup_components(self):
if not all_component_ids:
raise SessionSetupException("No nodes were found to act upon.")
except Exception as err:
raise SessionSetupException(err)
raise SessionSetupException(err) from err
else:
self._log(LOGGER.info, 'Found %d components that require updates', len(data))
self._log(LOGGER.debug, f'Updated components: {data}')
Expand Down Expand Up @@ -166,20 +167,26 @@ def _get_boot_set_component_list(self, boot_set) -> Set[str]:
self._log(LOGGER.warning, f"No hardware matching role {role_name}")
continue
nodes |= self.inventory.roles[role_name]
if not nodes:
self._log(LOGGER.warning, "After populating node list, before any filtering, no nodes to act upon.")
return nodes
self._log(LOGGER.debug, "Before any limiting or filtering, %d nodes to act upon.", len(nodes))
# Filter out any nodes that do not match the boot set architecture desired; boot sets that do not have a
# specified arch are considered 'X86' nodes.
arch = boot_set.get('arch', 'X86')
nodes = self._apply_arch(nodes, arch)
if not nodes:
return nodes
# Filter to nodes defined by limit
nodes = self._apply_limit(nodes)
if not nodes:
return nodes
# Exclude disabled nodes
include_disabled = self.session_data.get("include_disabled", False)
if not include_disabled:
hsmfilter = HSMState(enabled=True)
nodes = set(hsmfilter._filter(list(nodes)))
nodes = self._apply_tenant_limit(nodes)
nodes = self._apply_include_disabled(nodes)
if not nodes:
self._log(LOGGER.warning, "No nodes were found to act upon.")
return nodes
# If this session is for a tenant, filter out nodes not belonging to this tenant
nodes = self._apply_tenant_limit(nodes)
return nodes

def _apply_arch(self, nodes, arch):
Expand All @@ -204,7 +211,29 @@ def _apply_arch(self, nodes, arch):
if arch == 'X86':
valid_archs.add('UNKNOWN')
hsm_filter = HSMState()
return set(hsm_filter.filter_by_arch(nodes, valid_archs))
nodes = set(hsm_filter.filter_by_arch(nodes, valid_archs))
if not nodes:
self._log(LOGGER.warning, "After filtering for architecture, no nodes remain to act upon.")
else:
self._log(LOGGER.debug, "After filtering for architecture, %d nodes remain to act upon.", len(nodes))
return nodes

def _apply_include_disabled(self, nodes):
"""
If include_disabled is False for this session, filter out any nodes which are disabled in HSM.
If include_disabled is True, return the node list unchanged.
"""
include_disabled = self.session_data.get("include_disabled", False)
if include_disabled:
# Nodes disabled in HSM may be included, so no filtering is required
return nodes
hsmfilter = HSMState(enabled=True)
nodes = set(hsmfilter._filter(list(nodes)))
if not nodes:
self._log(LOGGER.warning, "After removing disabled nodes, no nodes remain to act upon.")
else:
self._log(LOGGER.debug, "After removing disabled nodes, %d nodes remain to act upon.", len(nodes))
return nodes

def _apply_limit(self, nodes):
session_limit = self.session_data.get('limit')
Expand All @@ -230,6 +259,10 @@ def _apply_limit(self, nodes):
limit_nodes = self.inventory[limit]
limit_node_set = op(limit_nodes)
nodes = nodes.intersection(limit_node_set)
if not nodes:
self._log(LOGGER.warning, "After applying limit, no nodes remain to act upon.")
else:
self._log(LOGGER.debug, "After applying limit, %d nodes remain to act upon.", len(nodes))
return nodes

def _apply_tenant_limit(self, nodes):
Expand All @@ -241,6 +274,10 @@ def _apply_tenant_limit(self, nodes):
except InvalidTenantException as e:
raise SessionSetupException(str(e)) from e
nodes = nodes.intersection(tenant_limit)
if not nodes:
self._log(LOGGER.warning, "After applying tenant limit, no nodes remain to act upon.")
else:
self._log(LOGGER.debug, "After applying tenant limit, %d nodes remain to act upon.", len(nodes))
return nodes

def _mark_running(self, component_ids):
Expand Down Expand Up @@ -377,7 +414,7 @@ def assemble_kernel_boot_parameters(self, boot_set, artifact_info):
except (ClientError, UnicodeDecodeError, S3ObjectNotFound) as error:
self._log(LOGGER.error, "Unable to read file {}. Thus, no kernel boot parameters obtained "
"from image".format(artifact_info['boot_parameters']))
LOGGER.error(error)
LOGGER.error(exc_type_msg(error))
raise

# Parameters from the BOS Session template if the parameters exist.
Expand Down
Loading

0 comments on commit 8ec8917

Please sign in to comment.