Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 2.17.7 for CSM 1.6 #309

Merged
merged 7 commits into from
May 16, 2024
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [2.17.7] - 2024-05-16
### Changed
- Added more checks to avoid operating on empty lists
- Compact response bodies to single line before logging them
- Improve BOS logging of unexpected errors

## [2.17.6] - 2024-04-19
### Fixed
- Corrected description of `disable_components_on_completion` in API spec.
Expand Down
6 changes: 4 additions & 2 deletions src/bos/common/tenant_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import logging
import hashlib
from requests.exceptions import HTTPError
from bos.common.utils import requests_retry_session, PROTOCOL
from bos.common.utils import exc_type_msg, requests_retry_session, PROTOCOL

LOGGER = logging.getLogger('bos.common.tenant_utils')

Expand Down Expand Up @@ -78,7 +78,7 @@ def get_tenant_data(tenant, session=None):
try:
response.raise_for_status()
except HTTPError as e:
LOGGER.error("Failed getting tenant data from tapms: %s", e)
LOGGER.error("Failed getting tenant data from tapms: %s", exc_type_msg(e))
if response.status_code == 404:
raise InvalidTenantException(f"Data not found for tenant {tenant}") from e
else:
Expand Down Expand Up @@ -110,6 +110,7 @@ def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except InvalidTenantException as e:
LOGGER.debug("Invalid tenant: %s", exc_type_msg(e))
return connexion.problem(
status=400, title='Invalid tenant',
detail=str(e))
Expand All @@ -122,6 +123,7 @@ def reject_invalid_tenant(func):
def wrapper(*args, **kwargs):
tenant = get_tenant_from_header()
if tenant and not validate_tenant_exists(tenant):
LOGGER.debug("The provided tenant does not exist")
return connexion.problem(
status=400, title="Invalid tenant",
detail=str("The provided tenant does not exist"))
Expand Down
22 changes: 21 additions & 1 deletion src/bos/common/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2022-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand All @@ -23,6 +23,7 @@
#
import datetime
import re
import traceback
from dateutil.parser import parse
import requests
from requests.adapters import HTTPAdapter
Expand Down Expand Up @@ -59,6 +60,7 @@ def duration_to_timedelta(timestamp: str):
seconds = timeval * seconds_table[durationval]
return datetime.timedelta(seconds=seconds)


class TimeoutHTTPAdapter(HTTPAdapter):
"""
An HTTP Adapter that allows a session level timeout for both read and connect attributes. This prevents interruption
Expand Down Expand Up @@ -95,3 +97,21 @@ def requests_retry_session(retries=10, backoff_factor=0.5,
# Mounting to only http will not work!
session.mount("%s://" % protocol, adapter)
return session


def compact_response_text(response_text: str) -> str:
    """
    Often JSON is "pretty printed" in response text, which is undesirable for our logging.
    This function transforms the response text into a single line: each line has its leading
    and trailing whitespace stripped, lines that are empty after stripping are dropped, and
    the remaining lines are joined with a single space.

    Falsy input (empty string or None) is not processed; its str() form is returned instead.
    """
    if not response_text:
        return str(response_text)
    stripped_lines = (line.strip() for line in response_text.split('\n'))
    # Skip empty lines so that blank lines or a trailing newline in the response do not
    # leave doubled or trailing spaces in the compacted text.
    return ' '.join(line for line in stripped_lines if line)


def exc_type_msg(exc: Exception) -> str:
    """
    Given an exception, returns a string of its type and its text
    (e.g. TypeError: 'int' object is not subscriptable)

    The returned string has no trailing newline, so it can be embedded directly in a
    log message.
    """
    # format_exception_only() terminates each line it returns with a newline; strip the
    # final one so callers do not log a stray line break after the message.
    return ''.join(traceback.format_exception_only(type(exc), exc)).rstrip('\n')
19 changes: 18 additions & 1 deletion src/bos/operators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import time
from typing import List, NoReturn, Type

from bos.common.utils import exc_type_msg
from bos.common.values import Status
from bos.operators.filters.base import BaseFilter
from bos.operators.utils.clients.bos.options import options
Expand Down Expand Up @@ -140,6 +141,10 @@ def _get_components(self) -> List[dict]:

def _handle_failed_components(self, components: List[dict]) -> List[dict]:
""" Marks components failed if the retry limits are exceeded """
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_handle_failed_components: No components to handle")
return []
failed_components = []
good_components = [] # Any component that isn't determined to be in a failed state
for component in components:
Expand All @@ -163,6 +168,10 @@ def _update_database(self, components: List[dict], additional_fields: dict=None)
Updates the BOS database for all components acted on by the operator
Includes updating the last action, attempt count and error
"""
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_update_database: No components require database updates")
return
data = []
for component in components:
patch = {
Expand Down Expand Up @@ -200,6 +209,10 @@ def _preset_last_action(self, components: List[dict]) -> None:
# e.g. nodes could be powered-on without the correct power-on last action, causing status problems
if not self.name:
return
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_preset_last_action: No components require database updates")
return
data = []
for component in components:
patch = {
Expand All @@ -221,6 +234,10 @@ def _update_database_for_failure(self, components: List[dict]) -> None:
"""
Updates the BOS database for all components the operator believes have failed
"""
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_update_database_for_failure: No components require database updates")
return
data = []
for component in components:
patch = {
Expand Down Expand Up @@ -250,7 +267,7 @@ def _update_log_level() -> None:
LOGGER.log(new_level, 'Logging level changed from {} to {}'.format(
logging.getLevelName(current_level), logging.getLevelName(new_level)))
except Exception as e:
LOGGER.error('Error updating logging level: {}'.format(e))
LOGGER.error('Error updating logging level: %s', exc_type_msg(e))


def _liveliness_heartbeat() -> NoReturn:
Expand Down
5 changes: 3 additions & 2 deletions src/bos/operators/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# MIT License
#
# (C) Copyright 2022 Hewlett Packard Enterprise Development LP
# (C) Copyright 2022, 2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -57,7 +57,8 @@ def filters(self):
]

def _act(self, components):
set_cfs(components, enabled=True)
if components:
set_cfs(components, enabled=True)
return components


Expand Down
7 changes: 4 additions & 3 deletions src/bos/operators/power_off_forceful.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# MIT License
#
# (C) Copyright 2021-2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2021-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -57,8 +57,9 @@ def filters(self):
]

def _act(self, components):
component_ids = [component['id'] for component in components]
pcs.force_off(nodes=component_ids)
if components:
component_ids = [component['id'] for component in components]
pcs.force_off(nodes=component_ids)
return components


Expand Down
7 changes: 4 additions & 3 deletions src/bos/operators/power_off_graceful.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# MIT License
#
# (C) Copyright 2021-2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2021-2024 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand Down Expand Up @@ -53,8 +53,9 @@ def filters(self):
]

def _act(self, components):
component_ids = [component['id'] for component in components]
pcs.soft_off(component_ids)
if components:
component_ids = [component['id'] for component in components]
pcs.soft_off(component_ids)
return components


Expand Down
19 changes: 14 additions & 5 deletions src/bos/operators/power_on.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import logging
from requests import HTTPError

from bos.common.utils import exc_type_msg
from bos.common.values import Action, Status
import bos.operators.utils.clients.bss as bss
import bos.operators.utils.clients.pcs as pcs
Expand Down Expand Up @@ -59,6 +60,8 @@ def filters(self):
]

def _act(self, components):
if not components:
return components
self._preset_last_action(components)
try:
self._set_bss(components)
Expand All @@ -85,6 +88,10 @@ def _set_bss(self, components, retries=5):
Because the connection to the BSS tokens database can be lost due to
infrequent use, retry up to retries number of times.
"""
if not components:
# If we have been passed an empty list, there is nothing to do.
LOGGER.debug("_set_bss: No components to act on")
return
parameters = defaultdict(set)
sessions = {}
for component in components:
Expand All @@ -106,9 +113,9 @@ def _set_bss(self, components, retries=5):
resp = bss.set_bss(node_set=nodes, kernel_params=kernel_parameters,
kernel=kernel, initrd=initrd)
resp.raise_for_status()
except HTTPError:
LOGGER.error(f"Failed to set BSS for boot artifacts: {key} for"
"nodes: {nodes}. Error: {err}")
except HTTPError as err:
LOGGER.error("Failed to set BSS for boot artifacts: %s for nodes: %s. Error: %s",
key, nodes, exc_type_msg(err))
else:
token = resp.headers['bss-referral-token']
attempts = 0
Expand All @@ -118,7 +125,8 @@ def _set_bss(self, components, retries=5):
break
except Exception as err:
attempts += 1
LOGGER.error(f"An error occurred attempting to record the BSS token: {err}")
LOGGER.error("An error occurred attempting to record the BSS token: %s",
exc_type_msg(err))
if attempts > retries:
raise
LOGGER.info("Retrying to record the BSS token.")
Expand All @@ -128,6 +136,8 @@ def _set_bss(self, components, retries=5):
"desired_state": {"bss_token": token},
"session": sessions[node]})
LOGGER.info('Found %d components that require BSS token updates', len(bss_tokens))
if not bss_tokens:
return
redacted_component_updates = [
{ "id": comp["id"],
"session": comp["session"]
Expand All @@ -136,7 +146,6 @@ def _set_bss(self, components, retries=5):
LOGGER.debug('Updated components (minus desired_state data): {}'.format(redacted_component_updates))
self.bos_client.components.update_components(bss_tokens)


if __name__ == '__main__':
main(PowerOnOperator)

Expand Down
55 changes: 46 additions & 9 deletions src/bos/operators/session_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from bos.operators.utils.clients.bos.options import options
from bos.operators.utils.rootfs.factory import ProviderFactory
from bos.operators.session_completion import SessionCompletionOperator
from bos.common.utils import exc_type_msg
from bos.common.values import Action, EMPTY_ACTUAL_STATE, EMPTY_DESIRED_STATE, EMPTY_STAGED_STATE
from bos.common.tenant_utils import get_tenant_component_set, InvalidTenantException

Expand Down Expand Up @@ -135,7 +136,7 @@ def _setup_components(self):
if not all_component_ids:
raise SessionSetupException("No nodes were found to act upon.")
except Exception as err:
raise SessionSetupException(err)
raise SessionSetupException(err) from err
else:
self._log(LOGGER.info, 'Found %d components that require updates', len(data))
self._log(LOGGER.debug, f'Updated components: {data}')
Expand Down Expand Up @@ -166,20 +167,26 @@ def _get_boot_set_component_list(self, boot_set) -> Set[str]:
self._log(LOGGER.warning, f"No hardware matching role {role_name}")
continue
nodes |= self.inventory.roles[role_name]
if not nodes:
self._log(LOGGER.warning, "After populating node list, before any filtering, no nodes to act upon.")
return nodes
self._log(LOGGER.debug, "Before any limiting or filtering, %d nodes to act upon.", len(nodes))
# Filter out any nodes that do not match the boot set architecture desired; boot sets that do not have a
# specified arch are considered 'X86' nodes.
arch = boot_set.get('arch', 'X86')
nodes = self._apply_arch(nodes, arch)
if not nodes:
return nodes
# Filter to nodes defined by limit
nodes = self._apply_limit(nodes)
if not nodes:
return nodes
# Exclude disabled nodes
include_disabled = self.session_data.get("include_disabled", False)
if not include_disabled:
hsmfilter = HSMState(enabled=True)
nodes = set(hsmfilter._filter(list(nodes)))
nodes = self._apply_tenant_limit(nodes)
nodes = self._apply_include_disabled(nodes)
if not nodes:
self._log(LOGGER.warning, "No nodes were found to act upon.")
return nodes
# If this session is for a tenant, filter out nodes not belonging to this tenant
nodes = self._apply_tenant_limit(nodes)
return nodes

def _apply_arch(self, nodes, arch):
Expand All @@ -204,7 +211,29 @@ def _apply_arch(self, nodes, arch):
if arch == 'X86':
valid_archs.add('UNKNOWN')
hsm_filter = HSMState()
return set(hsm_filter.filter_by_arch(nodes, valid_archs))
nodes = set(hsm_filter.filter_by_arch(nodes, valid_archs))
if not nodes:
self._log(LOGGER.warning, "After filtering for architecture, no nodes remain to act upon.")
else:
self._log(LOGGER.debug, "After filtering for architecture, %d nodes remain to act upon.", len(nodes))
return nodes

def _apply_include_disabled(self, nodes):
    """
    Apply the session's include_disabled setting to the given nodes.

    When include_disabled is set (truthy) in the session data, the nodes are returned
    exactly as they were passed in. Otherwise (the default), the nodes are run through
    an HSMState(enabled=True) filter and the survivors are returned as a new set; the
    outcome is logged as a warning if nothing is left, or at debug level with the
    remaining count.
    """
    if self.session_data.get("include_disabled", False):
        # Nodes disabled in HSM are allowed for this session, so there is nothing to remove
        return nodes
    enabled_only = HSMState(enabled=True)
    remaining = set(enabled_only._filter(list(nodes)))
    if remaining:
        self._log(LOGGER.debug, "After removing disabled nodes, %d nodes remain to act upon.", len(remaining))
    else:
        self._log(LOGGER.warning, "After removing disabled nodes, no nodes remain to act upon.")
    return remaining

def _apply_limit(self, nodes):
session_limit = self.session_data.get('limit')
Expand All @@ -230,6 +259,10 @@ def _apply_limit(self, nodes):
limit_nodes = self.inventory[limit]
limit_node_set = op(limit_nodes)
nodes = nodes.intersection(limit_node_set)
if not nodes:
self._log(LOGGER.warning, "After applying limit, no nodes remain to act upon.")
else:
self._log(LOGGER.debug, "After applying limit, %d nodes remain to act upon.", len(nodes))
return nodes

def _apply_tenant_limit(self, nodes):
Expand All @@ -241,6 +274,10 @@ def _apply_tenant_limit(self, nodes):
except InvalidTenantException as e:
raise SessionSetupException(str(e)) from e
nodes = nodes.intersection(tenant_limit)
if not nodes:
self._log(LOGGER.warning, "After applying tenant limit, no nodes remain to act upon.")
else:
self._log(LOGGER.debug, "After applying tenant limit, %d nodes remain to act upon.", len(nodes))
return nodes

def _mark_running(self, component_ids):
Expand Down Expand Up @@ -377,7 +414,7 @@ def assemble_kernel_boot_parameters(self, boot_set, artifact_info):
except (ClientError, UnicodeDecodeError, S3ObjectNotFound) as error:
self._log(LOGGER.error, "Unable to read file {}. Thus, no kernel boot parameters obtained "
"from image".format(artifact_info['boot_parameters']))
LOGGER.error(error)
LOGGER.error(exc_type_msg(error))
raise

# Parameters from the BOS Session template if the parameters exist.
Expand Down
Loading
Loading