Skip to content

Commit

Permalink
Merge pull request #412 from Cray-HPE/casmcms-9225-csm-1.5
Browse files Browse the repository at this point in the history
CASMCMS-9225: Improve performance of large GET components requests
  • Loading branch information
mharding-hpe authored Dec 18, 2024
2 parents b54ba69 + fd1dc01 commit d2312f8
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 44 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [2.10.29] - 2024-12-18
### Changed
- Improve performance of large `GET` components requests.

### Dependencies
- Bumped `certifi` version to resolve CVE.
- Bumped `grpcio` version to resolve CVE.
Expand Down
117 changes: 74 additions & 43 deletions src/bos/server/controllers/v2/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
import connexion
from functools import partial
import logging

import connexion

from bos.common.utils import exc_type_msg, get_current_timestamp
from bos.common.tenant_utils import get_tenant_from_header, get_tenant_component_set, tenant_error_handler, get_tenant_aware_key
from bos.common.values import Phase, Action, Status, EMPTY_STAGED_STATE, EMPTY_BOOT_ARTIFACTS
Expand All @@ -49,7 +51,6 @@ def get_v2_components(ids="", enabled=None, session=None, staged_session=None, p
LOGGER.debug("GET /v2/components invoked get_v2_components with ids=%s enabled=%s session=%s "
"staged_session=%s phase=%s status=%s", ids, enabled, session, staged_session,
phase, status)
id_list = []
if ids:
try:
id_list = ids.split(',')
Expand All @@ -58,50 +59,93 @@ def get_v2_components(ids="", enabled=None, session=None, staged_session=None, p
return connexion.problem(
status=400, title="Error parsing the ids provided.",
detail=str(err))
else:
id_list = None
tenant = get_tenant_from_header()
LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified", tenant, len(id_list))
LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified",
tenant, len(id_list) if id_list else 0)
response = get_v2_components_data(id_list=id_list, enabled=enabled, session=session, staged_session=staged_session,
phase=phase, status=status, tenant=tenant)
phase=phase, status=status, tenant=tenant, delete_timestamp=True)
LOGGER.debug("GET /v2/components returning data for tenant=%s on %d components", tenant, len(response))
for component in response:
del_timestamp(component)
return response, 200


def get_v2_components_data(id_list=None, enabled=None, session=None, staged_session=None,
phase=None, status=None, tenant=None):
phase=None, status=None, tenant=None, delete_timestamp: bool = False):
"""Used by the GET /components API operation
Allows filtering using a comma separated list of ids.
"""
response = []
if id_list:
for component_id in id_list:
data = DB.get(component_id)
if data:
response.append(data)
tenant_components = None if tenant is None else get_tenant_component_set(tenant)

if id_list is not None:
id_set = set(id_list)
if tenant_components is not None:
id_set.intersection_update(tenant_components)
else:
# TODO: On large scale systems, this response may be too large
# and require paging to be implemented
response = DB.get_all()
# The status must be set before using _matches_filter as the status is one of the matching criteria.
response = [_set_status(r) for r in response if r]
if enabled is not None or session is not None or staged_session is not None or phase is not None or status is not None:
response = [r for r in response if _matches_filter(r, enabled, session, staged_session, phase, status)]
if tenant:
tenant_components = get_tenant_component_set(tenant)
limited_response = [component for component in response if component["id"] in tenant_components]
response = limited_response
return response
id_set = tenant_components

# If id_set is not None but is empty, that means no components in the system
# will match our filter, so we can return an empty list immediately.
if id_set is not None and not id_set:
return []

if any([id_set, enabled, session, staged_session, phase, status]):
_component_filter_func = partial(_filter_component,
id_set=id_set,
enabled=enabled,
session=session or None,
staged_session=staged_session or None,
phase=phase or None,
status=status or None,
delete_timestamp=delete_timestamp)
else:
_component_filter_func = partial(_set_status,
delete_timestamp=delete_timestamp)

return DB.get_all_filtered(filter_func=_component_filter_func)


def _filter_component(data: dict,
id_set=None,
enabled=None,
session=None,
staged_session=None,
phase=None,
status=None,
delete_timestamp: bool = False) -> dict | None:
# Do all of the checks we can before calculating status, to avoid doing it needlessly
if id_set is not None and data["id"] not in id_set:
return None
if enabled is not None and data.get('enabled', None) != enabled:
return None
if session is not None and data.get('session', None) != session:
return None
if staged_session is not None and \
data.get('staged_state', {}).get('session', None) != staged_session:
return None
updated_data = _set_status(data)

status_data = updated_data.get('status')
if phase is not None and status_data.get('phase') != phase:
return None
if status is not None and status_data.get('status') not in status.split(
','):
return None
if delete_timestamp:
del_timestamp(updated_data)
return updated_data


def _set_status(data):
def _set_status(data, delete_timestamp: bool = False):
"""
This sets the status field of the overall status.
"""
if "status" not in data:
data["status"] = {"phase": "", "status_override": ""}
data['status']['status'] = _calculate_status(data)
if delete_timestamp:
del_timestamp(data)
return data


Expand All @@ -117,11 +161,13 @@ def _calculate_status(data):
return override

phase = status_data.get('phase', '')
last_action = data.get('last_action', {}).get('action', '')
last_action_dict = data.get('last_action', {})
last_action = last_action_dict.get('action', '')
component = data.get('id', '')
now = get_current_timestamp()
if phase == Phase.powering_on:
if last_action == Action.power_on and not data.get('last_action', {}).get('failed', False):
if last_action == Action.power_on and not last_action_dict.get(
'failed', False):
LOGGER.debug(f"{now} Component: {component} Phase: {phase} Status: {Status.power_on_called}")
return Status.power_on_called
else:
Expand All @@ -145,21 +191,6 @@ def _calculate_status(data):
return Status.stable


def _matches_filter(data, enabled, session, staged_session, phase, status):
if enabled is not None and data.get('enabled', None) != enabled:
return False
if session is not None and data.get('session', None) != session:
return False
if staged_session is not None and data.get('staged_state', {}).get('session', None) != staged_session:
return False
status_data = data.get('status')
if phase is not None and status_data.get('phase') != phase:
return False
if status is not None and status_data.get('status') not in status.split(','):
return False
return True


@dbutils.redis_error_handler
def put_v2_components():
"""Used by the PUT /components API operation"""
Expand Down
26 changes: 25 additions & 1 deletion src/bos/server/redis_db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
import connexion
from itertools import batched
import functools
import json
import logging
from typing import Callable

import connexion
import redis

from bos.common.utils import exc_type_msg
Expand Down Expand Up @@ -90,6 +93,27 @@ def get_all(self):
data.append(single_data)
return data

def get_all_filtered(self,
filter_func: Callable[[dict], dict | None]) -> list[dict]:
"""
Get an array of data for all keys after passing them through the specified filter
(discarding any for which the filter returns None)
"""
data = []
for value in self.iter_values():
filtered_value = filter_func(value)
if filtered_value is not None:
data.append(filtered_value)
return data

def iter_values(self):
"""
Iterate through every item in the database. Parse each item as JSON and yield it.
"""
for next_keys in batched(self.client.scan_iter(), 500):
for datastr in self.client.mget(next_keys):
yield json.loads(datastr) if datastr else None

def get_keys(self):
"""Get an array of all keys"""
data = []
Expand Down

0 comments on commit d2312f8

Please sign in to comment.