Skip to content

Commit

Permalink
CASMCMS-9225: Improve performance of large GET components requests
Browse files Browse the repository at this point in the history
(cherry picked from commit cea748f)
  • Loading branch information
mharding-hpe committed Dec 18, 2024
1 parent fa06bf8 commit f432dff
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 58 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [2.30.7] - 2024-12-18
### Changed
- Improve performance of large `GET` components requests.

## [2.30.6] - 2024-12-18
### Fixed
- When renaming session templates during migration, use correct database key to store renamed template.
Expand Down
159 changes: 101 additions & 58 deletions src/bos/server/controllers/v2/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
from functools import partial
import logging

import connexion
Expand All @@ -41,74 +42,130 @@

@tenant_error_handler
@dbutils.redis_error_handler
def get_v2_components(ids="", enabled=None, session=None, staged_session=None, phase=None,
def get_v2_components(ids="",
enabled=None,
session=None,
staged_session=None,
phase=None,
status=None):
"""Used by the GET /components API operation
Allows filtering using a comma separated list of ids.
"""
LOGGER.debug("GET /v2/components invoked get_v2_components with ids=%s enabled=%s session=%s "
"staged_session=%s phase=%s status=%s", ids, enabled, session, staged_session,
phase, status)
id_list = []
LOGGER.debug(
"GET /v2/components invoked get_v2_components with ids=%s enabled=%s session=%s "
"staged_session=%s phase=%s status=%s", ids, enabled, session,
staged_session, phase, status)
if ids:
try:
id_list = ids.split(',')
except Exception as err:
LOGGER.error("Error parsing component IDs: %s", exc_type_msg(err))
return connexion.problem(
status=400, title="Error parsing the ids provided.",
detail=str(err))
return connexion.problem(status=400,
title="Error parsing the ids provided.",
detail=str(err))
else:
id_list = None
tenant = get_tenant_from_header()
LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified", tenant, len(id_list))
response = get_v2_components_data(id_list=id_list, enabled=enabled, session=session,
LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified",
tenant, len(id_list) if id_list else 0)
response = get_v2_components_data(id_list=id_list,
enabled=enabled,
session=session,
staged_session=staged_session,
phase=phase, status=status, tenant=tenant)
LOGGER.debug("GET /v2/components returning data for tenant=%s on %d components", tenant,
len(response))
for component in response:
del_timestamp(component)
phase=phase,
status=status,
tenant=tenant,
delete_timestamp=True)
LOGGER.debug(
"GET /v2/components returning data for tenant=%s on %d components",
tenant, len(response))
return response, 200


def get_v2_components_data(id_list=None, enabled=None, session=None, staged_session=None,
phase=None, status=None, tenant=None):
def get_v2_components_data(id_list=None,
enabled=None,
session=None,
staged_session=None,
phase=None,
status=None,
tenant=None,
delete_timestamp: bool = False):
"""Used by the GET /components API operation
Allows filtering using a comma separated list of ids.
"""
response = []
if id_list:
for component_id in id_list:
data = DB.get(component_id)
if data:
response.append(data)
tenant_components = None if tenant is None else get_tenant_component_set(tenant)

if id_list is not None:
id_set = set(id_list)
if tenant_components is not None:
id_set.intersection_update(tenant_components)
else:
# TODO: On large scale systems, this response may be too large
# and require paging to be implemented
response = DB.get_all()
# The status must be set before using _matches_filter as the status is one of the
# matching criteria.
response = [_set_status(r) for r in response if r]
if enabled is not None or session is not None or staged_session is not None or \
phase is not None or status is not None:
response = [r for r in response if _matches_filter(r, enabled, session, staged_session,
phase, status)]
if tenant:
tenant_components = get_tenant_component_set(tenant)
limited_response = [component for component in response
if component["id"] in tenant_components]
response = limited_response
return response
id_set = tenant_components

# If id_set is not None but is empty, that means no components in the system
# will match our filter, so we can return an empty list immediately.
if id_set is not None and not id_set:
return []

if any([id_set, enabled, session, staged_session, phase, status]):
_component_filter_func = partial(_filter_component,
id_set=id_set,
enabled=enabled,
session=session or None,
staged_session=staged_session or None,
phase=phase or None,
status=status or None)
elif delete_timestamp:
_component_filter_func = partial(_set_status,
delete_timestamp=delete_timestamp)
else:
_component_filter_func = _set_status

return DB.get_all_filtered(filter_func=_component_filter_func)


def _filter_component(data: dict,
id_set=None,
enabled=None,
session=None,
staged_session=None,
phase=None,
status=None,
delete_timestamp: bool = False) -> dict | None:
# Do all of the checks we can before calculating status, to avoid doing it needlessly
if id_set is not None and data["id"] not in id_set:
return None
if enabled is not None and data.get('enabled', None) != enabled:
return None
if session is not None and data.get('session', None) != session:
return None
if staged_session is not None and \
data.get('staged_state', {}).get('session', None) != staged_session:
return None
updated_data = _set_status(data)

status_data = updated_data.get('status')
if phase is not None and status_data.get('phase') != phase:
return None
if status is not None and status_data.get('status') not in status.split(
','):
return None
if delete_timestamp:
del_timestamp(updated_data)
return updated_data


def _set_status(data):
def _set_status(data, delete_timestamp: bool = False):
"""
This sets the status field of the overall status.
"""
if "status" not in data:
data["status"] = {"phase": "", "status_override": ""}
data['status']['status'] = _calculate_status(data)
if delete_timestamp:
del_timestamp(data)
return data


Expand All @@ -127,11 +184,13 @@ def _calculate_status(data):

phase = status_data.get('phase', '')
component = data.get('id', '')
last_action = data.get('last_action', {}).get('action', '')
last_action_dict = data.get('last_action', {})
last_action = last_action_dict.get('action', '')

status = status = Status.stable
if phase == Phase.powering_on:
if last_action == Action.power_on and not data.get('last_action', {}).get('failed', False):
if last_action == Action.power_on and not last_action_dict.get(
'failed', False):
status = Status.power_on_called
else:
status = Status.power_on_pending
Expand All @@ -150,22 +209,6 @@ def _calculate_status(data):
return status


def _matches_filter(data, enabled, session, staged_session, phase, status):
if enabled is not None and data.get('enabled', None) != enabled:
return False
if session is not None and data.get('session', None) != session:
return False
if staged_session is not None and \
data.get('staged_state', {}).get('session', None) != staged_session:
return False
status_data = data.get('status')
if phase is not None and status_data.get('phase') != phase:
return False
if status is not None and status_data.get('status') not in status.split(','):
return False
return True


@dbutils.redis_error_handler
def put_v2_components():
"""Used by the PUT /components API operation"""
Expand Down
23 changes: 23 additions & 0 deletions src/bos/server/redis_db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
from itertools import batched
import functools
import json
import logging
from typing import Callable

import connexion
import redis
Expand Down Expand Up @@ -104,6 +106,27 @@ def get_all(self):
data.append(single_data)
return data

def get_all_filtered(self,
filter_func: Callable[[dict], dict | None]) -> list[dict]:
"""
Get an array of data for all keys after passing them through the specified filter
(discarding any for which the filter returns None)
"""
data = []
for value in self.iter_values():
filtered_value = filter_func(value)
if filtered_value is not None:
data.append(filtered_value)
return data

def iter_values(self):
"""
Iterate through every item in the database. Parse each item as JSON and yield it.
"""
for next_keys in batched(self.client.scan_iter(), 500):
for datastr in self.client.mget(next_keys):
yield json.loads(datastr) if datastr else None

def get_all_as_dict(self):
"""Return a mapping from all keys to their corresponding data
Based on https://github.com/redis/redis-py/issues/984#issuecomment-391404875
Expand Down

0 comments on commit f432dff

Please sign in to comment.