diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d9c5c0e..7c531e8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.10.29] - 2024-12-18 +### Changed +- Improve performance of large `GET` components requests. + ### Dependencies - Bumped `certifi` version to resolve CVE. - Bumped `grpcio` version to resolve CVE. diff --git a/src/bos/server/controllers/v2/components.py b/src/bos/server/controllers/v2/components.py index 7fab3cca..441a3888 100644 --- a/src/bos/server/controllers/v2/components.py +++ b/src/bos/server/controllers/v2/components.py @@ -21,9 +21,11 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # -import connexion +from functools import partial import logging +import connexion + from bos.common.utils import exc_type_msg, get_current_timestamp from bos.common.tenant_utils import get_tenant_from_header, get_tenant_component_set, tenant_error_handler, get_tenant_aware_key from bos.common.values import Phase, Action, Status, EMPTY_STAGED_STATE, EMPTY_BOOT_ARTIFACTS @@ -49,7 +51,6 @@ def get_v2_components(ids="", enabled=None, session=None, staged_session=None, p LOGGER.debug("GET /v2/components invoked get_v2_components with ids=%s enabled=%s session=%s " "staged_session=%s phase=%s status=%s", ids, enabled, session, staged_session, phase, status) - id_list = [] if ids: try: id_list = ids.split(',') @@ -58,50 +59,93 @@ def get_v2_components(ids="", enabled=None, session=None, staged_session=None, p return connexion.problem( status=400, title="Error parsing the ids provided.", detail=str(err)) + else: + id_list = None tenant = get_tenant_from_header() - LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified", tenant, len(id_list)) + LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified", + tenant, len(id_list) if id_list else 0) response = get_v2_components_data(id_list=id_list, enabled=enabled, session=session, staged_session=staged_session, - phase=phase, status=status, tenant=tenant) + phase=phase, status=status, tenant=tenant, delete_timestamp=True) LOGGER.debug("GET /v2/components returning data for tenant=%s on %d components", tenant, len(response)) - for component in response: - del_timestamp(component) return response, 200 def get_v2_components_data(id_list=None, enabled=None, session=None, staged_session=None, - phase=None, status=None, tenant=None): + phase=None, status=None, tenant=None, delete_timestamp: bool = False): """Used by the GET /components API operation Allows filtering using a comma separated list of ids. """ - response = [] - if id_list: - for component_id in id_list: - data = DB.get(component_id) - if data: - response.append(data) + tenant_components = None if tenant is None else get_tenant_component_set(tenant) + + if id_list is not None: + id_set = set(id_list) + if tenant_components is not None: + id_set.intersection_update(tenant_components) else: - # TODO: On large scale systems, this response may be too large - # and require paging to be implemented - response = DB.get_all() - # The status must be set before using _matches_filter as the status is one of the matching criteria. - response = [_set_status(r) for r in response if r] - if enabled is not None or session is not None or staged_session is not None or phase is not None or status is not None: - response = [r for r in response if _matches_filter(r, enabled, session, staged_session, phase, status)] - if tenant: - tenant_components = get_tenant_component_set(tenant) - limited_response = [component for component in response if component["id"] in tenant_components] - response = limited_response - return response + id_set = tenant_components + + # If id_set is not None but is empty, that means no components in the system + # will match our filter, so we can return an empty list immediately. + if id_set is not None and not id_set: + return [] + + if any([id_set, enabled, session, staged_session, phase, status]): + _component_filter_func = partial(_filter_component, + id_set=id_set, + enabled=enabled, + session=session or None, + staged_session=staged_session or None, + phase=phase or None, + status=status or None, + delete_timestamp=delete_timestamp) + else: + _component_filter_func = partial(_set_status, + delete_timestamp=delete_timestamp) + + return DB.get_all_filtered(filter_func=_component_filter_func) + + +def _filter_component(data: dict, + id_set=None, + enabled=None, + session=None, + staged_session=None, + phase=None, + status=None, + delete_timestamp: bool = False) -> dict | None: + # Do all of the checks we can before calculating status, to avoid doing it needlessly + if id_set is not None and data["id"] not in id_set: + return None + if enabled is not None and data.get('enabled', None) != enabled: + return None + if session is not None and data.get('session', None) != session: + return None + if staged_session is not None and \ + data.get('staged_state', {}).get('session', None) != staged_session: + return None + updated_data = _set_status(data) + + status_data = updated_data.get('status') + if phase is not None and status_data.get('phase') != phase: + return None + if status is not None and status_data.get('status') not in status.split( + ','): + return None + if delete_timestamp: + del_timestamp(updated_data) + return updated_data -def _set_status(data): +def _set_status(data, delete_timestamp: bool = False): """ This sets the status field of the overall status. """ if "status" not in data: data["status"] = {"phase": "", "status_override": ""} data['status']['status'] = _calculate_status(data) + if delete_timestamp: + del_timestamp(data) return data @@ -117,11 +161,13 @@ def _calculate_status(data): return override phase = status_data.get('phase', '') - last_action = data.get('last_action', {}).get('action', '') + last_action_dict = data.get('last_action', {}) + last_action = last_action_dict.get('action', '') component = data.get('id', '') now = get_current_timestamp() if phase == Phase.powering_on: - if last_action == Action.power_on and not data.get('last_action', {}).get('failed', False): + if last_action == Action.power_on and not last_action_dict.get( + 'failed', False): LOGGER.debug(f"{now} Component: {component} Phase: {phase} Status: {Status.power_on_called}") return Status.power_on_called else: @@ -145,21 +191,6 @@ def _calculate_status(data): return Status.stable -def _matches_filter(data, enabled, session, staged_session, phase, status): - if enabled is not None and data.get('enabled', None) != enabled: - return False - if session is not None and data.get('session', None) != session: - return False - if staged_session is not None and data.get('staged_state', {}).get('session', None) != staged_session: - return False - status_data = data.get('status') - if phase is not None and status_data.get('phase') != phase: - return False - if status is not None and status_data.get('status') not in status.split(','): - return False - return True - - @dbutils.redis_error_handler def put_v2_components(): """Used by the PUT /components API operation""" diff --git a/src/bos/server/redis_db_utils.py b/src/bos/server/redis_db_utils.py index faf906bc..983c49c3 100644 --- a/src/bos/server/redis_db_utils.py +++ b/src/bos/server/redis_db_utils.py @@ -21,10 +21,13 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # -import connexion +from itertools import batched import functools import json import logging +from typing import Callable + +import connexion import redis from bos.common.utils import exc_type_msg @@ -90,6 +93,27 @@ def get_all(self): data.append(single_data) return data + def get_all_filtered(self, + filter_func: Callable[[dict], dict | None]) -> list[dict]: + """ + Get an array of data for all keys after passing them through the specified filter + (discarding any for which the filter returns None) + """ + data = [] + for value in self.iter_values(): + filtered_value = filter_func(value) + if filtered_value is not None: + data.append(filtered_value) + return data + + def iter_values(self): + """ + Iterate through every item in the database. Parse each item as JSON and yield it. + """ + for next_keys in batched(self.client.scan_iter(), 500): + for datastr in self.client.mget(next_keys): + yield json.loads(datastr) if datastr else None + def get_keys(self): """Get an array of all keys""" data = []