From d9eae3c8906e9524e4dd511a0e337b4b7024872f Mon Sep 17 00:00:00 2001 From: "Mitch Harding (the weird one)" Date: Wed, 18 Dec 2024 12:47:05 -0500 Subject: [PATCH 1/2] CASMCMS-9225: Improve performance of large GET components requests (cherry picked from commit cea748f733251e309785c8078b451d2fc8429bf9) (cherry picked from commit c9b8be0247b40bda60d74384f782fccb8ee315de) --- CHANGELOG.md | 4 + src/bos/server/controllers/v2/components.py | 121 ++++++++++++-------- src/bos/server/redis_db_utils.py | 23 ++++ 3 files changed, 101 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29fafbdc..0e3a96f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.30.7] - 2024-12-18 +### Changed +- Improve performance of large `GET` components requests. + ## [2.30.6] - 2024-12-18 ### Fixed - When renaming session templates during migration, use correct database key to store renamed template. diff --git a/src/bos/server/controllers/v2/components.py b/src/bos/server/controllers/v2/components.py index 0c03b406..a2b5f458 100644 --- a/src/bos/server/controllers/v2/components.py +++ b/src/bos/server/controllers/v2/components.py @@ -21,6 +21,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # +from functools import partial import logging import connexion @@ -50,7 +51,6 @@ def get_v2_components(ids="", enabled=None, session=None, staged_session=None, p LOGGER.debug("GET /v2/components invoked get_v2_components with ids=%s enabled=%s session=%s " "staged_session=%s phase=%s status=%s", ids, enabled, session, staged_session, phase, status) - id_list = [] if ids: try: id_list = ids.split(',') @@ -59,56 +59,97 @@ def get_v2_components(ids="", enabled=None, session=None, staged_session=None, p return connexion.problem( status=400, title="Error parsing the ids provided.", detail=str(err)) + else: + id_list = None tenant = get_tenant_from_header() - LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified", tenant, len(id_list)) + LOGGER.debug("GET /v2/components for tenant=%s with %d IDs specified", + tenant, len(id_list) if id_list else 0) response = get_v2_components_data(id_list=id_list, enabled=enabled, session=session, staged_session=staged_session, - phase=phase, status=status, tenant=tenant) + phase=phase, status=status, tenant=tenant, + delete_timestamp=True) LOGGER.debug("GET /v2/components returning data for tenant=%s on %d components", tenant, len(response)) - for component in response: - del_timestamp(component) return response, 200 def get_v2_components_data(id_list=None, enabled=None, session=None, staged_session=None, - phase=None, status=None, tenant=None): + phase=None, status=None, tenant=None, + delete_timestamp: bool = False): """Used by the GET /components API operation Allows filtering using a comma separated list of ids. """ - response = [] - if id_list: - for component_id in id_list: - data = DB.get(component_id) - if data: - response.append(data) + tenant_components = None if tenant is None else get_tenant_component_set(tenant) + + if id_list is not None: + id_set = set(id_list) + if tenant_components is not None: + id_set.intersection_update(tenant_components) else: - # TODO: On large scale systems, this response may be too large - # and require paging to be implemented - response = DB.get_all() - # The status must be set before using _matches_filter as the status is one of the - # matching criteria. - response = [_set_status(r) for r in response if r] - if enabled is not None or session is not None or staged_session is not None or \ - phase is not None or status is not None: - response = [r for r in response if _matches_filter(r, enabled, session, staged_session, - phase, status)] - if tenant: - tenant_components = get_tenant_component_set(tenant) - limited_response = [component for component in response - if component["id"] in tenant_components] - response = limited_response - return response + id_set = tenant_components + + # If id_set is not None but is empty, that means no components in the system + # will match our filter, so we can return an empty list immediately. + if id_set is not None and not id_set: + return [] + + if any([id_set, enabled, session, staged_session, phase, status]): + _component_filter_func = partial(_filter_component, + id_set=id_set, + enabled=enabled, + session=session or None, + staged_session=staged_session or None, + phase=phase or None, + status=status or None, + delete_timestamp=delete_timestamp) + else: + _component_filter_func = partial(_set_status, + delete_timestamp=delete_timestamp) + + return DB.get_all_filtered(filter_func=_component_filter_func) + + +def _filter_component(data: dict, + id_set=None, + enabled=None, + session=None, + staged_session=None, + phase=None, + status=None, + delete_timestamp: bool = False) -> dict | None: + # Do all of the checks we can before calculating status, to avoid doing it needlessly + if id_set is not None and data["id"] not in id_set: + return None + if enabled is not None and data.get('enabled', None) != enabled: + return None + if session is not None and data.get('session', None) != session: + return None + if staged_session is not None and \ + data.get('staged_state', {}).get('session', None) != staged_session: + return None + updated_data = _set_status(data) + + status_data = updated_data.get('status') + if phase is not None and status_data.get('phase') != phase: + return None + if status is not None and status_data.get('status') not in status.split( + ','): + return None + if delete_timestamp: + del_timestamp(updated_data) + return updated_data -def _set_status(data): +def _set_status(data, delete_timestamp: bool = False): """ This sets the status field of the overall status. """ if "status" not in data: data["status"] = {"phase": "", "status_override": ""} data['status']['status'] = _calculate_status(data) + if delete_timestamp: + del_timestamp(data) return data @@ -127,11 +168,13 @@ def _calculate_status(data): phase = status_data.get('phase', '') component = data.get('id', '') - last_action = data.get('last_action', {}).get('action', '') + last_action_dict = data.get('last_action', {}) + last_action = last_action_dict.get('action', '') status = status = Status.stable if phase == Phase.powering_on: - if last_action == Action.power_on and not data.get('last_action', {}).get('failed', False): + if last_action == Action.power_on and not last_action_dict.get( + 'failed', False): status = Status.power_on_called else: status = Status.power_on_pending @@ -150,22 +193,6 @@ def _calculate_status(data): return status -def _matches_filter(data, enabled, session, staged_session, phase, status): - if enabled is not None and data.get('enabled', None) != enabled: - return False - if session is not None and data.get('session', None) != session: - return False - if staged_session is not None and \ - data.get('staged_state', {}).get('session', None) != staged_session: - return False - status_data = data.get('status') - if phase is not None and status_data.get('phase') != phase: - return False - if status is not None and status_data.get('status') not in status.split(','): - return False - return True - - @dbutils.redis_error_handler def put_v2_components(): """Used by the PUT /components API operation""" diff --git a/src/bos/server/redis_db_utils.py b/src/bos/server/redis_db_utils.py index 988970fb..3bd2db23 100644 --- a/src/bos/server/redis_db_utils.py +++ b/src/bos/server/redis_db_utils.py @@ -21,9 +21,11 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # +from itertools import batched import functools import json import logging +from typing import Callable import connexion import redis @@ -104,6 +106,27 @@ def get_all(self): data.append(single_data) return data + def get_all_filtered(self, + filter_func: Callable[[dict], dict | None]) -> list[dict]: + """ + Get an array of data for all keys after passing them through the specified filter + (discarding any for which the filter returns None) + """ + data = [] + for value in self.iter_values(): + filtered_value = filter_func(value) + if filtered_value is not None: + data.append(filtered_value) + return data + + def iter_values(self): + """ + Iterate through every item in the database. Parse each item as JSON and yield it. + """ + for next_keys in batched(self.client.scan_iter(), 500): + for datastr in self.client.mget(next_keys): + yield json.loads(datastr) if datastr else None + def get_all_as_dict(self): """Return a mapping from all keys to their corresponding data Based on https://github.com/redis/redis-py/issues/984#issuecomment-391404875 From d30d686d24f4a11b1013b7f1c340205bd38595b1 Mon Sep 17 00:00:00 2001 From: Mitch Harding Date: Wed, 18 Dec 2024 13:07:53 -0500 Subject: [PATCH 2/2] Update src/bos/server/controllers/v2/components.py Fix indentation (cherry picked from commit 6267783be93deec8aef81169bb4b088a32af22a5) --- src/bos/server/controllers/v2/components.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bos/server/controllers/v2/components.py b/src/bos/server/controllers/v2/components.py index a2b5f458..0366c212 100644 --- a/src/bos/server/controllers/v2/components.py +++ b/src/bos/server/controllers/v2/components.py @@ -87,7 +87,7 @@ def get_v2_components_data(id_list=None, enabled=None, session=None, staged_sess if tenant_components is not None: id_set.intersection_update(tenant_components) else: - id_set = tenant_components + id_set = tenant_components # If id_set is not None but is empty, that means no components in the system # will match our filter, so we can return an empty list immediately.