From d7b6f7c7929e57222f999678cc7904e3dc8fc6e8 Mon Sep 17 00:00:00 2001 From: "Mitch Harding (the weird one)" Date: Mon, 9 Sep 2024 16:21:34 -0400 Subject: [PATCH 1/3] CASMCMS-9026: Sanitize BOS data during migration to this BOS version --- CHANGELOG.md | 8 + .../cray-bos/templates/post-upgrade-job.yaml | 5 +- .../{migrations.py => migrations/__init__.py} | 16 +- src/bos/server/migrations/__main__.py | 87 +++++ src/bos/server/migrations/db.py | 60 +++ src/bos/server/migrations/sanitize.py | 346 ++++++++++++++++++ src/bos/server/migrations/validate.py | 130 +++++++ src/bos/server/redis_db_utils.py | 31 +- src/bos/server/schema.py | 15 + 9 files changed, 680 insertions(+), 18 deletions(-) rename src/bos/server/{migrations.py => migrations/__init__.py} (73%) create mode 100644 src/bos/server/migrations/__main__.py create mode 100644 src/bos/server/migrations/db.py create mode 100644 src/bos/server/migrations/sanitize.py create mode 100644 src/bos/server/migrations/validate.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e1df9d2d..49c81e1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed +- Sanitize BOS data during migration to this BOS version, to ensure it complies with the API specification. + - For components and sessions, only validate the fields used to identify them (name, id, tenant). Delete them + from the database if these fields have problems. + - For templates, validate all fields, but do a more complete attempt to automatically clean them up, + only deleting them as a last resort. +- Do not delete migration job after it completes; instead, set a TTL value for it, to allow time for its logs to be + collected after it completes. ## [2.27.0] - 2024-09-05 ### Changed diff --git a/kubernetes/cray-bos/templates/post-upgrade-job.yaml b/kubernetes/cray-bos/templates/post-upgrade-job.yaml index f32776a6..89f3e3ef 100644 --- a/kubernetes/cray-bos/templates/post-upgrade-job.yaml +++ b/kubernetes/cray-bos/templates/post-upgrade-job.yaml @@ -34,10 +34,13 @@ metadata: annotations: "helm.sh/hook": post-upgrade "helm.sh/hook-weight": "-1" - "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation spec: + # Set ttlSecondsAfterFinished to 28 days. + # Because the migration can delete BOS data, but logs when it does so, + # this allows its logs to be easily accesible for longer post-upgrade + ttlSecondsAfterFinished: 2419200 template: metadata: name: "{{ .Release.Name }}" diff --git a/src/bos/server/migrations.py b/src/bos/server/migrations/__init__.py similarity index 73% rename from src/bos/server/migrations.py rename to src/bos/server/migrations/__init__.py index 1726b6bc..f713202b 100644 --- a/src/bos/server/migrations.py +++ b/src/bos/server/migrations/__init__.py @@ -1,7 +1,7 @@ # # MIT License # -# (C) Copyright 2023-2024 Hewlett Packard Enterprise Development LP +# (C) Copyright 2024 Hewlett Packard Enterprise Development LP # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -21,17 +21,3 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # -import logging - -LOGGER = logging.getLogger('bos.server.migration') - - -def perform_migrations(): - # Not removing this file entirely because we are going to be adding - # code here to migrate the previous BOS data to enforce API field - # restrictions - pass - - -if __name__ == "__main__": - perform_migrations() diff --git a/src/bos/server/migrations/__main__.py b/src/bos/server/migrations/__main__.py new file mode 100644 index 00000000..891fdea1 --- /dev/null +++ b/src/bos/server/migrations/__main__.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# +# MIT License +# +# (C) Copyright 2024 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +""" +Starting in CSM 1.6, BOS is enforcing many API restrictions for the first time. +When migrating to this BOS version, this tool will attempt to clean up the BOS +data so that it complies with the spec (like modifying boot sets so that +rootfs_providers mapping to empty strings instead are not included in the boot set). +It will only delete BOS resources in two cases: + +1. Cases where ID fields are in violation. For example, if a +session template has an invalid name. The reason this is deleted is because +that template would be inaccessible using the API or CLI, since incoming +requests would be rejected for not following the spec. For session templates +specifically, however, an attempt will be made to modify the name to comply +with the requirements. Only if that fails will the template be deleted. + +2. Cases where the ID fields inside the data structure conflicts with the +database key. That is, if generating the DB key based on the fields in the +actual data result in a different database key than the one used to look it up. +This is not something that is likely to happen, but given the lack of spec +enforcement that existed in the past, we can't rule it out. In this case, +for session templates, if the expected database key is not in use, then +the resource will be moved to the expected key. Otherwise, or for +non-session templates, it will be deleted. + +The CSM upgrade code for 1.6 has been modified to include a BOS backup +before the BOS service is updated. In addition, this migration tool will +log all data that is deleted. And the migration pod has been modified +so that it stays around for much longer after completing. +""" + +import logging + +from .db import COMP_DB, SESS_DB, TEMP_DB +from .sanitize import sanitize_component, sanitize_session, sanitize_session_template + + +LOGGER = logging.getLogger('bos.server.migration') + + +def main(): + LOGGER.info("Sanitizing session templates") + for key, data in TEMP_DB.get_all_as_dict().items(): + sanitize_session_template(key, data) + LOGGER.info("Done sanitizing session templates") + + LOGGER.info("Sanitizing sessions") + for key, data in SESS_DB.get_all_as_dict().items(): + sanitize_session(key, data) + LOGGER.info("Done sanitizing sessions") + + LOGGER.info("Sanitizing components") + for key, data in COMP_DB.get_all_as_dict().items(): + sanitize_component(key, data) + LOGGER.info("Done sanitizing components") + + +if __name__ == "__main__": + log_level = logging.getLevelName('INFO') + logging.basicConfig(level=log_level) + + LOGGER.info("Beginning post-upgrade BOS data migration") + main() + LOGGER.info("Completed post-upgrade BOS data migration") diff --git a/src/bos/server/migrations/db.py b/src/bos/server/migrations/db.py new file mode 100644 index 00000000..4824e6cc --- /dev/null +++ b/src/bos/server/migrations/db.py @@ -0,0 +1,60 @@ +# +# MIT License +# +# (C) Copyright 2024 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +import logging + +import bos.server.redis_db_utils as dbutils + +LOGGER = logging.getLogger('bos.server.migration') + +TEMP_DB=dbutils.get_wrapper(db='session_templates') +SESS_DB=dbutils.get_wrapper(db='sessions') +STAT_DB=dbutils.get_wrapper(db='session_status') +COMP_DB=dbutils.get_wrapper(db='components') + + +def delete_from_db(db: dbutils.DBWrapper, key: str, err_msg: str|None=None) -> None: + if err_msg is None: + LOGGER.warning("Deleting %s under DB key '%s'", db.db_string, key) + else: + LOGGER.error("%s; Deleting %s under DB key '%s'", err_msg, db.db_string, key) + data = db.get_and_delete(key) + if data: + LOGGER.info("Deleted %s '%s': %s", db.db_string, key, data) + else: + LOGGER.warning("Could not delete %s '%s' -- does not exist", db.db_string, key) + + +def delete_component(key: str, err_msg: str|None=None) -> None: + delete_from_db(COMP_DB, key, err_msg) + + +def delete_template(key: str, err_msg: str|None=None) -> None: + delete_from_db(TEMP_DB, key, err_msg) + + +def delete_session(key: str, err_msg: str|None=None) -> None: + delete_from_db(SESS_DB, key, err_msg) + LOGGER.info("Deleting associated session status, if it exists") + delete_from_db(STAT_DB, key, err_msg) diff --git a/src/bos/server/migrations/sanitize.py b/src/bos/server/migrations/sanitize.py new file mode 100644 index 00000000..34443f04 --- /dev/null +++ b/src/bos/server/migrations/sanitize.py @@ -0,0 +1,346 @@ +# +# MIT License +# +# (C) Copyright 2024 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +import copy +import itertools +import logging +import string + +from bos.common.tenant_utils import get_tenant_aware_key +from bos.server.controllers.v2.boot_set import HARDWARE_SPECIFIER_FIELDS +from bos.server.schema import validator + +from .db import TEMP_DB, delete_component, delete_session, delete_template +from .validate import ValidationError, check_component, check_session, check_keys, \ + get_required_field, get_validate_tenant, is_valid_available_template_name, \ + validate_bootset_path, validate_against_schema + + +LOGGER = logging.getLogger('bos.server.migration') + +ALPHANUMERIC = string.ascii_letters + string.digits +TEMPLATE_NAME_CHARACTERS = ALPHANUMERIC + '-._' + + +def sanitize_component(key: str|bytes, data: dict) -> None: + """ + If the id field is missing or invalid, delete the component + """ + try: + check_component(key, data) + except ValidationError as exc: + delete_component(key, str(exc)) + + +def sanitize_session(key: str|bytes, data: dict) -> None: + """ + If the name field is missing, or if the name or tenant fields are invalid, delete the session. + """ + try: + check_session(key, data) + except ValidationError as exc: + delete_session(key, str(exc)) + + +def sanitize_session_template(key: str|bytes, data: dict) -> None: + """ + Session templates are the things most likely to run afoul of the API spec. + This attempts to automatically fix them if at all possible, only deleting them + as a last resort. + """ + try: + _sanitize_session_template(key, data) + except ValidationError as exc: + delete_template(key, str(exc)) + + +def _sanitize_session_template(key: str|bytes, data: dict) -> None: + """ + Validates and tries to sanitize the session template. + If there are correctable errors, the function will update the database + to fix them. + If there are uncorrectable errors, the function deletes the template. + """ + # Validate presence of required name and boot_sets fields + name = get_required_field("name", data) + boot_sets = get_required_field("boot_sets", data) + + # Validate that if there is a non-None tenant field, it follows the schema + tenant = get_validate_tenant(data) + + # Make sure that the boot_set field is not empty and correct type + if not isinstance(boot_sets, dict): + raise ValidationError("'boot_sets' field value has invalid type") + if not boot_sets: + raise ValidationError("'boot_sets' field value is empty") + + # Make a copy of the session template. If we identify problems, we will see if we can correct + # them in the copy. While copying, remove any fields that are no longer in the spec + new_data = { k: copy.deepcopy(v) for k,v in data.items() + if k in validator.session_template_fields } + + # Check and sanitize each boot set + for bsname, bsdata in new_data["boot_sets"].items(): + sanitize_bootset(bsname, bsdata) + + sanitize_description_field(new_data) + sanitize_cfs_field(new_data) + + new_name = get_unused_legal_template_name(name, tenant) + + if new_name == name: + # Name did not change + check_keys(key, get_tenant_aware_key(name, tenant)) + + validate_against_schema(new_data, "V2SessionTemplate") + + if data == new_data: + # Data did not change, so nothing to do + return + + # This means the data changed, so we need to update the entry under the existing key + LOGGER.warning("Updating session template to comply with the BOS API schema") + LOGGER.warning("Old template data: %s", data) + LOGGER.warning("New template data: %s", new_data) + TEMP_DB.put(key, new_data) + return + + # Name changed + base_msg = f"Renaming session template '{name}' (tenant: {tenant}) to new name '{new_name}'" + if data == new_data: + LOGGER.warning(base_msg) + else: + LOGGER.warning("%s and updating it to comply with the BOS API schema", base_msg) + + delete_template(key, data) + + new_key = get_tenant_aware_key(name, tenant) + LOGGER.info("Old DB key = '%s', new DB key = '%s'", key, new_key) + + new_data["name"] = new_name + log_rename_in_template_description(name, new_data) + + LOGGER.warning("Old template data: %s", data) + LOGGER.warning("New template data: %s", new_data) + try: + validate_against_schema(new_data, "V2SessionTemplate") + except ValidationError: + LOGGER.error("New session template does not follow schema -- it will not be saved") + return + + TEMP_DB.put(new_key, new_data) + + +def sanitize_description_field(data: dict) -> None: + """ + Ensure that the description field (if present) is a string that is <= 1023 characters long. + Delete or truncate it as needed. + """ + try: + description = data["description"] + except KeyError: + # If there is no description field, nothing for us to do + return + + # Delete it if it is empty + if not description: + del data["description"] + return + + # Log a warning and delete it if it is not a string + if not isinstance(description, str): + LOGGER.warning("Removing non-string 'description' field from session template") + del data["description"] + return + + # Truncate it if it is too long + if len(description) > 1023: + data["description"] = description[:1023] + + +def sanitize_bootset(bsname: str, bsdata: dict) -> str|None: + """ + Corrects in-place bsdata. + Returns an error message if this proves impossible. + Otherwise returns None. + """ + # Every boot_set must have a valid path set + validate_bootset_path(bsname, bsdata) + + # The type field is required and 's3' is its only legal value + # So rather than even checking it, just set it to 's3' + bsdata["type"] = "s3" + + # Delete the name field, if it is present -- it is redundant and should not + # be stored inside the boot set under the current API spec + bsdata.pop("name", None) + + # Remove any fields that are no longer in the spec + bad_fields = [ field for field in bsdata if field not in validator.boot_set_fields ] + for field in bad_fields: + del bsdata[field] + + # Sanitize the cfs field, if any + try: + sanitize_cfs_field(bsdata) + except ValidationError as exc: + raise ValidationError(f"Boot set '{bsname}' {exc}") from exc + + nonempty_node_field_found = False + + # Use list() since we will be modifying the dict while iterating over its contents + for field, value in list(bsdata.items()): + # We have already dealt with 'cfs', 'path', and 'type', so we can skip those + if field in { 'cfs', 'path', 'type' }: + continue + + # Delete None-valued fields that are not nullable (No boot set fields are nullable) + if value is None: + del bsdata[field] + continue + + if field != 'rootfs_provider' and field not in HARDWARE_SPECIFIER_FIELDS: + continue + + # rootfs_provider and the node-specifier fields are optional* but if present, + # are not allowed to have an empty value. + # So if we find any set to an empty values, delete it. + # + # * The node-specifier fields are each individually optional, but one of them must + # be set + if not value: + del bsdata[field] + elif field in HARDWARE_SPECIFIER_FIELDS: + nonempty_node_field_found = True + + # Validate that at least one of the required node-specified fields is present + if nonempty_node_field_found: + return + + raise ValidationError( + f"Boot set '{bsname}' has no non-empty node fields ({HARDWARE_SPECIFIER_FIELDS})") + + +def sanitize_cfs_field(data: dict) -> None: + """ + If the 'cfs' field is present: + * If it's mapped to None, remove it + * If it isn't a dict, raise a ValidationError + * Remove any invalid fields from it + * Delete the configuration field if it is empty or null + * If (after the above) the cfs dict is empty, remove it. + """ + try: + cfs = data["cfs"] + except KeyError: + # If no CFS field, nothing to sanitize + return + + # The CFS field is not nullable, so if it is mapped to None, delete it + # Also delete it if it is empty, since that is the same effect as it not being present + if not cfs: + del data["cfs"] + return + + # If it does not map to a dictionary, raise an exception + if not isinstance(cfs, dict): + raise ValidationError("'cfs' field value has invalid type") + + # Remove any fields that are no longer in the spec + bad_fields = [ field for field in cfs if field not in validator.cfs_fields ] + for field in bad_fields: + del cfs[field] + + if "configuration" in cfs: + # The configuration field is not nullable, so if it maps to None, delete it + # Also delete it if it is empty, since that is the same effect as it not being present + if not cfs["configuration"]: + del cfs["configuration"] + + # If this results in the cfs field being empty now, delete it + if not cfs: + del data["cfs"] + + +def get_unused_legal_template_name(name: str, tenant: str|None) -> str: + """ + If the current name is legal, return it unchanged. + Otherwise, try to find a name which is not in use and which is legal per the spec. + Returns the new name if successful, otherwise raises ValidationError + """ + try: + validate_against_schema(name, "SessionTemplateName") + return name + except ValidationError: + # If the name has no legal characters at all, or in the (hopefully unlikely) case that it + # is 0 length, make no attempt to salvage it. Otherwise, we will try to find a good name + if not name or not any(c in TEMPLATE_NAME_CHARACTERS for c in name): + raise + + LOGGER.warning("Session template name '%s' (tenant: %s) does not follow schema. " + "Will attempt to rename to a legal name", name, tenant) + + # Strip out illegal characters, but replace spaces with underscores and prepend 'auto_renamed_' + new_name_base = 'auto_renamed_' + ''.join([ c for c in name.replace(' ','_') + if c in TEMPLATE_NAME_CHARACTERS ]) + + # Trim to 127 characters, if it exceeds that + new_name = new_name_base[:127] + + # At this point the only thing preventing this from being a legal name would be if the final + # character is not alphanumeric + if new_name[-1] in ALPHANUMERIC: + if is_valid_available_template_name(new_name, tenant): + return new_name + + # Trying all 2 character alphanumeric suffixes gives 1953 options, which is enough of an effort + # for us to make here. + for suffix_length in range(1, 3): + for suffix in itertools.combinations_with_replacement(ALPHANUMERIC, suffix_length): + new_name = f'{new_name_base[:126-suffix_length]}_{suffix}' + if is_valid_available_template_name(new_name, tenant): + return new_name + + LOGGER.error("Unable to find unused valid new name for session template '%s' (tenant: %s)", + name, tenant) + raise ValidationError("Name does not follow schema") + + +def log_rename_in_template_description(old_name: str, data: dict) -> None: + """ + If possible, update the session template description field to record the previous name of this + template. Failing that, if possible, at least record that it was renamed. + """ + rename_messages = [ + f"Former name: {old_name}", + "Renamed during BOS upgrade", + "Auto-renamed", + "Renamed" ] + + current_description = data.get("description", "") + for msg in rename_messages: + new_description = f"{current_description}; {msg}" if current_description else msg + if len(new_description) <= 1023: + data["description"] = new_description + return diff --git a/src/bos/server/migrations/validate.py b/src/bos/server/migrations/validate.py new file mode 100644 index 00000000..d4a4625e --- /dev/null +++ b/src/bos/server/migrations/validate.py @@ -0,0 +1,130 @@ +# +# MIT License +# +# (C) Copyright 2023-2024 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +import logging +from typing import Any + +from bos.common.tenant_utils import get_tenant_aware_key +from bos.common.utils import exc_type_msg +from bos.server.schema import validator + +from .db import TEMP_DB + + +LOGGER = logging.getLogger('bos.server.migration') + + +class ValidationError(Exception): + """ + Raised by validation functions when they find problems + """ + + +def check_session(key: str|bytes, data: dict) -> None: + """ + Raises a ValidationError if the data contains fatal errors. + """ + name = get_required_field("name", data) + validate_against_schema(name, "V2SessionName") + tenant = get_validate_tenant(data) + expected_db_key = get_tenant_aware_key(name, tenant) + check_keys(key, expected_db_key) + + +def check_component(key: str|bytes, data: dict) -> None: + """ + Raises a ValidationError if the data contains fatal errors. + """ + compid = get_required_field("id", data) + validate_against_schema(compid, "V2ComponentId") + check_keys(key, compid) + + +def get_validate_tenant(data: dict) -> str|None: + """ + If no tenant field present, return None. + If the tenant field value is valid, return it. + Otherwise, raise ValidationError + """ + tenant = data.get("tenant", None) + if tenant is not None: + validate_against_schema(tenant, "V2TenantName") + return tenant + + +def validate_bootset_path(bsname: str, bsdata: dict) -> None: + try: + path = get_required_field("path", bsdata) + except ValidationError as exc: + raise ValidationError(f"Boot set '{bsname}': {exc}") from exc + try: + validate_against_schema(path, "BootManifestPath") + except ValidationError as exc: + raise ValidationError(f"Boot set '{bsname}' has invalid 'path' field: {exc}") from exc + + +def check_keys(actual: str|bytes, expected: str|bytes) -> str|None: + """ + Converts both keys to strings. + Raises ValidationError if the strings do not match + """ + if isinstance(actual, bytes): + actual = actual.decode() + if isinstance(expected, bytes): + expected = expected.decode() + if actual != expected: + raise ValidationError( + f"Actual DB key ('{actual}') does not match expected key ('{expected}')") + + +def is_valid_available_template_name(name: str, tenant: str|None) -> bool: + if get_tenant_aware_key(name, tenant) in TEMP_DB: + return False + try: + validate_against_schema(name, "SessionTemplateName") + except ValidationError: + return False + return True + + +def validate_against_schema(obj: Any, schema_name: str) -> None: + """ + Raises a ValidationError if it does not follow the schema + """ + try: + validator.validate(obj, schema_name) + except Exception as exc: + LOGGER.error(exc_type_msg(exc)) + raise ValidationError(f"Does not follow {schema_name} schema: {obj}") from exc + + +def get_required_field(field: str, data: dict) -> Any: + """ + Returns the value of the field in the dict + Raises ValiationError otherwise + """ + try: + return data[field] + except KeyError as exc: + raise ValidationError(f"Missing required '{field}' field") from exc diff --git a/src/bos/server/redis_db_utils.py b/src/bos/server/redis_db_utils.py index a5c1115e..988970fb 100644 --- a/src/bos/server/redis_db_utils.py +++ b/src/bos/server/redis_db_utils.py @@ -49,8 +49,8 @@ class DBWrapper(): """ def __init__(self, db): - db_id = self._get_db_id(db) - self.client = self._get_client(db_id) + self.db_id = self._get_db_id(db) + self.client = self._get_client(self.db_id) def __contains__(self, key): return self.client.exists(key) @@ -73,6 +73,11 @@ def _get_client(self, db_id): db_id, exc_type_msg(err)) raise + @property + def db_string(self) -> str: + """Returns the string name of the database, from the DATABASES array""" + return DATABASES[self.db_id] + # The following methods act like REST calls for single items def get(self, key): """Get the data for the given key.""" @@ -82,6 +87,14 @@ def get(self, key): data = json.loads(datastr) return data + def get_and_delete(self, key): + """Get the data for the given key and delete it from the DB.""" + datastr = self.client.getdel(key) + if not datastr: + return None + data = json.loads(datastr) + return data + def get_all(self): """Get an array of data for all keys.""" data = [] @@ -91,6 +104,20 @@ def get_all(self): data.append(single_data) return data + def get_all_as_dict(self): + """Return a mapping from all keys to their corresponding data + Based on https://github.com/redis/redis-py/issues/984#issuecomment-391404875 + """ + data = {} + cursor = '0' + while cursor != 0: + cursor, keys = self.client.scan(cursor=cursor, count=1000) + values = [ json.loads(datastr) if datastr else None + for datastr in self.client.mget(keys) ] + keys = [ k.decode() for k in keys ] + data.update(dict(zip(keys, values))) + return data + def get_keys(self): """Get an array of all keys""" data = [] diff --git a/src/bos/server/schema.py b/src/bos/server/schema.py index 93a27ddb..3711d242 100644 --- a/src/bos/server/schema.py +++ b/src/bos/server/schema.py @@ -58,4 +58,19 @@ def validate_session(self, data): def validate_session_template(self, data): self.validate(data, "V2SessionTemplate") + def get_schema_fields(self, schema_name: str): + return set(self.api_schema[schema_name]["properties"]) + + @property + def session_template_fields(self): + return self.get_schema_fields("V2SessionTemplate") + + @property + def boot_set_fields(self): + return self.get_schema_fields("V2BootSet") + + @property + def cfs_fields(self): + return self.get_schema_fields("V2CfsParameters") + validator = Validator() From d456d24bb49ae36aa8e0cba07177b545aa7ad2ca Mon Sep 17 00:00:00 2001 From: "Mitch Harding (the weird one)" Date: Wed, 11 Sep 2024 14:34:12 -0400 Subject: [PATCH 2/3] CASMCMS-7687: Add reject_nids BOS option --- CHANGELOG.md | 3 + api/openapi.yaml.in | 19 ++- .../operators/utils/clients/bos/options.py | 5 + src/bos/server/controllers/v2/boot_set.py | 115 ++++++++++++++++-- src/bos/server/controllers/v2/options.py | 3 +- src/bos/server/controllers/v2/sessions.py | 19 ++- .../server/controllers/v2/sessiontemplates.py | 56 ++------- 7 files changed, 160 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49c81e1b..bb9ca5e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- Added `reject_nids` BOS option, to reject Sessions and Session Template which appear to reference NIDs. + ### Changed - Sanitize BOS data during migration to this BOS version, to ensure it complies with the API specification. - For components and sessions, only validate the fields used to identify them (name, id, tenant). Delete them diff --git a/api/openapi.yaml.in b/api/openapi.yaml.in index 28c05217..e8e989a2 100644 --- a/api/openapi.yaml.in +++ b/api/openapi.yaml.in @@ -231,7 +231,11 @@ components: $ref: '#/components/schemas/Link' NodeList: type: array - description: A node list that is required to have at least one node. + description: | + A node list that is required to have at least one node. + Nodes must be specified by component name (xname). NIDs are not supported. + If the reject_nids option is enabled, then Session Template creation or validation will fail if + any of the boot sets contain a NodeList that appears to contain a NID. minItems: 1 maxItems: 65535 example: ["x3000c0s19b1n0", "x3000c0s19b2n0"] @@ -302,6 +306,9 @@ components: Alternatively, the limit can be set to "*", which means no limit. An empty string or null value is the same as specifying no limit. + + If the reject_nids option is enabled, then Session creation will fail if its + limit appears to contain a NID value. maxLength: 524288 nullable: true default: "" @@ -1045,6 +1052,16 @@ components: example: 1000 minimum: 0 maximum: 131071 + reject_nids: + type: boolean + description: | + If true, then BOS will attempt to prevent Sessions and Session Templates that reference NIDs (which BOS does not support). + Specifically, if this option is true, then: + - When creating a Session, if the Session limit or a Session Template node list appear to contain NID values, then Session creation will fail. + - When creating a Session Template, if a node list appears to contain a NID value, then the Session Template creation will fail. + - When validating an existing Session Template, if a node list appears to contain a NID value, then the validation will report an error. + + This option does NOT have an effect on Sessions that were created prior to it being enabled (even if they have not yet started). session_limit_required: type: boolean description: If true, Sessions cannot be created without specifying the limit parameter. diff --git a/src/bos/operators/utils/clients/bos/options.py b/src/bos/operators/utils/clients/bos/options.py index 5990c5b9..6c784b1a 100644 --- a/src/bos/operators/utils/clients/bos/options.py +++ b/src/bos/operators/utils/clients/bos/options.py @@ -124,4 +124,9 @@ def max_component_batch_size(self): def session_limit_required(self): return self.get_option('session_limit_required', bool, False) + @property + def reject_nids(self): + return self.get_option('reject_nids', bool, False) + + options = Options() diff --git a/src/bos/server/controllers/v2/boot_set.py b/src/bos/server/controllers/v2/boot_set.py index f5f12c6f..7854e134 100644 --- a/src/bos/server/controllers/v2/boot_set.py +++ b/src/bos/server/controllers/v2/boot_set.py @@ -26,6 +26,8 @@ from bos.common.utils import exc_type_msg from bos.operators.utils.boot_image_metadata.factory import BootImageMetaDataFactory from bos.operators.utils.clients.s3 import S3Object, ArtifactNotFound +from bos.server.controllers.v2.options import get_v2_options_data +from bos.server.utils import canonize_xname, ParsingException LOGGER = logging.getLogger('bos.server.controllers.v2.boot_set') @@ -39,7 +41,8 @@ def validate_boot_sets(session_template: dict, operation: str, - template_name: str) -> tuple[str, int]: + template_name: str, + reject_nids: bool|None=None) -> tuple[str, int]: """ Validates the boot sets listed in a session template. It ensures that there are boot sets. @@ -66,17 +69,37 @@ def validate_boot_sets(session_template: dict, msg = f"Session template '{template_name}' requires at least 1 boot set." return BOOT_SET_ERROR, msg + if reject_nids is None: + reject_nids = get_v2_options_data().get('reject_nids', False) + for bs_name, bs in session_template['boot_sets'].items(): + warning_msgs = [] + # Verify that the hardware is specified specified = [bs.get(field, None) for field in HARDWARE_SPECIFIER_FIELDS] if not any(specified): msg = f"Session template: '{template_name}' boot set: '{bs_name}' " \ - f"must have at least one " \ - f"hardware specifier field provided (%s); None were provided." \ - % (', '.join(sorted(HARDWARE_SPECIFIER_FIELDS))) + f"must have at least one non-empty" \ + f"hardware specifier field provided (%s); None were provided." \ + % (', '.join(sorted(HARDWARE_SPECIFIER_FIELDS))) LOGGER.error(msg) return BOOT_SET_ERROR, msg + try: + if any(node[:3] == "nid" for node in bs["node_list"]): + msg = f"Session template: '{template_name}' boot set: '{bs_name}' "\ + "has NID in 'node_list'" + if reject_nids: + LOGGER.error(msg) + return BOOT_SET_ERROR, msg + # Otherwise, log this as a warning -- even if reject_nids is not set, + # BOS still doesn't support NIDs, so this is still undesirable + LOGGER.warning(msg) + warning_msgs.append(msg) + except KeyError: + # If there is no node_list field, not a problem + pass + if operation in ['boot', 'reboot']: # Verify that the boot artifacts exist try: @@ -101,8 +124,6 @@ def validate_boot_sets(session_template: dict, LOGGER.error(msg) return BOOT_SET_ERROR, msg - warning_flag = False - warn_msg = "" for boot_artifact in ["initrd", "boot_parameters"]: try: artifact = getattr(image_metadata.boot_artifacts, boot_artifact) @@ -118,9 +139,83 @@ def validate_boot_sets(session_template: dict, msg = f"Session template: '{template_name}' boot set: '{bs_name}' " \ f"could not locate its {boot_artifact}. Warning: " + exc_type_msg(err) LOGGER.warning(msg) - warning_flag = True - warn_msg = warn_msg + msg - if warning_flag: - return BOOT_SET_WARNING, warn_msg + warning_msgs.append(msg) + if warning_msgs: + return BOOT_SET_WARNING, "; ".join(warning_msgs) return BOOT_SET_SUCCESS, "Valid" + + +def validate_sanitize_boot_sets(template_data: dict) -> None: + """ + Calls validate_sanitize_boot_set on every boot set in the template. + Raises an exception if there are problems. + """ + # The boot_sets field is required. + try: + boot_sets = template_data["boot_sets"] + except KeyError as exc: + raise ParsingException("Missing required 'boot_sets' field") from exc + + # The boot_sets field must map to a dict + if not isinstance(boot_sets, dict): + raise ParsingException("'boot_sets' field has invalid type") + + # The boot_sets field must be non-empty + if not boot_sets: + raise ParsingException("Session templates must contain at least one boot set") + + reject_nids = get_v2_options_data().get('reject_nids', False) + + # Finally, call validate_sanitize_boot_set on each boot set + for bs_name, bs in boot_sets.items(): + validate_sanitize_boot_set(bs_name, bs, reject_nids=reject_nids) + + +def validate_sanitize_boot_set(bs_name: str, bs_data: dict, reject_nids: bool=False) -> None: + """ + Called when creating/updating a BOS session template. + Validates the boot set, and sanitizes it (editing it in place). + Raises ParsingException on error. + """ + if "name" not in bs_data: + # Set the field here -- this allows the name to be validated + # per the schema later + bs_data["name"] = bs_name + elif bs_data["name"] != bs_name: + # All keys in the boot_sets mapping must match the 'name' fields in the + # boot sets to which they map (if they contain a 'name' field). + raise ParsingException(f"boot_sets key ({bs_name}) does not match 'name' " + f"field of corresponding boot set ({bs_data['name']})") + + # Validate that the boot set has at least one of the HARDWARE_SPECIFIER_FIELDS + if not any(field_name in bs_data for field_name in HARDWARE_SPECIFIER_FIELDS): + raise ParsingException(f"Boot set {bs_name} has none of the following " + f"fields: {HARDWARE_SPECIFIER_FIELDS}") + + # Last thing to do is validate/sanitize the node_list field, if it is present + try: + node_list = bs_data["node_list"] + except KeyError: + return + + # Make sure it is a list + if not isinstance(node_list, list): + raise ParsingException(f"Boot set {bs_name} has 'node_list' of invalid type") + + new_node_list = [] + for node in node_list: + # Make sure it is a list of strings + if not isinstance(node, str): + raise ParsingException(f"Boot set {bs_name} 'node_list' contains non-string element") + + # If reject_nids is set, raise an exception if any member of the node list + # begins with 'nid' + if reject_nids and node[:3] == 'nid': + raise ParsingException(f"reject_nids: Boot set {bs_name} 'node_list' contains a NID") + + # Canonize the xname and append it to the node list + new_node_list.append(canonize_xname(node)) + + # Update the node_list value in the boot set with the canonized version + bs_data["node_list"] = new_node_list diff --git a/src/bos/server/controllers/v2/options.py b/src/bos/server/controllers/v2/options.py index 949693ef..86825984 100644 --- a/src/bos/server/controllers/v2/options.py +++ b/src/bos/server/controllers/v2/options.py @@ -51,7 +51,8 @@ 'polling_frequency': 15, 'default_retry_policy': 3, 'max_component_batch_size': 2800, - "session_limit_required": False + 'session_limit_required': False, + 'reject_nids': False } diff --git a/src/bos/server/controllers/v2/sessions.py b/src/bos/server/controllers/v2/sessions.py index 127f82db..ed0a4769 100644 --- a/src/bos/server/controllers/v2/sessions.py +++ b/src/bos/server/controllers/v2/sessions.py @@ -48,7 +48,7 @@ COMPONENTS_DB = dbutils.get_wrapper(db='components') STATUS_DB = dbutils.get_wrapper(db='session_status') MAX_COMPONENTS_IN_ERROR_DETAILS = 10 - +LIMIT_NID_RE = re.compile(r'^[&!]*nid') @reject_invalid_tenant @dbutils.redis_error_handler @@ -76,6 +76,20 @@ def post_v2_session(): # noqa: E501 LOGGER.error(msg) return msg, 400 + reject_nids = get_v2_options_data().get('reject_nids', False) + # If reject_nids is specified, and a limit is specified, check the limit for nids + if session_create.limit and any(LIMIT_NID_RE.match(limit_item) + for limit_item in session_create.limit.split(',')): + msg = f"session limit appears to contain NIDs: {session_create.limit}" + if reject_nids: + msg = f"reject_nids: {msg}" + LOGGER.error(msg) + return msg, 400 + # Since BOS does not support NIDs, still log this as a warning. + # There is a chance that a node group has a name with a name resembling + # a NID + LOGGER.warning(msg) + template_name = session_create.template_name LOGGER.debug("Template Name: %s operation: %s", template_name, session_create.operation) # Check that the template_name exists. @@ -87,7 +101,8 @@ def post_v2_session(): # noqa: E501 session_template, _ = session_template_response # Validate health/validity of the sessiontemplate before creating a session - error_code, msg = validate_boot_sets(session_template, session_create.operation, template_name) + error_code, msg = validate_boot_sets(session_template, session_create.operation, template_name, + reject_nids=reject_nids) if error_code >= BOOT_SET_ERROR: LOGGER.error("Session template fails check: %s", msg) return msg, 400 diff --git a/src/bos/server/controllers/v2/sessiontemplates.py b/src/bos/server/controllers/v2/sessiontemplates.py index 7d5c6509..b10eff4d 100644 --- a/src/bos/server/controllers/v2/sessiontemplates.py +++ b/src/bos/server/controllers/v2/sessiontemplates.py @@ -29,8 +29,8 @@ from bos.common.utils import exc_type_msg from bos.server import redis_db_utils as dbutils from bos.server.schema import validator -from bos.server.utils import canonize_xname, get_request_json, ParsingException -from .boot_set import validate_boot_sets, HARDWARE_SPECIFIER_FIELDS +from bos.server.utils import get_request_json +from .boot_set import validate_boot_sets, validate_sanitize_boot_sets LOGGER = logging.getLogger('bos.server.controllers.v2.sessiontemplates') DB = dbutils.get_wrapper(db='session_templates') @@ -221,55 +221,19 @@ def _matches_filter(data, tenant): return True -def _sanitize_xnames(st_json): - """ - Sanitize xnames - Canonize the xnames - Args: - st_json (dict): The Session Template as a JSON object - - Returns: - Nothing - """ - # There should always be a boot_sets field -- this function - # is only called after the template has been verified - for boot_set in st_json['boot_sets'].values(): - if 'node_list' not in boot_set: - continue - boot_set['node_list'] = [canonize_xname(node) for node in boot_set['node_list']] - - def validate_sanitize_session_template(session_template_id, template_data): """ Used when creating or patching session templates """ - # The boot_sets field is required. - if "boot_sets" not in template_data: - raise ParsingException("Missing required 'boot_sets' field") - - # All keys in the boot_sets mapping must match the 'name' fields in the - # boot sets to which they map (if they contain a 'name' field). - for bs_name, bs in template_data["boot_sets"].items(): - if "name" not in bs: - # Set the field here -- this allows the name to be validated - # per the schema later - bs["name"] = bs_name - elif bs["name"] != bs_name: - raise ParsingException(f"boot_sets key ({bs_name}) does not match 'name' " - f"field of corresponding boot set ({bs['name']})") - - # Also, validate that each boot set has at least one of the HARDWARE_SPECIFIER_FIELDS - if not any(field_name in bs for field_name in HARDWARE_SPECIFIER_FIELDS): - raise ParsingException(f"Boot set {bs_name} has none of the following " - f"fields: {HARDWARE_SPECIFIER_FIELDS}") - - # We do not bother storing the boot set names inside the boot sets, so delete them. - # We know every boot set has a name field because we verified that earlier. - for bs in template_data["boot_sets"].values(): - del bs["name"] - - _sanitize_xnames(template_data) + validate_sanitize_boot_sets(template_data) template_data['name'] = session_template_id - # Finally, validate this against the API schema + # Validate this against the API schema # An exception will be raised if it does not follow it validator.validate_session_template(template_data) + + # We do not bother storing the boot set names inside the boot sets, so delete them. + # We know every boot set has a name field because we verified that earlier in + # validate_sanitize_boot_sets() + for bs in template_data["boot_sets"].values(): + del bs["name"] From f6df6bd10001ec2763f1293e65c77c2ebd1ca947 Mon Sep 17 00:00:00 2001 From: Mitch Harding Date: Wed, 11 Sep 2024 15:13:59 -0400 Subject: [PATCH 3/3] Update CHANGELOG.md 2.28.0 for CSM 1.6 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb9ca5e6..349a4275 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] + +## [2.28.0] - 2024-09-11 ### Added - Added `reject_nids` BOS option, to reject Sessions and Session Template which appear to reference NIDs.