Skip to content

Commit

Permalink
Merge pull request #367 from Cray-HPE/casmcms-7687
Browse files Browse the repository at this point in the history
CASMCMS-7687: Add reject_nids BOS option, to reject Sessions and Session Template which appear to reference NIDs
  • Loading branch information
mharding-hpe authored Sep 11, 2024
2 parents de2e914 + d456d24 commit 0f8cb23
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 60 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- Added `reject_nids` BOS option, to reject Sessions and Session Template which appear to reference NIDs.

### Changed
- Sanitize BOS data during migration to this BOS version, to ensure it complies with the API specification.
- For components and sessions, only validate the fields used to identify them (name, id, tenant). Delete them
Expand Down
19 changes: 18 additions & 1 deletion api/openapi.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,11 @@ components:
$ref: '#/components/schemas/Link'
NodeList:
type: array
description: A node list that is required to have at least one node.
description: |
A node list that is required to have at least one node.
Nodes must be specified by component name (xname). NIDs are not supported.
If the reject_nids option is enabled, then Session Template creation or validation will fail if
any of the boot sets contain a NodeList that appears to contain a NID.
minItems: 1
maxItems: 65535
example: ["x3000c0s19b1n0", "x3000c0s19b2n0"]
Expand Down Expand Up @@ -302,6 +306,9 @@ components:
Alternatively, the limit can be set to "*", which means no limit.

An empty string or null value is the same as specifying no limit.

If the reject_nids option is enabled, then Session creation will fail if its
limit appears to contain a NID value.
maxLength: 524288
nullable: true
default: ""
Expand Down Expand Up @@ -1045,6 +1052,16 @@ components:
example: 1000
minimum: 0
maximum: 131071
reject_nids:
type: boolean
description: |
If true, then BOS will attempt to prevent Sessions and Session Templates that reference NIDs (which BOS does not support).
Specifically, if this option is true, then:
- When creating a Session, if the Session limit or a Session Template node list appear to contain NID values, then Session creation will fail.
- When creating a Session Template, if a node list appears to contain a NID value, then the Session Template creation will fail.
- When validating an existing Session Template, if a node list appears to contain a NID value, then the validation will report an error.

This option does NOT have an effect on Sessions that were created prior to it being enabled (even if they have not yet started).
session_limit_required:
type: boolean
description: If true, Sessions cannot be created without specifying the limit parameter.
Expand Down
5 changes: 5 additions & 0 deletions src/bos/operators/utils/clients/bos/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,9 @@ def max_component_batch_size(self):
def session_limit_required(self):
return self.get_option('session_limit_required', bool, False)

@property
def reject_nids(self):
return self.get_option('reject_nids', bool, False)


options = Options()
115 changes: 105 additions & 10 deletions src/bos/server/controllers/v2/boot_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
from bos.common.utils import exc_type_msg
from bos.operators.utils.boot_image_metadata.factory import BootImageMetaDataFactory
from bos.operators.utils.clients.s3 import S3Object, ArtifactNotFound
from bos.server.controllers.v2.options import get_v2_options_data
from bos.server.utils import canonize_xname, ParsingException

LOGGER = logging.getLogger('bos.server.controllers.v2.boot_set')

Expand All @@ -39,7 +41,8 @@

def validate_boot_sets(session_template: dict,
operation: str,
template_name: str) -> tuple[str, int]:
template_name: str,
reject_nids: bool|None=None) -> tuple[str, int]:
"""
Validates the boot sets listed in a session template.
It ensures that there are boot sets.
Expand All @@ -66,17 +69,37 @@ def validate_boot_sets(session_template: dict,
msg = f"Session template '{template_name}' requires at least 1 boot set."
return BOOT_SET_ERROR, msg

if reject_nids is None:
reject_nids = get_v2_options_data().get('reject_nids', False)

for bs_name, bs in session_template['boot_sets'].items():
warning_msgs = []

# Verify that the hardware is specified
specified = [bs.get(field, None)
for field in HARDWARE_SPECIFIER_FIELDS]
if not any(specified):
msg = f"Session template: '{template_name}' boot set: '{bs_name}' " \
f"must have at least one " \
f"hardware specifier field provided (%s); None were provided." \
% (', '.join(sorted(HARDWARE_SPECIFIER_FIELDS)))
f"must have at least one non-empty" \
f"hardware specifier field provided (%s); None were provided." \
% (', '.join(sorted(HARDWARE_SPECIFIER_FIELDS)))
LOGGER.error(msg)
return BOOT_SET_ERROR, msg
try:
if any(node[:3] == "nid" for node in bs["node_list"]):
msg = f"Session template: '{template_name}' boot set: '{bs_name}' "\
"has NID in 'node_list'"
if reject_nids:
LOGGER.error(msg)
return BOOT_SET_ERROR, msg
# Otherwise, log this as a warning -- even if reject_nids is not set,
# BOS still doesn't support NIDs, so this is still undesirable
LOGGER.warning(msg)
warning_msgs.append(msg)
except KeyError:
# If there is no node_list field, not a problem
pass

if operation in ['boot', 'reboot']:
# Verify that the boot artifacts exist
try:
Expand All @@ -101,8 +124,6 @@ def validate_boot_sets(session_template: dict,
LOGGER.error(msg)
return BOOT_SET_ERROR, msg

warning_flag = False
warn_msg = ""
for boot_artifact in ["initrd", "boot_parameters"]:
try:
artifact = getattr(image_metadata.boot_artifacts, boot_artifact)
Expand All @@ -118,9 +139,83 @@ def validate_boot_sets(session_template: dict,
msg = f"Session template: '{template_name}' boot set: '{bs_name}' " \
f"could not locate its {boot_artifact}. Warning: " + exc_type_msg(err)
LOGGER.warning(msg)
warning_flag = True
warn_msg = warn_msg + msg
if warning_flag:
return BOOT_SET_WARNING, warn_msg
warning_msgs.append(msg)
if warning_msgs:
return BOOT_SET_WARNING, "; ".join(warning_msgs)

return BOOT_SET_SUCCESS, "Valid"


def validate_sanitize_boot_sets(template_data: dict) -> None:
"""
Calls validate_sanitize_boot_set on every boot set in the template.
Raises an exception if there are problems.
"""
# The boot_sets field is required.
try:
boot_sets = template_data["boot_sets"]
except KeyError as exc:
raise ParsingException("Missing required 'boot_sets' field") from exc

# The boot_sets field must map to a dict
if not isinstance(boot_sets, dict):
raise ParsingException("'boot_sets' field has invalid type")

# The boot_sets field must be non-empty
if not boot_sets:
raise ParsingException("Session templates must contain at least one boot set")

reject_nids = get_v2_options_data().get('reject_nids', False)

# Finally, call validate_sanitize_boot_set on each boot set
for bs_name, bs in boot_sets.items():
validate_sanitize_boot_set(bs_name, bs, reject_nids=reject_nids)


def validate_sanitize_boot_set(bs_name: str, bs_data: dict, reject_nids: bool=False) -> None:
"""
Called when creating/updating a BOS session template.
Validates the boot set, and sanitizes it (editing it in place).
Raises ParsingException on error.
"""
if "name" not in bs_data:
# Set the field here -- this allows the name to be validated
# per the schema later
bs_data["name"] = bs_name
elif bs_data["name"] != bs_name:
# All keys in the boot_sets mapping must match the 'name' fields in the
# boot sets to which they map (if they contain a 'name' field).
raise ParsingException(f"boot_sets key ({bs_name}) does not match 'name' "
f"field of corresponding boot set ({bs_data['name']})")

# Validate that the boot set has at least one of the HARDWARE_SPECIFIER_FIELDS
if not any(field_name in bs_data for field_name in HARDWARE_SPECIFIER_FIELDS):
raise ParsingException(f"Boot set {bs_name} has none of the following "
f"fields: {HARDWARE_SPECIFIER_FIELDS}")

# Last thing to do is validate/sanitize the node_list field, if it is present
try:
node_list = bs_data["node_list"]
except KeyError:
return

# Make sure it is a list
if not isinstance(node_list, list):
raise ParsingException(f"Boot set {bs_name} has 'node_list' of invalid type")

new_node_list = []
for node in node_list:
# Make sure it is a list of strings
if not isinstance(node, str):
raise ParsingException(f"Boot set {bs_name} 'node_list' contains non-string element")

# If reject_nids is set, raise an exception if any member of the node list
# begins with 'nid'
if reject_nids and node[:3] == 'nid':
raise ParsingException(f"reject_nids: Boot set {bs_name} 'node_list' contains a NID")

# Canonize the xname and append it to the node list
new_node_list.append(canonize_xname(node))

# Update the node_list value in the boot set with the canonized version
bs_data["node_list"] = new_node_list
3 changes: 2 additions & 1 deletion src/bos/server/controllers/v2/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
'polling_frequency': 15,
'default_retry_policy': 3,
'max_component_batch_size': 2800,
"session_limit_required": False
'session_limit_required': False,
'reject_nids': False
}


Expand Down
19 changes: 17 additions & 2 deletions src/bos/server/controllers/v2/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
COMPONENTS_DB = dbutils.get_wrapper(db='components')
STATUS_DB = dbutils.get_wrapper(db='session_status')
MAX_COMPONENTS_IN_ERROR_DETAILS = 10

LIMIT_NID_RE = re.compile(r'^[&!]*nid')

@reject_invalid_tenant
@dbutils.redis_error_handler
Expand Down Expand Up @@ -76,6 +76,20 @@ def post_v2_session(): # noqa: E501
LOGGER.error(msg)
return msg, 400

reject_nids = get_v2_options_data().get('reject_nids', False)
# If reject_nids is specified, and a limit is specified, check the limit for nids
if session_create.limit and any(LIMIT_NID_RE.match(limit_item)
for limit_item in session_create.limit.split(',')):
msg = f"session limit appears to contain NIDs: {session_create.limit}"
if reject_nids:
msg = f"reject_nids: {msg}"
LOGGER.error(msg)
return msg, 400
# Since BOS does not support NIDs, still log this as a warning.
# There is a chance that a node group has a name with a name resembling
# a NID
LOGGER.warning(msg)

template_name = session_create.template_name
LOGGER.debug("Template Name: %s operation: %s", template_name, session_create.operation)
# Check that the template_name exists.
Expand All @@ -87,7 +101,8 @@ def post_v2_session(): # noqa: E501
session_template, _ = session_template_response

# Validate health/validity of the sessiontemplate before creating a session
error_code, msg = validate_boot_sets(session_template, session_create.operation, template_name)
error_code, msg = validate_boot_sets(session_template, session_create.operation, template_name,
reject_nids=reject_nids)
if error_code >= BOOT_SET_ERROR:
LOGGER.error("Session template fails check: %s", msg)
return msg, 400
Expand Down
56 changes: 10 additions & 46 deletions src/bos/server/controllers/v2/sessiontemplates.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
from bos.common.utils import exc_type_msg
from bos.server import redis_db_utils as dbutils
from bos.server.schema import validator
from bos.server.utils import canonize_xname, get_request_json, ParsingException
from .boot_set import validate_boot_sets, HARDWARE_SPECIFIER_FIELDS
from bos.server.utils import get_request_json
from .boot_set import validate_boot_sets, validate_sanitize_boot_sets

LOGGER = logging.getLogger('bos.server.controllers.v2.sessiontemplates')
DB = dbutils.get_wrapper(db='session_templates')
Expand Down Expand Up @@ -221,55 +221,19 @@ def _matches_filter(data, tenant):
return True


def _sanitize_xnames(st_json):
"""
Sanitize xnames - Canonize the xnames
Args:
st_json (dict): The Session Template as a JSON object
Returns:
Nothing
"""
# There should always be a boot_sets field -- this function
# is only called after the template has been verified
for boot_set in st_json['boot_sets'].values():
if 'node_list' not in boot_set:
continue
boot_set['node_list'] = [canonize_xname(node) for node in boot_set['node_list']]


def validate_sanitize_session_template(session_template_id, template_data):
"""
Used when creating or patching session templates
"""
# The boot_sets field is required.
if "boot_sets" not in template_data:
raise ParsingException("Missing required 'boot_sets' field")

# All keys in the boot_sets mapping must match the 'name' fields in the
# boot sets to which they map (if they contain a 'name' field).
for bs_name, bs in template_data["boot_sets"].items():
if "name" not in bs:
# Set the field here -- this allows the name to be validated
# per the schema later
bs["name"] = bs_name
elif bs["name"] != bs_name:
raise ParsingException(f"boot_sets key ({bs_name}) does not match 'name' "
f"field of corresponding boot set ({bs['name']})")

# Also, validate that each boot set has at least one of the HARDWARE_SPECIFIER_FIELDS
if not any(field_name in bs for field_name in HARDWARE_SPECIFIER_FIELDS):
raise ParsingException(f"Boot set {bs_name} has none of the following "
f"fields: {HARDWARE_SPECIFIER_FIELDS}")

# We do not bother storing the boot set names inside the boot sets, so delete them.
# We know every boot set has a name field because we verified that earlier.
for bs in template_data["boot_sets"].values():
del bs["name"]

_sanitize_xnames(template_data)
validate_sanitize_boot_sets(template_data)
template_data['name'] = session_template_id

# Finally, validate this against the API schema
# Validate this against the API schema
# An exception will be raised if it does not follow it
validator.validate_session_template(template_data)

# We do not bother storing the boot set names inside the boot sets, so delete them.
# We know every boot set has a name field because we verified that earlier in
# validate_sanitize_boot_sets()
for bs in template_data["boot_sets"].values():
del bs["name"]

0 comments on commit 0f8cb23

Please sign in to comment.