def get_descriptor_schema(
    allowed_media_types, additional_properties=None, additional_required=None
):
    """Return a concrete descriptor schema for manifests.

    Args:
        allowed_media_types: Iterable of media type strings accepted in the descriptor's
            ``mediaType`` field. The values are copied into a new list.
        additional_properties: Optional mapping of extra property names to their sub-schemas.
            An entry with the same name as a base property replaces that base property.
        additional_required: Optional iterable of extra property names that must be present
            in addition to ``mediaType``, ``size`` and ``digest``.

    Returns:
        dict: A JSON schema of type ``object`` with the merged ``properties`` and ``required``.
    """
    properties = {
        # Materialize a fresh list: the "enum" keyword expects an array, and copying keeps the
        # returned schema independent of any later mutation of the caller's sequence.
        "mediaType": {"type": "string", "enum": list(allowed_media_types)},
        "size": {"type": "number"},
        "digest": {"type": "string"},
        "annotations": {"type": "object", "additionalProperties": True},
        "urls": {"type": "array", "items": {"type": "string"}},
    }

    if additional_properties:
        properties.update(additional_properties)

    required = ["mediaType", "size", "digest"]
    if additional_required:
        required.extend(additional_required)

    return {"type": "object", "properties": properties, "required": required}
# JSON schema for a Docker image manifest (schema version 2).
DOCKER_MANIFEST_V2_SCHEMA = {
    "type": "object",
    "properties": {
        # minimum == maximum pins the only accepted value to 2
        "schemaVersion": {"type": "number", "minimum": 2, "maximum": 2},
        "mediaType": {
            "type": "string",
            "enum": [MEDIA_TYPE.MANIFEST_V2],
        },
        "config": {
            "type": "object",
            "properties": {
                "mediaType": {
                    "type": "string",
                    "enum": [MEDIA_TYPE.CONFIG_BLOB],
                },
                "size": {"type": "number"},
                "digest": {"type": "string"},
            },
            "required": ["mediaType", "size", "digest"],
        },
        "layers": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    # The key must be exactly "mediaType" (it was misspelled "mediaType:"),
                    # otherwise the enum below is never applied to the layer's media type.
                    "mediaType": {
                        "type": "string",
                        "enum": [
                            MEDIA_TYPE.REGULAR_BLOB,
                            MEDIA_TYPE.FOREIGN_BLOB,
                        ],
                    },
                    "size": {"type": "number"},
                    "digest": {"type": "string"},
                },
                "required": ["mediaType", "size", "digest"],
            },
        },
    },
    "required": ["schemaVersion", "mediaType", "config", "layers"],
}
# Shape of one element of the "signatures" array.
_V1_SIGNATURE_ITEM = {
    "type": "object",
    "properties": {
        "protected": {"type": "string"},
        "header": {
            "type": "object",
            "properties": {"alg": {"type": "string"}, "jwk": {"type": "object"}},
            "required": ["alg", "jwk"],
        },
        "signature": {"type": "string"},
    },
    "required": ["protected", "header", "signature"],
}

# Shape of one element of the "history" array.
_V1_HISTORY_ITEM = {
    "type": "object",
    "properties": {"v1Compatibility": {"type": "string"}},
    "required": ["v1Compatibility"],
}

# Shape of one element of the "fsLayers" array.
_V1_FS_LAYER_ITEM = {
    "type": "object",
    "properties": {"blobSum": {"type": "string"}},
    "required": ["blobSum"],
}

# JSON schema for a Docker schema-1 manifest; "signatures" is described but not required.
DOCKER_MANIFEST_V1_SCHEMA = {
    "type": "object",
    "properties": {
        "signatures": {"type": "array", "items": _V1_SIGNATURE_ITEM},
        "tag": {"type": "string"},
        "name": {"type": "string"},
        "history": {"type": "array", "items": _V1_HISTORY_ITEM},
        "fsLayers": {"type": "array", "items": _V1_FS_LAYER_ITEM},
    },
    "required": ["tag", "name", "fsLayers", "history"],
}
{ - "type": "number", - "minimum": 0 - } - } - } + "creator": {"type": "string"}, + "timestamp": {"type": "number", "minimum": 0}, + }, + }, }, "required": ["critical", "optional"], - "additionalProperties": false -}""" + "additionalProperties": False, +} diff --git a/pulp_container/app/registry_api.py b/pulp_container/app/registry_api.py index 6b0e9cd1d..f1098bf92 100644 --- a/pulp_container/app/registry_api.py +++ b/pulp_container/app/registry_api.py @@ -71,7 +71,12 @@ RegistryPermission, TokenPermission, ) -from pulp_container.app.utils import extract_data_from_signature, has_task_completed +from pulp_container.app.utils import ( + determine_media_type, + extract_data_from_signature, + has_task_completed, + validate_manifest, +) from pulp_container.constants import ( EMPTY_BLOB, SIGNATURE_API_EXTENSION_VERSION, @@ -869,34 +874,28 @@ def put(self, request, path, pk=None): """ Responds with the actual manifest """ - # when a user uploads a manifest list with zero listed manifests (no blobs were uploaded - # before) and the specified repository has not been created yet, create the repository - # without raising an error - create_new_repo = request.content_type in ( - models.MEDIA_TYPE.MANIFEST_LIST, - models.MEDIA_TYPE.INDEX_OCI, - ) - _, repository = self.get_dr_push(request, path, create=create_new_repo) # iterate over all the layers and create chunk = request.META["wsgi.input"] artifact = self.receive_artifact(chunk) manifest_digest = "sha256:{id}".format(id=artifact.sha256) - # oci format might not contain mediaType in the manifest.json, docker should - # hence need to check request content type - if request.content_type not in ( - models.MEDIA_TYPE.MANIFEST_V2, - models.MEDIA_TYPE.MANIFEST_OCI, - models.MEDIA_TYPE.MANIFEST_LIST, - models.MEDIA_TYPE.INDEX_OCI, - ): - raise ManifestInvalid(digest=manifest_digest) - with storage.open(artifact.file.name) as artifact_file: raw_data = artifact_file.read() content_data = json.loads(raw_data) + media_type = 
determine_media_type(content_data, request) + validate_manifest(content_data, media_type, manifest_digest) + + # when a user uploads a manifest list with zero listed manifests (no blobs were uploaded + # before) and the specified repository has not been created yet, create the repository + # without raising an error + create_new_repo = request.content_type in ( + models.MEDIA_TYPE.MANIFEST_LIST, + models.MEDIA_TYPE.INDEX_OCI, + ) + _, repository = self.get_dr_push(request, path, create=create_new_repo) + if request.content_type in ( models.MEDIA_TYPE.MANIFEST_LIST, models.MEDIA_TYPE.INDEX_OCI, @@ -935,14 +934,13 @@ def put(self, request, path, pk=None): else: # both docker/oci format should contain config, digest, mediaType, size config_layer = content_data.get("config") - config_media_type = config_layer.get("mediaType") - config_digest = config_layer.get("digest") - if config_media_type not in ( - models.MEDIA_TYPE.CONFIG_BLOB, - models.MEDIA_TYPE.CONFIG_BLOB_OCI, - ): - raise BlobInvalid(digest=config_digest) + if not config_layer: + raise ManifestInvalid( + digest=manifest_digest, + reason="Pushing manifests of the version V1 is not supported", + ) + config_digest = config_layer.get("digest") try: config_blob = models.Blob.objects.get( digest=config_digest, pk__in=repository.latest_version().content @@ -956,21 +954,22 @@ def put(self, request, path, pk=None): for layer in layers: media_type = layer.get("mediaType") urls = layer.get("urls") - digest = layer.get("digest") if ( media_type in ( models.MEDIA_TYPE.FOREIGN_BLOB, - models.MEDIA_TYPE.FOREIGN_BLOB_OCI, + models.MEDIA_TYPE.FOREIGN_BLOB_OCI_TAR, + models.MEDIA_TYPE.FOREIGN_BLOB_OCI_TAR_GZIP, + models.MEDIA_TYPE.FOREIGN_BLOB_OCI_TAR_ZSTD, ) and not urls ): - raise ManifestInvalid(digest=manifest_digest) - if media_type not in ( - models.MEDIA_TYPE.REGULAR_BLOB, - models.MEDIA_TYPE.REGULAR_BLOB_OCI, - ): - raise BlobInvalid(digest=digest) + raise ManifestInvalid( + digest=manifest_digest, + reason="The URL 
of a foreign layer must be specified", + ) + + digest = layer.get("digest") blobs.add(digest) blobs_qs = models.Blob.objects.filter( diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 21a981c48..cdb3c5edd 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -32,6 +32,7 @@ extract_data_from_signature, urlpath_sanitize, determine_media_type, + validate_manifest, ) log = logging.getLogger(__name__) @@ -41,6 +42,7 @@ "Accept": ",".join( [ MEDIA_TYPE.MANIFEST_V2, + MEDIA_TYPE.MANIFEST_V1, MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI, MEDIA_TYPE.MANIFEST_OCI, @@ -133,8 +135,9 @@ async def run(self): tag_dc = DeclarativeContent(Tag(name=tag_name)) content_data = json.loads(raw_data) - media_type = determine_media_type(content_data, dl_res) + digest = dl_res.artifact_attributes["sha256"] + validate_manifest(content_data, media_type, digest) if media_type in (MEDIA_TYPE.MANIFEST_LIST, MEDIA_TYPE.INDEX_OCI): list_dc = self.create_tagged_manifest_list( @@ -352,6 +355,7 @@ def create_tagged_manifest(self, tag_name, saved_artifact, manifest_data, raw_da saved_artifact (pulpcore.plugin.models.Artifact): A saved manifest's Artifact manifest_data (dict): Data about a single new ImageManifest. raw_data: (str): The raw JSON representation of the ImageManifest. 
+ media_type (str): The type of a manifest """ if media_type in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI): @@ -446,8 +450,11 @@ def _get_content_data_blocking(): dl_res.artifact_attributes ) content_data = json.loads(raw_data) + media_type = determine_media_type(content_data, dl_res) + validate_manifest(content_data, media_type, digest) + manifest = Manifest( - digest=manifest_data["digest"], + digest=digest, schema_version=2 if manifest_data["mediaType"] in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI) else 1, @@ -587,7 +594,12 @@ def _include_layer(self, layer): """ foreign_excluded = not self.remote.include_foreign_layers layer_type = layer.get("mediaType", MEDIA_TYPE.REGULAR_BLOB) - is_foreign = layer_type in (MEDIA_TYPE.FOREIGN_BLOB, MEDIA_TYPE.FOREIGN_BLOB_OCI) + is_foreign = layer_type in ( + MEDIA_TYPE.FOREIGN_BLOB, + MEDIA_TYPE.FOREIGN_BLOB_OCI_TAR, + MEDIA_TYPE.FOREIGN_BLOB_OCI_TAR_GZIP, + MEDIA_TYPE.FOREIGN_BLOB_OCI_TAR_ZSTD, + ) if is_foreign and foreign_excluded: log.debug("Foreign Layer: %(d)s EXCLUDED", dict(d=layer)) return False diff --git a/pulp_container/app/utils.py b/pulp_container/app/utils.py index 16dcc6bf9..84b8138f9 100644 --- a/pulp_container/app/utils.py +++ b/pulp_container/app/utils.py @@ -5,18 +5,26 @@ import logging import time -from jsonschema import Draft7Validator +from jsonschema import Draft7Validator, validate, ValidationError from rest_framework.exceptions import Throttled from pulpcore.plugin.models import Task from pulp_container.constants import MEDIA_TYPE -from pulp_container.app.json_schemas import SIGNATURE_SCHEMA +from pulp_container.app.exceptions import ManifestInvalid +from pulp_container.app.json_schemas import ( + OCI_INDEX_SCHEMA, + OCI_MANIFEST_SCHEMA, + DOCKER_MANIFEST_LIST_V2_SCHEMA, + DOCKER_MANIFEST_V2_SCHEMA, + DOCKER_MANIFEST_V1_SCHEMA, + SIGNATURE_SCHEMA, +) KEY_ID_REGEX_COMPILED = re.compile(r"keyid ([0-9A-F]+)") TIMESTAMP_REGEX_COMPILED = re.compile(r"created ([0-9]+)") -validator = 
def determine_schema(media_type):
    """Return a JSON schema based on the specified content type.

    Args:
        media_type (str): The media type of a manifest or manifest list.

    Returns:
        dict: The schema constant registered for ``media_type``.

    Raises:
        ValueError: If no schema is known for ``media_type``.
    """
    if media_type == MEDIA_TYPE.MANIFEST_V2:
        return DOCKER_MANIFEST_V2_SCHEMA
    elif media_type == MEDIA_TYPE.MANIFEST_OCI:
        return OCI_MANIFEST_SCHEMA
    elif media_type == MEDIA_TYPE.MANIFEST_LIST:
        return DOCKER_MANIFEST_LIST_V2_SCHEMA
    elif media_type == MEDIA_TYPE.INDEX_OCI:
        return OCI_INDEX_SCHEMA
    elif media_type in (MEDIA_TYPE.MANIFEST_V1, MEDIA_TYPE.MANIFEST_V1_SIGNED):
        return DOCKER_MANIFEST_V1_SCHEMA
    else:
        raise ValueError(f"No JSON schema is defined for the media type: {media_type}")


def validate_manifest(content_data, media_type, digest):
    """Validate JSON data (manifest) according to its declared content type (e.g., list).

    Args:
        content_data (dict): The parsed JSON content of the manifest.
        media_type (str): The media type used to select the schema.
        digest (str): The manifest digest reported back in the raised error.

    Raises:
        ManifestInvalid: If the media type has no schema or the content violates its schema.
    """
    try:
        schema = determine_schema(media_type)
    except ValueError:
        raise ManifestInvalid(
            reason=f"A manifest of an unknown media type was provided: {media_type}",
            digest=digest,
        ) from None

    try:
        validate(content_data, schema)
    except ValidationError as error:
        # fail on the first encountered error
        # error.path may hold integer array indices (e.g. manifests -> 0 -> platform), so
        # every element is stringified; joining the raw deque would raise a TypeError.
        location = ".".join(map(str, error.path))
        raise ManifestInvalid(reason=f"{location}: {error.message}", digest=digest) from error
MANIFEST_OCI="application/vnd.oci.image.manifest.v1+json", INDEX_OCI="application/vnd.oci.image.index.v1+json", CONFIG_BLOB_OCI="application/vnd.oci.image.config.v1+json", - REGULAR_BLOB_OCI="application/vnd.oci.image.layer.v1.tar+gzip", - FOREIGN_BLOB_OCI="application/vnd.oci.image.layer.nondistributable.v1.tar+gzip", + REGULAR_BLOB_OCI_TAR="application/vnd.oci.image.layer.v1.tar", + REGULAR_BLOB_OCI_TAR_GZIP="application/vnd.oci.image.layer.v1.tar+gzip", + REGULAR_BLOB_OCI_TAR_ZSTD="application/vnd.oci.image.layer.v1.tar+zstd", + FOREIGN_BLOB_OCI_TAR="application/vnd.oci.image.layer.nondistributable.v1.tar", + FOREIGN_BLOB_OCI_TAR_GZIP="application/vnd.oci.image.layer.nondistributable.v1.tar+gzip", + FOREIGN_BLOB_OCI_TAR_ZSTD="application/vnd.oci.image.layer.nondistributable.v1.tar+zstd", ) MANIFEST_MEDIA_TYPES = SimpleNamespace( diff --git a/pulp_container/tests/unit/test_json_schemas.py b/pulp_container/tests/unit/test_json_schemas.py index e153c4d58..5e38e6d07 100644 --- a/pulp_container/tests/unit/test_json_schemas.py +++ b/pulp_container/tests/unit/test_json_schemas.py @@ -6,7 +6,7 @@ from pulp_container.app.json_schemas import SIGNATURE_SCHEMA -validator = Draft7Validator(json.loads(SIGNATURE_SCHEMA)) +validator = Draft7Validator(SIGNATURE_SCHEMA) class TestSignatureJsonSchema(TestCase):