From 19732ef7cf25fea237efa740bd4f43aa547aedaf Mon Sep 17 00:00:00 2001 From: git-hyagi <45576767+git-hyagi@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:06:23 -0300 Subject: [PATCH] Limit the size of manifests/signatures sync/upload Adds new settings to limit the size of manifests and signatures as a safeguard to avoid DDoS attack during sync and upload operations. To also prevent this during image upload, this commit configures a `client_max_body_size` for manifests and signatures Nginx endpoints. Modify the blob upload to read the layers in chunks. closes: #532 --- CHANGES/532.feature | 6 ++ .../limit-manifest-and-signature-sizes.md | 17 ++++ pulp_container/app/downloaders.py | 86 ++++++++++++++++++- pulp_container/app/registry_api.py | 31 +++++-- pulp_container/app/tasks/sync_stages.py | 21 ++++- pulp_container/app/utils.py | 7 ++ .../app/webserver_snippets/nginx.conf | 22 +++++ pulp_container/constants.py | 1 - 8 files changed, 180 insertions(+), 11 deletions(-) create mode 100644 CHANGES/532.feature create mode 100644 docs/admin/guides/limit-manifest-and-signature-sizes.md diff --git a/CHANGES/532.feature b/CHANGES/532.feature new file mode 100644 index 000000000..077fe8e5f --- /dev/null +++ b/CHANGES/532.feature @@ -0,0 +1,6 @@ +Added support to the `MANIFEST_PAYLOAD_MAX_SIZE` and `SIGNATURE_PAYLOAD_MAX_SIZE` settings to define +limits (for the size of Manifests and Signatures) to protect against OOM DoS attacks during synchronization tasks +and image uploads. +Additionally, the Nginx snippet has been updated to enforce the limit for these endpoints. +Modified the internal logic of Blob uploads to read the receiving layers in chunks, +thereby reducing the memory footprint of the process. diff --git a/docs/admin/guides/limit-manifest-and-signature-sizes.md b/docs/admin/guides/limit-manifest-and-signature-sizes.md new file mode 100644 index 000000000..7872e14b3 --- /dev/null +++ b/docs/admin/guides/limit-manifest-and-signature-sizes.md @@ -0,0 +1,17 @@ +# Limit the size of Manifests and Signatures + +It is possible to configure Pulp to block the synchronization and upload of image Manifests and/or +Signatures if they exceed a defined size. A use case for this feature is to avoid OOM DoS attacks +when synchronizing remote repositories with malicious or compromised containter images. +To implement this, use the following settings: +``` +MANIFEST_MAX_PAYLOAD_SIZE= +SIGNATURE_MAX_PAYLOAD_SIZE= +``` + +!!! info + By default, there is no definition for these settings, meaning that no limit will be enforced. + + +!!! note + A common value adopted by other registries is to set these values to 4MB (4000000). diff --git a/pulp_container/app/downloaders.py b/pulp_container/app/downloaders.py index d9c6fff1c..e992f9804 100644 --- a/pulp_container/app/downloaders.py +++ b/pulp_container/app/downloaders.py @@ -1,16 +1,25 @@ import aiohttp import asyncio +import fnmatch import json import re from aiohttp.client_exceptions import ClientResponseError from collections import namedtuple +from django.conf import settings from logging import getLogger from urllib import parse + from pulpcore.plugin.download import DownloaderFactory, HttpDownloader -from pulp_container.constants import V2_ACCEPT_HEADERS +from pulp_container.constants import ( + MANIFEST_MEDIA_TYPES, + MEGABYTE, + V2_ACCEPT_HEADERS, +) +from pulp_container.app.exceptions import InvalidRequest +from pulp_container.app.utils import resource_body_size_exceeded_msg log = getLogger(__name__) @@ -18,9 +27,61 @@ "HeadResult", ["status_code", "path", "artifact_attributes", "url", "headers"], ) +DownloadResult = namedtuple("DownloadResult", ["url", "artifact_attributes", "path", "headers"]) + +class ValidateResourceSizeMixin: + async def _handle_response(self, response, content_type=None, max_body_size=None): + """ + Overrides the HttpDownloader method to be able to limit the request body size. + Handle the aiohttp response by writing it to disk and calculating digests + Args: + response (aiohttp.ClientResponse): The response to handle. + content_type (string): Type of the resource (manifest or signature) whose size + will be verified + max_body_size (int): Maximum allowed body size of the resource (manifest or signature). + Returns: + DownloadResult: Contains information about the result. See the DownloadResult docs for + more information. + """ + if self.headers_ready_callback: + await self.headers_ready_callback(response.headers) + total_size = 0 + while True: + chunk = await response.content.read(MEGABYTE) + total_size += len(chunk) + if max_body_size and total_size > max_body_size: + await self.finalize() + raise InvalidRequest(resource_body_size_exceeded_msg(content_type, max_body_size)) + if not chunk: + await self.finalize() + break # the download is done + await self.handle_data(chunk) + return DownloadResult( + path=self.path, + artifact_attributes=self.artifact_attributes, + url=self.url, + headers=response.headers, + ) -class RegistryAuthHttpDownloader(HttpDownloader): + def get_content_type_and_max_resource_size(self, response): + """ + Returns the content_type (manifest or signature) based on the HTTP request and also the + corresponding resource allowed maximum size. + """ + max_resource_size = None + content_type = response.content_type + is_cosign_tag = fnmatch.fnmatch(response.url.name, "sha256-*.sig") + if isinstance(self, NoAuthSignatureDownloader) or is_cosign_tag: + max_resource_size = settings.get("SIGNATURE_PAYLOAD_MAX_SIZE", None) + content_type = "Signature" + elif content_type in MANIFEST_MEDIA_TYPES.IMAGE + MANIFEST_MEDIA_TYPES.LIST: + max_resource_size = settings.get("MANIFEST_PAYLOAD_MAX_SIZE", None) + content_type = "Manifest" + return content_type, max_resource_size + + +class RegistryAuthHttpDownloader(ValidateResourceSizeMixin, HttpDownloader): """ Custom Downloader that automatically handles Token Based and Basic Authentication. @@ -104,7 +165,10 @@ async def _run(self, handle_401=True, extra_data=None): if http_method == "head": to_return = await self._handle_head_response(response) else: - to_return = await self._handle_response(response) + content_type, max_resource_size = self.get_content_type_and_max_resource_size( + response + ) + to_return = await self._handle_response(response, content_type, max_resource_size) await response.release() self.response_headers = response.headers @@ -193,7 +257,7 @@ async def _handle_head_response(self, response): ) -class NoAuthSignatureDownloader(HttpDownloader): +class NoAuthSignatureDownloader(ValidateResourceSizeMixin, HttpDownloader): """A downloader class suited for signature downloads.""" def raise_for_status(self, response): @@ -208,6 +272,20 @@ def raise_for_status(self, response): else: response.raise_for_status() + async def _run(self, extra_data=None): + if self.download_throttler: + await self.download_throttler.acquire() + async with self.session.get( + self.url, proxy=self.proxy, proxy_auth=self.proxy_auth, auth=self.auth + ) as response: + self.raise_for_status(response) + content_type, max_resource_size = self.get_content_type_and_max_resource_size(response) + to_return = await self._handle_response(response, content_type, max_resource_size) + await response.release() + if self._close_session_on_finalize: + await self.session.close() + return to_return + class NoAuthDownloaderFactory(DownloaderFactory): """ diff --git a/pulp_container/app/registry_api.py b/pulp_container/app/registry_api.py index 83c95e78e..755ac3fe8 100644 --- a/pulp_container/app/registry_api.py +++ b/pulp_container/app/registry_api.py @@ -87,18 +87,20 @@ filter_resource, has_task_completed, validate_manifest, + resource_body_size_exceeded_msg, ) from pulp_container.constants import ( EMPTY_BLOB, SIGNATURE_API_EXTENSION_VERSION, SIGNATURE_HEADER, - SIGNATURE_PAYLOAD_MAX_SIZE, SIGNATURE_TYPE, V2_ACCEPT_HEADERS, + MEGABYTE, ) log = logging.getLogger(__name__) + IGNORED_PULL_THROUGH_REMOTE_ATTRIBUTES = [ "remote_ptr", "pulp_type", @@ -790,7 +792,9 @@ def create_single_chunk_artifact(self, chunk): with transaction.atomic(): # 1 chunk, create artifact right away with NamedTemporaryFile("ab") as temp_file: - temp_file.write(chunk.read()) + while chunk and chunk.reader.length > 0: + current_chunk = chunk.reader.read(MEGABYTE) + temp_file.write(current_chunk) temp_file.flush() uploaded_file = PulpTemporaryUploadedFile.from_file( @@ -1379,8 +1383,16 @@ def receive_artifact(self, chunk): subchunk = chunk.read(2000000) if not subchunk: break - temp_file.write(subchunk) size += len(subchunk) + manifest_payload_max_size = settings.get("MANIFEST_PAYLOAD_MAX_SIZE", None) + if manifest_payload_max_size and size > manifest_payload_max_size: + temp_file.flush() + raise InvalidRequest( + message=resource_body_size_exceeded_msg( + "Manifest", manifest_payload_max_size + ) + ) + temp_file.write(subchunk) for algorithm in Artifact.DIGEST_FIELDS: hashers[algorithm].update(subchunk) temp_file.flush() @@ -1451,9 +1463,18 @@ def put(self, request, path, pk): except models.Manifest.DoesNotExist: raise ManifestNotFound(reference=pk) - signature_payload = request.META["wsgi.input"].read(SIGNATURE_PAYLOAD_MAX_SIZE) + signature_max_size = settings.get("SIGNATURE_PAYLOAD_MAX_SIZE", None) + if signature_max_size: + meta = request.META + try: + content_length = int(meta.get("CONTENT_LENGTH", meta.get("HTTP_CONTENT_LENGTH", 0))) + except (ValueError, TypeError): + content_length = 0 + if content_length > signature_max_size: + raise ManifestSignatureInvalid(digest=pk) + try: - signature_dict = json.loads(signature_payload) + signature_dict = json.loads(request.stream.body) except json.decoder.JSONDecodeError: raise ManifestSignatureInvalid(digest=pk) diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 15ade2596..e73e1e014 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -27,6 +27,7 @@ ManifestSignature, Tag, ) +from pulp_container.app.exceptions import InvalidRequest from pulp_container.app.utils import ( extract_data_from_signature, urlpath_sanitize, @@ -62,7 +63,14 @@ def __init__(self, remote, signed_only): async def _download_manifest_data(self, manifest_url): downloader = self.remote.get_downloader(url=manifest_url) - response = await downloader.run(extra_data={"headers": V2_ACCEPT_HEADERS}) + try: + response = await downloader.run(extra_data={"headers": V2_ACCEPT_HEADERS}) + except InvalidRequest as e: + # if it failed to download the manifest, log the error and + # there is nothing to return + log.warning(e.args[0]) + return None, None, None + with open(response.path, "rb") as content_file: raw_bytes_data = content_file.read() response.artifact_attributes["file"] = response.path @@ -146,6 +154,11 @@ async def run(self): for artifact in asyncio.as_completed(to_download_artifact): content_data, raw_text_data, response = await artifact + # skip the current object if it failed to be downloaded + if not content_data: + await pb_parsed_tags.aincrement() + continue + digest = calculate_digest(raw_text_data) tag_name = response.url.split("/")[-1] @@ -542,6 +555,12 @@ async def create_signatures(self, man_dc, signature_source): "{} is not accessible, can't sync an image signature. " "Error: {} {}".format(signature_url, exc.status, exc.message) ) + except InvalidRequest as e: + log.warning( + "Failed to sync signature {}. Error: {}".format(signature_url, e.args[0]) + ) + signature_counter += 1 + continue with open(signature_download_result.path, "rb") as f: signature_raw = f.read() diff --git a/pulp_container/app/utils.py b/pulp_container/app/utils.py index ed038fa0e..2f2406e81 100644 --- a/pulp_container/app/utils.py +++ b/pulp_container/app/utils.py @@ -342,3 +342,10 @@ def filter_resources(element_list, include_patterns, exclude_patterns): if exclude_patterns: element_list = filter(partial(exclude, patterns=exclude_patterns), element_list) return list(element_list) + + +def resource_body_size_exceeded_msg(content_type, max_resource_size): + return ( + f"{content_type} body size exceeded the maximum allowed size of " + f"{max_resource_size} bytes." + ) diff --git a/pulp_container/app/webserver_snippets/nginx.conf b/pulp_container/app/webserver_snippets/nginx.conf index a5c27afdd..29cccad71 100644 --- a/pulp_container/app/webserver_snippets/nginx.conf +++ b/pulp_container/app/webserver_snippets/nginx.conf @@ -38,3 +38,25 @@ location /token/ { proxy_redirect off; proxy_pass http://pulp-api; } + +location ~* /v2/.*/manifests/.*$ { + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Host $http_host; + # we don't want nginx trying to do something clever with + # redirects, we set the Host: header above already. + proxy_redirect off; + proxy_pass http://pulp-api; + client_max_body_size 4m; +} + +location ~* /extensions/v2/.*/signatures/.*$ { + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Host $http_host; + # we don't want nginx trying to do something clever with + # redirects, we set the Host: header above already. + proxy_redirect off; + proxy_pass http://pulp-api; + client_max_body_size 4m; +} diff --git a/pulp_container/constants.py b/pulp_container/constants.py index 8d6463481..a55cc8554 100644 --- a/pulp_container/constants.py +++ b/pulp_container/constants.py @@ -68,6 +68,5 @@ SIGNATURE_HEADER = "X-Registry-Supports-Signatures" MEGABYTE = 1_000_000 -SIGNATURE_PAYLOAD_MAX_SIZE = 4 * MEGABYTE SIGNATURE_API_EXTENSION_VERSION = 2