From cedb1d182500737be2d59007b91e4c86a5491f5d Mon Sep 17 00:00:00 2001 From: git-hyagi <45576767+git-hyagi@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:06:23 -0300 Subject: [PATCH] Limit the size of manifests/signatures sync/upload Adds a limit to the size of manifests and signatures as a safeguard to avoid DDoS attack during sync and upload operations. To also prevent this during image upload, this commit configures a `client_max_body_size` for manifests and signatures Nginx endpoints. closes: #532 --- CHANGES/532.feature | 3 + pulp_container/app/downloaders.py | 87 ++++++++++++++++++- pulp_container/app/registry_api.py | 11 ++- pulp_container/app/tasks/sync_stages.py | 21 ++++- pulp_container/app/utils.py | 7 ++ .../app/webserver_snippets/nginx.conf | 22 +++++ pulp_container/constants.py | 1 + 7 files changed, 146 insertions(+), 6 deletions(-) create mode 100644 CHANGES/532.feature diff --git a/CHANGES/532.feature b/CHANGES/532.feature new file mode 100644 index 000000000..936332b08 --- /dev/null +++ b/CHANGES/532.feature @@ -0,0 +1,3 @@ +A limit of 4 MB has been set for the size of manifests and signatures to protect against +OOM DoS attacks during synchronization tasks and image uploads. +Additionally, the Nginx snippet has been updated to enforce the limit for these endpoints. diff --git a/pulp_container/app/downloaders.py b/pulp_container/app/downloaders.py index d9c6fff1c..bdfc28816 100644 --- a/pulp_container/app/downloaders.py +++ b/pulp_container/app/downloaders.py @@ -1,5 +1,6 @@ import aiohttp import asyncio +import fnmatch import json import re @@ -8,9 +9,18 @@ from logging import getLogger from urllib import parse + from pulpcore.plugin.download import DownloaderFactory, HttpDownloader -from pulp_container.constants import V2_ACCEPT_HEADERS +from pulp_container.constants import ( + MANIFEST_MEDIA_TYPES, + MANIFEST_PAYLOAD_MAX_SIZE, + MEGABYTE, + SIGNATURE_PAYLOAD_MAX_SIZE, + V2_ACCEPT_HEADERS, +) +from pulp_container.app.exceptions import InvalidRequest +from pulp_container.app.utils import resource_body_size_exceeded_msg log = getLogger(__name__) @@ -18,9 +28,61 @@ "HeadResult", ["status_code", "path", "artifact_attributes", "url", "headers"], ) +DownloadResult = namedtuple("DownloadResult", ["url", "artifact_attributes", "path", "headers"]) + +class ValidateResourceSizeMixin: + async def _handle_response(self, response, content_type=None, max_body_size=None): + """ + Overrides the HttpDownloader method to be able to limit the request body size. + Handle the aiohttp response by writing it to disk and calculating digests + Args: + response (aiohttp.ClientResponse): The response to handle. + content_type (string): Type of the resource (manifest or signature) whose size + will be verified + max_body_size (int): Maximum allowed body size of the resource (manifest or signature). + Returns: + DownloadResult: Contains information about the result. See the DownloadResult docs for + more information. + """ + if self.headers_ready_callback: + await self.headers_ready_callback(response.headers) + total_size = 0 + while True: + chunk = await response.content.read(MEGABYTE) + total_size += len(chunk) + if max_body_size and total_size > max_body_size: + await self.finalize() + raise InvalidRequest(resource_body_size_exceeded_msg(content_type, max_body_size)) + if not chunk: + await self.finalize() + break # the download is done + await self.handle_data(chunk) + return DownloadResult( + path=self.path, + artifact_attributes=self.artifact_attributes, + url=self.url, + headers=response.headers, + ) -class RegistryAuthHttpDownloader(HttpDownloader): + def get_content_type_and_max_resource_size(self, response): + """ + Returns the content_type (manifest or signature) based on the HTTP request and also the + corresponding resource allowed maximum size. + """ + max_resource_size = None + content_type = response.content_type + is_cosign_tag = fnmatch.fnmatch(response.url.name, "sha256-*.sig") + if isinstance(self, NoAuthSignatureDownloader) or is_cosign_tag: + max_resource_size = SIGNATURE_PAYLOAD_MAX_SIZE + content_type = "Signature" + elif content_type in MANIFEST_MEDIA_TYPES.IMAGE + MANIFEST_MEDIA_TYPES.LIST: + max_resource_size = MANIFEST_PAYLOAD_MAX_SIZE + content_type = "Manifest" + return content_type, max_resource_size + + +class RegistryAuthHttpDownloader(ValidateResourceSizeMixin, HttpDownloader): """ Custom Downloader that automatically handles Token Based and Basic Authentication. @@ -104,7 +166,10 @@ async def _run(self, handle_401=True, extra_data=None): if http_method == "head": to_return = await self._handle_head_response(response) else: - to_return = await self._handle_response(response) + content_type, max_resource_size = self.get_content_type_and_max_resource_size( + response + ) + to_return = await self._handle_response(response, content_type, max_resource_size) await response.release() self.response_headers = response.headers @@ -193,7 +258,7 @@ async def _handle_head_response(self, response): ) -class NoAuthSignatureDownloader(HttpDownloader): +class NoAuthSignatureDownloader(ValidateResourceSizeMixin, HttpDownloader): """A downloader class suited for signature downloads.""" def raise_for_status(self, response): @@ -208,6 +273,20 @@ def raise_for_status(self, response): else: response.raise_for_status() + async def _run(self, extra_data=None): + if self.download_throttler: + await self.download_throttler.acquire() + async with self.session.get( + self.url, proxy=self.proxy, proxy_auth=self.proxy_auth, auth=self.auth + ) as response: + self.raise_for_status(response) + content_type, max_resource_size = self.get_content_type_and_max_resource_size(response) + to_return = await self._handle_response(response, content_type, max_resource_size) + await response.release() + if self._close_session_on_finalize: + await self.session.close() + return to_return + class NoAuthDownloaderFactory(DownloaderFactory): """ diff --git a/pulp_container/app/registry_api.py b/pulp_container/app/registry_api.py index 83c95e78e..f0a0b01f8 100644 --- a/pulp_container/app/registry_api.py +++ b/pulp_container/app/registry_api.py @@ -87,9 +87,11 @@ filter_resource, has_task_completed, validate_manifest, + resource_body_size_exceeded_msg, ) from pulp_container.constants import ( EMPTY_BLOB, + MANIFEST_PAYLOAD_MAX_SIZE, SIGNATURE_API_EXTENSION_VERSION, SIGNATURE_HEADER, SIGNATURE_PAYLOAD_MAX_SIZE, @@ -1379,8 +1381,15 @@ def receive_artifact(self, chunk): subchunk = chunk.read(2000000) if not subchunk: break - temp_file.write(subchunk) size += len(subchunk) + if size > MANIFEST_PAYLOAD_MAX_SIZE: + temp_file.flush() + raise InvalidRequest( + message=resource_body_size_exceeded_msg( + "Manifest", MANIFEST_PAYLOAD_MAX_SIZE + ) + ) + temp_file.write(subchunk) for algorithm in Artifact.DIGEST_FIELDS: hashers[algorithm].update(subchunk) temp_file.flush() diff --git a/pulp_container/app/tasks/sync_stages.py b/pulp_container/app/tasks/sync_stages.py index 15ade2596..e73e1e014 100644 --- a/pulp_container/app/tasks/sync_stages.py +++ b/pulp_container/app/tasks/sync_stages.py @@ -27,6 +27,7 @@ ManifestSignature, Tag, ) +from pulp_container.app.exceptions import InvalidRequest from pulp_container.app.utils import ( extract_data_from_signature, urlpath_sanitize, @@ -62,7 +63,14 @@ def __init__(self, remote, signed_only): async def _download_manifest_data(self, manifest_url): downloader = self.remote.get_downloader(url=manifest_url) - response = await downloader.run(extra_data={"headers": V2_ACCEPT_HEADERS}) + try: + response = await downloader.run(extra_data={"headers": V2_ACCEPT_HEADERS}) + except InvalidRequest as e: + # if it failed to download the manifest, log the error and + # there is nothing to return + log.warning(e.args[0]) + return None, None, None + with open(response.path, "rb") as content_file: raw_bytes_data = content_file.read() response.artifact_attributes["file"] = response.path @@ -146,6 +154,11 @@ async def run(self): for artifact in asyncio.as_completed(to_download_artifact): content_data, raw_text_data, response = await artifact + # skip the current object if it failed to be downloaded + if not content_data: + await pb_parsed_tags.aincrement() + continue + digest = calculate_digest(raw_text_data) tag_name = response.url.split("/")[-1] @@ -542,6 +555,12 @@ async def create_signatures(self, man_dc, signature_source): "{} is not accessible, can't sync an image signature. " "Error: {} {}".format(signature_url, exc.status, exc.message) ) + except InvalidRequest as e: + log.warning( + "Failed to sync signature {}. Error: {}".format(signature_url, e.args[0]) + ) + signature_counter += 1 + continue with open(signature_download_result.path, "rb") as f: signature_raw = f.read() diff --git a/pulp_container/app/utils.py b/pulp_container/app/utils.py index ed038fa0e..2f2406e81 100644 --- a/pulp_container/app/utils.py +++ b/pulp_container/app/utils.py @@ -342,3 +342,10 @@ def filter_resources(element_list, include_patterns, exclude_patterns): if exclude_patterns: element_list = filter(partial(exclude, patterns=exclude_patterns), element_list) return list(element_list) + + +def resource_body_size_exceeded_msg(content_type, max_resource_size): + return ( + f"{content_type} body size exceeded the maximum allowed size of " + f"{max_resource_size} bytes." + ) diff --git a/pulp_container/app/webserver_snippets/nginx.conf b/pulp_container/app/webserver_snippets/nginx.conf index a5c27afdd..29cccad71 100644 --- a/pulp_container/app/webserver_snippets/nginx.conf +++ b/pulp_container/app/webserver_snippets/nginx.conf @@ -38,3 +38,25 @@ location /token/ { proxy_redirect off; proxy_pass http://pulp-api; } + +location ~* /v2/.*/manifests/.*$ { + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Host $http_host; + # we don't want nginx trying to do something clever with + # redirects, we set the Host: header above already. + proxy_redirect off; + proxy_pass http://pulp-api; + client_max_body_size 4m; +} + +location ~* /extensions/v2/.*/signatures/.*$ { + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Host $http_host; + # we don't want nginx trying to do something clever with + # redirects, we set the Host: header above already. + proxy_redirect off; + proxy_pass http://pulp-api; + client_max_body_size 4m; +} diff --git a/pulp_container/constants.py b/pulp_container/constants.py index 8d6463481..b58431293 100644 --- a/pulp_container/constants.py +++ b/pulp_container/constants.py @@ -69,5 +69,6 @@ MEGABYTE = 1_000_000 SIGNATURE_PAYLOAD_MAX_SIZE = 4 * MEGABYTE +MANIFEST_PAYLOAD_MAX_SIZE = 4 * MEGABYTE SIGNATURE_API_EXTENSION_VERSION = 2