Skip to content

Commit

Permalink
Limit the size of manifests/signatures sync/upload
Browse files Browse the repository at this point in the history
Adds new settings to limit the size of manifests and signatures as a safeguard
to avoid DDoS attack during sync and upload operations.
Modify the blob upload to read the layers in chunks.

closes: pulp#532
  • Loading branch information
git-hyagi committed Oct 14, 2024
1 parent bb7e18c commit 743533a
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGES/532.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added a limit of 4MB to non-Blob content, through the `OCI_PAYLOAD_MAX_SIZE` setting, to protect
against OOM DoS attacks.
15 changes: 15 additions & 0 deletions docs/admin/guides/limit-manifest-and-signature-sizes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Limit the size of Manifests and Signatures

By default, Pulp is configured to block the synchronization of non-Blob content (Manifests,
Signatures, etc.) if they exceed a 4MB size limit. A use case for this feature is to avoid
OOM DoS attacks when synchronizing remote repositories with malicious or compromised container
images.
To define a different limit, use the following setting:
```
OCI_PAYLOAD_MAX_SIZE=<bytes>
```

for example, to modify the limit to 10MB:
```
OCI_PAYLOAD_MAX_SIZE=10_000_000
```
69 changes: 66 additions & 3 deletions pulp_container/app/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,85 @@

from aiohttp.client_exceptions import ClientResponseError
from collections import namedtuple
from django.conf import settings
from logging import getLogger
from urllib import parse

from pulpcore.plugin.download import DownloaderFactory, HttpDownloader

from pulp_container.constants import V2_ACCEPT_HEADERS
from pulp_container.constants import (
CONTENT_TYPE_WITHOUT_SIZE_RESTRICTION,
MEGABYTE,
V2_ACCEPT_HEADERS,
)

log = getLogger(__name__)

HeadResult = namedtuple(
"HeadResult",
["status_code", "path", "artifact_attributes", "url", "headers"],
)
DownloadResult = namedtuple("DownloadResult", ["url", "artifact_attributes", "path", "headers"])


class PayloadTooLarge(ClientResponseError):
"""Client exceeded the max allowed payload size."""


class ValidateResourceSizeMixin:
async def _handle_response(self, response):
"""
Overrides the HttpDownloader method to be able to limit the request body size.
Handle the aiohttp response by writing it to disk and calculating digests
Args:
response (aiohttp.ClientResponse): The response to handle.
Returns:
DownloadResult: Contains information about the result. See the DownloadResult docs for
more information.
"""
if self.headers_ready_callback:
await self.headers_ready_callback(response.headers)
total_size = 0
while True:
chunk = await response.content.read(MEGABYTE)
total_size += len(chunk)
max_body_size = self._get_max_allowed_resource_size(response)
if max_body_size and total_size > max_body_size:
self._ensure_no_broken_file()
raise PayloadTooLarge(
status=413,
message="manifest invalid",
request_info=response.request_info,
history=response.history,
)
if not chunk:
await self.finalize()
break # the download is done
await self.handle_data(chunk)
return DownloadResult(
path=self.path,
artifact_attributes=self.artifact_attributes,
url=self.url,
headers=response.headers,
)

def _get_max_allowed_resource_size(self, response):
"""
Returns the maximum allowed size for non-blob artifacts.
"""

# content_type is defined by aiohttp based on the definition of the content-type header.
# When it is not set, aiohttp defines it as "application/octet-stream"
# note: http content-type header can be manipulated, making it easy to bypass this
# size restriction, but checking the manifest content is also not a feasible solution
# because we would need to first download it.
if response.content_type in CONTENT_TYPE_WITHOUT_SIZE_RESTRICTION:
return None

return settings["OCI_PAYLOAD_MAX_SIZE"]


class RegistryAuthHttpDownloader(HttpDownloader):
class RegistryAuthHttpDownloader(ValidateResourceSizeMixin, HttpDownloader):
"""
Custom Downloader that automatically handles Token Based and Basic Authentication.
Expand Down Expand Up @@ -193,7 +256,7 @@ async def _handle_head_response(self, response):
)


class NoAuthSignatureDownloader(HttpDownloader):
class NoAuthSignatureDownloader(ValidateResourceSizeMixin, HttpDownloader):
"""A downloader class suited for signature downloads."""

def raise_for_status(self, response):
Expand Down
24 changes: 24 additions & 0 deletions pulp_container/app/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from rest_framework import status
from rest_framework.exceptions import APIException, NotFound, ParseError


Expand Down Expand Up @@ -151,3 +152,26 @@ def __init__(self, message):
]
}
)


class PayloadTooLarge(APIException):
"""An exception to render an HTTP 413 response."""

status_code = status.HTTP_413_REQUEST_ENTITY_TOO_LARGE
default_code = "manifest_invalid"

def __init__(self, message=None, code=None):
"""Initialize the exception with the message for invalid size."""
message = message or "payload too large"
code = code or self.default_code
super().__init__(
detail={
"errors": [
{
"code": code,
"message": message,
"detail": "http: request body too large",
}
]
}
)
15 changes: 11 additions & 4 deletions pulp_container/app/registry_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
ManifestNotFound,
ManifestInvalid,
ManifestSignatureInvalid,
PayloadTooLarge,
)
from pulp_container.app.redirects import (
FileStorageRedirects,
Expand All @@ -90,9 +91,9 @@
)
from pulp_container.constants import (
EMPTY_BLOB,
MEGABYTE,
SIGNATURE_API_EXTENSION_VERSION,
SIGNATURE_HEADER,
SIGNATURE_PAYLOAD_MAX_SIZE,
SIGNATURE_TYPE,
V2_ACCEPT_HEADERS,
)
Expand Down Expand Up @@ -790,7 +791,8 @@ def create_single_chunk_artifact(self, chunk):
with transaction.atomic():
# 1 chunk, create artifact right away
with NamedTemporaryFile("ab") as temp_file:
temp_file.write(chunk.read())
while subchunk := chunk.read(MEGABYTE):
temp_file.write(subchunk)
temp_file.flush()

uploaded_file = PulpTemporaryUploadedFile.from_file(
Expand Down Expand Up @@ -1157,6 +1159,8 @@ def fetch_manifest(self, remote, pk):
raise Throttled()
elif response_error.status == 404:
raise ManifestNotFound(reference=pk)
elif response_error.status == 413:
raise PayloadTooLarge()
else:
raise BadGateway(detail=response_error.message)
except (ClientConnectionError, TimeoutException):
Expand Down Expand Up @@ -1379,8 +1383,11 @@ def receive_artifact(self, chunk):
subchunk = chunk.read(2000000)
if not subchunk:
break
temp_file.write(subchunk)
size += len(subchunk)
if size > settings["OCI_PAYLOAD_MAX_SIZE"]:
temp_file.flush()
raise PayloadTooLarge()
temp_file.write(subchunk)
for algorithm in Artifact.DIGEST_FIELDS:
hashers[algorithm].update(subchunk)
temp_file.flush()
Expand Down Expand Up @@ -1451,7 +1458,7 @@ def put(self, request, path, pk):
except models.Manifest.DoesNotExist:
raise ManifestNotFound(reference=pk)

signature_payload = request.META["wsgi.input"].read(SIGNATURE_PAYLOAD_MAX_SIZE)
signature_payload = request.META["wsgi.input"].read(settings["OCI_PAYLOAD_MAX_SIZE"])
try:
signature_dict = json.loads(signature_payload)
except json.decoder.JSONDecodeError:
Expand Down
5 changes: 5 additions & 0 deletions pulp_container/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@

# The number of allowed threads to sign manifests in parallel
MAX_PARALLEL_SIGNING_TASKS = 10

# Set max payload size for non-blob container artifacts (manifests, signatures, etc).
# This limit is also valid for docker manifests, but we will use the OCI_ prefix
# (instead of ARTIFACT_) to avoid confusion with pulpcore artifacts.
OCI_PAYLOAD_MAX_SIZE = 4_000_000
14 changes: 13 additions & 1 deletion pulp_container/app/tasks/sync_stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
SIGNATURE_TYPE,
V2_ACCEPT_HEADERS,
)
from pulp_container.app.downloaders import PayloadTooLarge
from pulp_container.app.models import (
Blob,
BlobManifest,
Expand Down Expand Up @@ -62,7 +63,12 @@ def __init__(self, remote, signed_only):

async def _download_manifest_data(self, manifest_url):
downloader = self.remote.get_downloader(url=manifest_url)
response = await downloader.run(extra_data={"headers": V2_ACCEPT_HEADERS})
try:
response = await downloader.run(extra_data={"headers": V2_ACCEPT_HEADERS})
except PayloadTooLarge as e:
log.warning(e.message + ": max size limit exceeded!")
raise RuntimeError("Manifest max size limit exceeded.")

with open(response.path, "rb") as content_file:
raw_bytes_data = content_file.read()
response.artifact_attributes["file"] = response.path
Expand Down Expand Up @@ -542,6 +548,12 @@ async def create_signatures(self, man_dc, signature_source):
"{} is not accessible, can't sync an image signature. "
"Error: {} {}".format(signature_url, exc.status, exc.message)
)
except PayloadTooLarge as e:
log.warning(
"Failed to sync signature {}. Error: {}".format(signature_url, e.args[0])
)
signature_counter += 1
continue

with open(signature_download_result.path, "rb") as f:
signature_raw = f.read()
Expand Down
11 changes: 10 additions & 1 deletion pulp_container/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@
SIGNATURE_HEADER = "X-Registry-Supports-Signatures"

MEGABYTE = 1_000_000
SIGNATURE_PAYLOAD_MAX_SIZE = 4 * MEGABYTE

SIGNATURE_API_EXTENSION_VERSION = 2

BINARY_CONTENT_TYPE = "binary/octet-stream"
JSON_CONTENT_TYPE = "application/json"

# Any content-type that should not be limited by OCI_PAYLOAD_MAX_SIZE
CONTENT_TYPE_WITHOUT_SIZE_RESTRICTION = [
BINARY_CONTENT_TYPE,
BLOB_CONTENT_TYPE,
JSON_CONTENT_TYPE,
]

0 comments on commit 743533a

Please sign in to comment.