Skip to content

Commit

Permalink
Address comments from the reviewer
Browse files Browse the repository at this point in the history
[noissue]
  • Loading branch information
lubosmj committed Jan 2, 2024
1 parent b39573f commit cbd654e
Show file tree
Hide file tree
Showing 14 changed files with 555 additions and 406 deletions.
4 changes: 2 additions & 2 deletions CHANGES/507.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Added support for pull-through caching. Users can now configure a dedicated distribution and remote
linked to an external registry without specifying a repository name (upstream name). Pulp downloads
missing content automatically if requested and acts as a caching proxy.
linked to an external registry without the need to create and mirror repositories in advance. Pulp
downloads missing content automatically if requested and acts as a caching proxy.
31 changes: 17 additions & 14 deletions docs/workflows/host.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,32 +123,35 @@ Pull-Through Caching
--------------------

The Pull-Through Caching feature offers an alternative way to host content by leveraging a **remote
registry** as the source of truth. This eliminates the need for repository synchronization, reducing
storage overhead, and ensuring up-to-date images. Pulp acts as a **caching proxy** and stores images
in a local repository.
registry** as the source of truth. This eliminates the need to synchronize repositories in advance:
Pulp acts as a **caching proxy** and stores images in a local repository once they have been
pulled by an end client.

Administering the caching::
Configuring the caching::

# initialize a pull-through remote (the concept of upstream-name is not applicable here)
REMOTE_HREF=$(http ${BASE_ADDR}/pulp/api/v3/remotes/container/pull-through/ name=docker-cache url=https://registry-1.docker.io | jq -r ".pulp_href")

# create a specialized distribution linked to the initialized remote
# create a pull-through distribution linked to the initialized remote
http ${BASE_ADDR}/pulp/api/v3/distributions/container/pull-through/ remote=${REMOTE_HREF} name=docker-cache base_path=docker-cache

Downloading content::
Pulling content::

podman pull localhost:24817/docker-cache/library/busybox

In the example above, the image "busybox" is pulled from the "docker-cache" distribution, acting as
a transparent caching layer.
In the example above, the image "busybox" is pulled from *Docker Hub* through the "docker-cache"
distribution, which acts as a transparent caching layer.

By incorporating the Pull-Through Caching feature, administrators can **reduce external network
dependencies**, and ensure a more reliable and responsive container deployment system in production
environments.
By incorporating the Pull-Through Caching feature into standard workflows, users **do not need** to
pre-configure a new repository and sync it before the actual content can be retrieved. This
speeds up the whole process of shipping containers, from the early management stages to distribution.
Like on-demand syncing, the feature also **reduces external network dependencies** and
ensures a more reliable container deployment system in production environments.

.. note::
Pulp creates repositories that maintain a single repository version for user-pulled images.
Pulp creates repositories that maintain a single repository version for pulled images.
Thus, only the latest repository version is retained. For instance, when pulling "debian:10",
a "debian" repository with the "10" tag is established. Subsequent pulls such as "debian:11"
a "debian" repository with the "10" tag is created. Subsequent pulls such as "debian:11"
result in a new repository version that incorporates both tags while removing the previous
version. Repositories and their content remain manageable through standard API endpoints.
version. Repositories and their content remain manageable through standard Pulp API endpoints.
Note, however, that content cannot be pushed to these repositories.
10 changes: 6 additions & 4 deletions pulp_container/app/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
registry = Registry()

app.add_routes(
[web.get(r"/pulp/container/{path:.+}/blobs/sha256:{digest:.+}", registry.get_by_digest)]
)
app.add_routes(
[web.get(r"/pulp/container/{path:.+}/manifests/sha256:{digest:.+}", registry.get_by_digest)]
[
web.get(
r"/pulp/container/{path:.+}/{content:(blobs|manifests)}/sha256:{digest:.+}",
registry.get_by_digest,
)
]
)
app.add_routes([web.get(r"/pulp/container/{path:.+}/manifests/{tag_name}", registry.get_tag)])
29 changes: 18 additions & 11 deletions pulp_container/app/downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

log = getLogger(__name__)

InMemoryDownloadResult = namedtuple("InMemoryDownloadResult", ["data", "headers", "status_code"])
# Record describing the outcome of a HEAD request: the response status and headers
# together with the requested URL, plus "path" and "artifact_attributes" slots.
HeadResult = namedtuple(
    "HeadResult", "status_code path artifact_attributes url headers"
)


class RegistryAuthHttpDownloader(HttpDownloader):
Expand All @@ -27,13 +30,13 @@ class RegistryAuthHttpDownloader(HttpDownloader):
"""

registry_auth = {"bearer": None, "basic": None}
token_lock = asyncio.Lock()

def __init__(self, *args, **kwargs):
"""
Initialize the downloader.
"""
self.remote = kwargs.pop("remote")
self.token_lock = asyncio.Lock()

super().__init__(*args, **kwargs)

Expand Down Expand Up @@ -99,7 +102,12 @@ async def _run(self, handle_401=True, extra_data=None):
return await self._run(handle_401=False, extra_data=extra_data)
else:
raise
to_return = await self._handle_response(response)

if http_method == "head":
to_return = await self._handle_head_response(response)
else:
to_return = await self._handle_response(response)

await response.release()
self.response_headers = response.headers

Expand Down Expand Up @@ -177,14 +185,13 @@ def auth_header(token, basic_auth):
return {"Authorization": basic_auth}
return {}


class InMemoryDownloader(RegistryAuthHttpDownloader):
"""A downloader class suited for downloading data in-memory."""

async def _handle_response(self, response):
data = await response.text()
return InMemoryDownloadResult(
data=data, headers=response.headers, status_code=response.status
async def _handle_head_response(self, response):
    """
    Build the result of a HEAD request from the received response.

    Args:
        response: The response to the HEAD request; only its ``status`` and ``headers``
            attributes are read here.

    Returns:
        HeadResult: The status code, response headers and the requested URL. The ``path``
            and ``artifact_attributes`` fields are always ``None``.
    """
    head_fields = {
        "status_code": response.status,
        # No path or artifact attributes are reported for a HEAD response.
        "path": None,
        "artifact_attributes": None,
        "url": self.url,
        "headers": response.headers,
    }
    return HeadResult(**head_fields)


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 4.2.6 on 2023-10-25 20:04
# Generated by Django 4.2.8 on 2023-12-12 21:15

from django.db import migrations, models
import django.db.models.deletion
Expand All @@ -8,32 +8,45 @@
class Migration(migrations.Migration):

dependencies = [
('core', '0108_task_versions'),
('core', '0116_alter_remoteartifact_md5_alter_remoteartifact_sha1_and_more'),
('container', '0036_containerpushrepository_pending_blobs_manifests'),
]

operations = [
migrations.CreateModel(
name='ContainerPullThroughDistribution',
name='ContainerPullThroughRemote',
fields=[
('distribution_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.distribution')),
('remote_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.remote')),
],
options={
'permissions': [('manage_roles_containerpullthroughdistribution', 'Can manage role assignments on pull-through cache distribution')],
'permissions': [('manage_roles_containerpullthroughremote', 'Can manage role assignments on pull-through container remote')],
'default_related_name': '%(app_label)s_%(model_name)s',
},
bases=('core.distribution', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
bases=('core.remote', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
),
migrations.AddField(
model_name='containerrepository',
name='pending_blobs',
field=models.ManyToManyField(to='container.blob'),
),
migrations.AddField(
model_name='containerrepository',
name='pending_manifests',
field=models.ManyToManyField(to='container.manifest'),
),
migrations.CreateModel(
name='ContainerPullThroughRemote',
name='ContainerPullThroughDistribution',
fields=[
('remote_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.remote')),
('distribution_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.distribution')),
('private', models.BooleanField(default=False, help_text='Restrict pull access to explicitly authorized users. Related distributions inherit this value. Defaults to unrestricted pull access.')),
('description', models.TextField(null=True)),
('namespace', models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='container_pull_through_distributions', to='container.containernamespace')),
],
options={
'permissions': [('manage_roles_containerpullthroughremote', 'Can manage role assignments on pull-through container remote')],
'permissions': [('manage_roles_containerpullthroughdistribution', 'Can manage role assignments on pull-through cache distribution')],
'default_related_name': '%(app_label)s_%(model_name)s',
},
bases=('core.remote', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
bases=('core.distribution', pulpcore.app.models.access_policy.AutoAddObjPermsMixin),
),
migrations.AddField(
model_name='containerdistribution',
Expand Down
139 changes: 31 additions & 108 deletions pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,33 +334,6 @@ def noauth_download_factory(self):
)
return self._noauth_download_factory

@property
def in_memory_download_factory(self):
"""
A Downloader Factory that stores downloaded data in-memory.
This downloader should be used in workflows where the size of downloaded content is
reasonably small. For instance, for downloading manifests or manifest lists.
Upon first access, the InMemoryDownloaderFactory is instantiated and saved internally.
Returns:
DownloadFactory: The instantiated InMemoryDownloaderFactory to be used by
get_in_memory_downloader().
"""
try:
return self._in_memory_download_factory
except AttributeError:
self._in_memory_download_factory = DownloaderFactory(
self,
downloader_overrides={
"http": downloaders.InMemoryDownloader,
"https": downloaders.InMemoryDownloader,
},
)
return self._in_memory_download_factory

def get_downloader(self, remote_artifact=None, url=None, **kwargs):
"""
Get a downloader from either a RemoteArtifact or URL that is configured with this Remote.
Expand Down Expand Up @@ -415,36 +388,6 @@ def get_noauth_downloader(self, remote_artifact=None, url=None, **kwargs):
**kwargs,
)

def get_in_memory_downloader(self, remote_artifact=None, url=None, **kwargs):
"""
Get an in-memory downloader from either a RemoteArtifact or URL that is provided.
This method accepts either `remote_artifact` or `url` but not both. At least one is
required. If neither of both are passed a ValueError is raised.
Args:
remote_artifact (:class:`~pulpcore.app.models.RemoteArtifact`): The RemoteArtifact to
download.
url (str): The URL to download.
kwargs (dict): This accepts the parameters of
:class:`~pulpcore.plugin.download.BaseDownloader`.
Raises:
ValueError: If neither remote_artifact and url are passed, or if both are passed.
Returns:
subclass of :class:`~pulpcore.plugin.download.BaseDownloader`: A downloader that
is configured with the remote settings.
"""
kwargs["remote"] = self
return super().get_downloader(
remote_artifact=remote_artifact,
url=url,
download_factory=self.in_memory_download_factory,
**kwargs,
)

@property
def namespaced_upstream_name(self):
"""
Expand Down Expand Up @@ -473,58 +416,11 @@ class Meta:
class ContainerPullThroughRemote(Remote, AutoAddObjPermsMixin):
"""
A remote for pull-through caching, omitting the requirement for the upstream name.
"""

TYPE = "pull-through"

@property
def download_factory(self):
"""
Downloader Factory that maps to custom downloaders which support registry auth.
Upon first access, the DownloaderFactory is instantiated and saved internally.
Returns:
DownloadFactory: The instantiated DownloaderFactory to be used by
get_downloader()
"""
try:
return self._download_factory
except AttributeError:
self._download_factory = DownloaderFactory(
self,
downloader_overrides={
"http": downloaders.RegistryAuthHttpDownloader,
"https": downloaders.RegistryAuthHttpDownloader,
},
)
return self._download_factory

def get_downloader(self, remote_artifact=None, url=None, **kwargs):
"""
Get a downloader from either a RemoteArtifact or URL that is configured with this Remote.
This method accepts either `remote_artifact` or `url` but not both. At least one is
required. If neither or both are passed a ValueError is raised.
Args:
remote_artifact (:class:`~pulpcore.app.models.RemoteArtifact`): The RemoteArtifact to
download.
url (str): The URL to download.
kwargs (dict): This accepts the parameters of
:class:`~pulpcore.plugin.download.BaseDownloader`.
Raises:
ValueError: If neither remote_artifact and url are passed, or if both are passed.
Returns:
subclass of :class:`~pulpcore.plugin.download.BaseDownloader`: A downloader that
is configured with the remote settings.
"""
kwargs["remote"] = self
return super().get_downloader(remote_artifact=remote_artifact, url=url, **kwargs)
This remote is used for instantiating new regular container remotes with the upstream name.
Configuring credentials and everything related to container workflows can therefore be done
from within a single instance of this remote.
"""

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
Expand Down Expand Up @@ -609,6 +505,8 @@ class ContainerRepository(
manifest_signing_service = models.ForeignKey(
ManifestSigningService, on_delete=models.SET_NULL, null=True
)
pending_blobs = models.ManyToManyField(Blob)
pending_manifests = models.ManyToManyField(Manifest)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
Expand All @@ -632,6 +530,15 @@ def finalize_new_version(self, new_version):
"""
remove_duplicates(new_version)
validate_repo_version(new_version)
self.remove_pending_content(new_version)

def remove_pending_content(self, repository_version):
    """
    Drop pending blobs and manifests that the given repository version has committed.

    Args:
        repository_version: The repository version being finalized. Content it added on top
            of its base version is removed from ``pending_blobs`` and ``pending_manifests``.
    """
    newly_added_pks = repository_version.added(
        base_version=repository_version.base_version
    ).values_list("pk")

    committed_blobs = Blob.objects.filter(pk__in=newly_added_pks)
    self.pending_blobs.remove(*committed_blobs)

    committed_manifests = Manifest.objects.filter(pk__in=newly_added_pks)
    self.pending_manifests.remove(*committed_manifests)


class ContainerPushRepository(Repository, AutoAddObjPermsMixin):
Expand Down Expand Up @@ -695,6 +602,22 @@ class ContainerPullThroughDistribution(Distribution, AutoAddObjPermsMixin):

TYPE = "pull-through"

namespace = models.ForeignKey(
ContainerNamespace,
on_delete=models.CASCADE,
related_name="container_pull_through_distributions",
null=True,
)
private = models.BooleanField(
default=False,
help_text=_(
"Restrict pull access to explicitly authorized users. "
"Related distributions inherit this value. "
"Defaults to unrestricted pull access."
),
)
description = models.TextField(null=True)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
permissions = [
Expand Down
Loading

0 comments on commit cbd654e

Please sign in to comment.