Skip to content

Commit

Permalink
Add includes/excludes fields to filter remotes
Browse files Browse the repository at this point in the history
closes: #459
  • Loading branch information
git-hyagi committed Jun 25, 2024
1 parent ce05037 commit ba2e297
Show file tree
Hide file tree
Showing 9 changed files with 236 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGES/459.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support to filter repos in pull-through distributions using `includes`/`excludes` fields.
33 changes: 33 additions & 0 deletions pulp_container/app/migrations/0040_add_remote_repo_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Generated by Django 4.2.13 on 2024-06-25 14:15

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add nullable ``includes``/``excludes`` JSON fields to both container remote models."""

    dependencies = [("container", "0039_manifest_data")]

    # One AddField per (model, field) pair; for each model "excludes" is added before "includes".
    operations = [
        migrations.AddField(
            model_name=model_name,
            name=field_name,
            field=models.JSONField(null=True),
        )
        for model_name in ("containerpullthroughremote", "containerremote")
        for field_name in ("excludes", "includes")
    ]
7 changes: 7 additions & 0 deletions pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,13 +355,17 @@ class ContainerRemote(Remote, AutoAddObjPermsMixin):
exclude_tags (fields.ArrayField): List of tags to exclude during sync.
sigstore (models.TextField): The URL to a sigstore where signatures of container images
should be synced from.
includes (models.JSONField): Glob patterns of repositories to include. [default=null]
excludes (models.JSONField): Glob patterns of repositories to exclude. [default=null]
"""

upstream_name = models.TextField(db_index=True)
include_foreign_layers = models.BooleanField(default=False)
include_tags = fields.ArrayField(models.TextField(null=True), null=True)
exclude_tags = fields.ArrayField(models.TextField(null=True), null=True)
sigstore = models.TextField(null=True)
includes = models.JSONField(null=True)
excludes = models.JSONField(null=True)

TYPE = "container"

Expand Down Expand Up @@ -503,6 +507,9 @@ class ContainerPullThroughRemote(Remote, AutoAddObjPermsMixin):
from within a single instance of this remote.
"""

includes = models.JSONField(null=True)
excludes = models.JSONField(null=True)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
permissions = [
Expand Down
9 changes: 6 additions & 3 deletions pulp_container/app/registry_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
from pulp_container.app.utils import (
determine_media_type,
extract_data_from_signature,
filter_repo,
has_task_completed,
validate_manifest,
)
Expand Down Expand Up @@ -1110,9 +1111,11 @@ def get_content_units_to_add(self, manifest, tag):
return add_content_units

def fetch_manifest(self, remote, pk):
relative_url = "/v2/{name}/manifests/{pk}".format(
name=remote.namespaced_upstream_name, pk=pk
)
try:
repo_name = filter_repo(remote)
except RepositoryNotFound:
raise
relative_url = "/v2/{name}/manifests/{pk}".format(name=repo_name, pk=pk)
tag_url = urljoin(remote.url, relative_url)
downloader = remote.get_downloader(url=tag_url)
try:
Expand Down
4 changes: 3 additions & 1 deletion pulp_container/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,11 @@ class ContainerPullThroughRemoteSerializer(RemoteSerializer):
"""

policy = serializers.ChoiceField(choices=[Remote.ON_DEMAND], default=Remote.ON_DEMAND)
includes = serializers.JSONField(required=False, allow_null=True)
excludes = serializers.JSONField(required=False, allow_null=True)

class Meta:
fields = RemoteSerializer.Meta.fields
fields = RemoteSerializer.Meta.fields + ("includes", "excludes")
model = models.ContainerPullThroughRemote


Expand Down
26 changes: 2 additions & 24 deletions pulp_container/app/tasks/sync_stages.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import aiohttp
import asyncio
import base64
import fnmatch
import hashlib
import json
import logging
Expand Down Expand Up @@ -34,6 +33,7 @@
determine_media_type,
validate_manifest,
calculate_digest,
filter_resource,
get_content_data,
)

Expand Down Expand Up @@ -118,7 +118,7 @@ async def run(self):
repo_name = self.remote.namespaced_upstream_name
tag_list_url = "/v2/{name}/tags/list".format(name=repo_name)
tag_list = await self.get_paginated_tag_list(tag_list_url, repo_name)
tag_list = self.filter_tags(tag_list)
tag_list = filter_resource(self.remote, tag_list, True)
await pb.aincrement()

for tag_name in tag_list:
Expand Down Expand Up @@ -303,28 +303,6 @@ async def resolve_flush(self):
await self.put(signature_dc)
self.signature_dcs.clear()

def filter_tags(self, tag_list):
    """
    Narrow ``tag_list`` using the remote's include/exclude tag patterns.

    When ``include_tags`` is set, only tags matching at least one of its patterns are kept.
    When ``exclude_tags`` is set, tags matching any of its patterns are then dropped.
    The relative order of the remaining tags is preserved.
    """

    def matches_any(tag, patterns):
        return any(fnmatch.fnmatch(tag, pattern) for pattern in patterns)

    kept = tag_list

    wanted = self.remote.include_tags
    if wanted:
        kept = [tag for tag in kept if matches_any(tag, wanted)]

    unwanted = self.remote.exclude_tags
    if unwanted:
        kept = [tag for tag in kept if not matches_any(tag, unwanted)]

    return kept

async def get_paginated_tag_list(self, rel_link, repo_name):
"""
Handle registries that have pagination enabled.
Expand Down
48 changes: 46 additions & 2 deletions pulp_container/app/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import hashlib
import fnmatch
import re
import subprocess
import gnupg
Expand All @@ -15,8 +16,11 @@

from pulpcore.plugin.models import Artifact, Task

from pulp_container.constants import MANIFEST_MEDIA_TYPES, MEDIA_TYPE
from pulp_container.app.exceptions import ManifestInvalid
from pulp_container.constants import (
MANIFEST_MEDIA_TYPES,
MEDIA_TYPE,
)
from pulp_container.app.exceptions import ManifestInvalid, RepositoryNotFound
from pulp_container.app.json_schemas import (
OCI_INDEX_SCHEMA,
OCI_MANIFEST_SCHEMA,
Expand Down Expand Up @@ -309,3 +313,43 @@ def get_content_data(saved_artifact):
raw_data = file.read()
content_data = json.loads(raw_data)
return content_data, raw_data


def filter_resource(remote, element_list, tags=False):
    """
    Filter tags or repository names by the remote's include/exclude glob patterns.

    Args:
        remote: Object providing ``include_tags``/``exclude_tags`` (read when ``tags`` is true)
            or ``includes``/``excludes`` (read otherwise).
        element_list: The names to filter.
        tags: When true, apply the tag filters instead of the repository filters.

    Returns:
        list: The elements that match at least one include pattern (if any include patterns are
        set) and match no exclude pattern (if any exclude patterns are set), in their original
        order. ``element_list`` itself is returned when no filter is set.
    """
    if tags:
        include, exclude = remote.include_tags, remote.exclude_tags
    else:
        include, exclude = remote.includes, remote.excludes

    # The repository filters are stored as JSON, so a bare string may be stored instead of a
    # list. Iterating a string would use every character as a pattern (a lone "*" matches
    # anything), so a single string is treated as one pattern.
    if isinstance(include, str):
        include = [include]
    if isinstance(exclude, str):
        exclude = [exclude]

    if include:
        element_list = [
            item
            for item in element_list
            if any(fnmatch.fnmatch(item, pattern) for pattern in include)
        ]

    if exclude:
        element_list = [
            item
            for item in element_list
            if not any(fnmatch.fnmatch(item, pattern) for pattern in exclude)
        ]

    return element_list


def filter_repo(remote):
    """
    Return the remote's namespaced upstream repository name if the repository filters allow it.

    Raises:
        RepositoryNotFound: If the repository name does not pass the remote's filters.
    """
    upstream_name = remote.namespaced_upstream_name
    allowed = filter_resource(remote, [upstream_name])
    if not allowed:
        raise RepositoryNotFound(name=upstream_name)
    return allowed[0]
102 changes: 102 additions & 0 deletions pulp_container/tests/functional/api/test_remote_filter_pull_through.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import subprocess
import pytest
import re

from uuid import uuid4

from pulp_container.tests.functional.constants import (
REGISTRY_V2,
REGISTRY_V2_FEED_URL,
PULP_HELLO_WORLD_REPO,
PULP_FIXTURE_1,
)


@pytest.fixture
def pull_through_distribution(
    gen_object_with_cleanup,
    container_pull_through_remote_api,
    container_pull_through_distribution_api,
):
    """Provide a factory creating a pull-through distribution whose remote carries filters."""

    def _pull_through_distribution(includes, excludes):
        remote_body = {
            "name": str(uuid4()),
            "url": REGISTRY_V2_FEED_URL,
            "includes": includes,
            "excludes": excludes,
        }
        remote = gen_object_with_cleanup(container_pull_through_remote_api, remote_body)

        distribution_body = {
            "name": str(uuid4()),
            "base_path": str(uuid4()),
            "remote": remote.pulp_href,
        }
        return gen_object_with_cleanup(container_pull_through_distribution_api, distribution_body)

    return _pull_through_distribution


@pytest.fixture
def pull_and_verify(
    capfd,
    delete_orphans_pre,
    local_registry,
    registry_client,
):
    """Provide a helper that pulls images through a filtered distribution and checks the result."""

    def _pull_and_verify(images, pull_through_distribution, includes, excludes):
        distr = pull_through_distribution(includes, excludes)
        for image_path in images:
            remote_image_path = f"{REGISTRY_V2}/{image_path}"
            local_image_path = f"{distr.base_path}/{image_path}"

            if excludes and re.match(".*fixture.*", image_path):
                # Fixture images are expected to be rejected whenever an exclude filter is set.
                with pytest.raises(subprocess.CalledProcessError):
                    local_registry.pull(local_image_path)
                captured_err = capfd.readouterr().err
                assert re.search(".*Repository not found.*", captured_err) is not None
            else:
                # An allowed image must be identical to the one pulled straight from upstream.
                local_registry.pull(local_image_path)
                local_image = local_registry.inspect(local_image_path)
                registry_client.pull(remote_image_path)
                remote_image = registry_client.inspect(remote_image_path)
                assert local_image[0]["Id"] == remote_image[0]["Id"]

    return _pull_and_verify


def test_no_filter(pull_through_distribution, pull_and_verify):
    """Pull the fixture images through a remote with empty include and exclude lists."""
    images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    pull_and_verify(images, pull_through_distribution, includes=[], excludes=[])


def test_filter_exclude_with_regex(pull_through_distribution, pull_and_verify):
    """Pull the fixture images through a remote that excludes the wildcard pattern ``pulp*``."""
    images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    pull_and_verify(images, pull_through_distribution, includes=[], excludes=["pulp*"])


def test_filter_exclude(pull_through_distribution, pull_and_verify):
    """Pull the fixture images through a remote that excludes ``pulp/test-fixture-1``."""
    images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    pull_and_verify(
        images,
        pull_through_distribution,
        includes=[],
        excludes=["pulp/test-fixture-1"],
    )


def test_filter_include_and_exclude(pull_through_distribution, pull_and_verify):
    """Pull fixture and hello-world images with ``*hello*`` included and ``*fixture*`` excluded."""
    fixture_images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    images = [*fixture_images, f"{PULP_HELLO_WORLD_REPO}:linux"]
    pull_and_verify(
        images,
        pull_through_distribution,
        includes=["*hello*"],
        excludes=["*fixture*"],
    )
36 changes: 36 additions & 0 deletions staging_docs/admin/guides/pull-through-caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,39 @@ ensures a more reliable container deployment system in production environments.
generate a new repository version that incorporates both the "10" and "11" tags, automatically
removing the previous version. Repositories and their content remain manageable through standard
Pulp API endpoints. The repositories are read-only and public by default.


### Filtering the repositories

The `includes`/`excludes` fields of a pull-through remote accept lists of glob patterns
(e.g. `*pulp*`) that restrict which upstream repositories Pulp is allowed to pull from.

```
# define a pull-through remote with the includes/excludes fields
REMOTE_HREF=$(http ${BASE_ADDR}/pulp/api/v3/remotes/container/pull-through/ name=docker-cache url=https://registry-1.docker.io includes:='["*pulp*"]' excludes:='["*molecule_debian*"]' | jq -r ".pulp_href")
# create a pull-through distribution linked to the initialized remote
http ${BASE_ADDR}/pulp/api/v3/distributions/container/pull-through/ remote=${REMOTE_HREF} name=docker-cache base_path=docker-cache
```

Pulling allowed content:

```
podman pull localhost:24817/docker-cache/pulp/test-fixture-1:manifest_a
```

Trying to pull from a repository that has *molecule_debian* in its name fails because it is
filtered out by the *excludes* definition:

```
podman pull localhost:24817/docker-cache/pulp/molecule_debian11
Error response from daemon: unknown: No repository found for the defined remote filters
```

Since we are *including* only repositories with \*pulp\* in their names (`includes=["*pulp*"]`),
the following image pull also fails:

```
podman pull localhost:24817/docker-cache/library/hello-world
Error response from daemon: unknown: No repository found for the defined remote filters
```

0 comments on commit ba2e297

Please sign in to comment.