Skip to content

Commit

Permalink
Add includes/excludes fields to filter remotes
Browse files Browse the repository at this point in the history
closes: #459
  • Loading branch information
git-hyagi committed Jun 26, 2024
1 parent ce05037 commit 070a0bb
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 44 deletions.
2 changes: 2 additions & 0 deletions CHANGES/459.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added support for filtering remote repositories in pull-through caching using `includes` and
`excludes` fields. These fields can be set on pull-through caching remote objects.
23 changes: 23 additions & 0 deletions pulp_container/app/migrations/0040_add_remote_repo_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.13 on 2024-06-26 11:51

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``excludes`` and ``includes`` JSON fields to the pull-through remote."""

    dependencies = [("container", "0039_manifest_data")]

    operations = [
        # Both fields share the same definition; the order (excludes, then includes)
        # is the order in which the operations are applied.
        migrations.AddField(
            model_name="containerpullthroughremote",
            name=field_name,
            field=models.JSONField(null=True),
        )
        for field_name in ("excludes", "includes")
    ]
3 changes: 3 additions & 0 deletions pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,9 @@ class ContainerPullThroughRemote(Remote, AutoAddObjPermsMixin):
from within a single instance of this remote.
"""

includes = models.JSONField(null=True)
excludes = models.JSONField(null=True)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
permissions = [
Expand Down
9 changes: 9 additions & 0 deletions pulp_container/app/registry_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
from pulp_container.app.utils import (
determine_media_type,
extract_data_from_signature,
filter_resource,
has_task_completed,
validate_manifest,
)
Expand All @@ -104,6 +105,8 @@
"pulp_id",
"url",
"name",
"includes",
"excludes",
]


Expand Down Expand Up @@ -309,6 +312,12 @@ def get_pull_through_drv(self, path):
if not pull_through_cache_distribution:
raise RepositoryNotFound(name=path)

pull_through_remote = models.ContainerPullThroughRemote.objects.get(
pk=pull_through_cache_distribution.remote
)
if not filter_resource(path, pull_through_remote.includes, pull_through_remote.excludes):
raise RepositoryNotFound(name=path)

try:
with transaction.atomic():
repository, _ = models.ContainerRepository.objects.get_or_create(
Expand Down
4 changes: 3 additions & 1 deletion pulp_container/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,11 @@ class ContainerPullThroughRemoteSerializer(RemoteSerializer):
"""

policy = serializers.ChoiceField(choices=[Remote.ON_DEMAND], default=Remote.ON_DEMAND)
includes = serializers.JSONField(required=False, allow_null=True)
excludes = serializers.JSONField(required=False, allow_null=True)

class Meta:
fields = RemoteSerializer.Meta.fields
fields = RemoteSerializer.Meta.fields + ("includes", "excludes")
model = models.ContainerPullThroughRemote


Expand Down
28 changes: 4 additions & 24 deletions pulp_container/app/tasks/sync_stages.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import aiohttp
import asyncio
import base64
import fnmatch
import hashlib
import json
import logging
Expand Down Expand Up @@ -34,6 +33,7 @@
determine_media_type,
validate_manifest,
calculate_digest,
filter_resources,
get_content_data,
)

Expand Down Expand Up @@ -118,7 +118,9 @@ async def run(self):
repo_name = self.remote.namespaced_upstream_name
tag_list_url = "/v2/{name}/tags/list".format(name=repo_name)
tag_list = await self.get_paginated_tag_list(tag_list_url, repo_name)
tag_list = self.filter_tags(tag_list)
tag_list = filter_resources(
tag_list, self.remote.include_tags, self.remote.exclude_tags
)
await pb.aincrement()

for tag_name in tag_list:
Expand Down Expand Up @@ -303,28 +305,6 @@ async def resolve_flush(self):
await self.put(signature_dc)
self.signature_dcs.clear()

def filter_tags(self, tag_list):
    """
    Narrow ``tag_list`` with the remote's ``include_tags`` and ``exclude_tags`` patterns.

    Patterns are shell-style wildcards evaluated with ``fnmatch``. An empty or unset
    pattern list disables the corresponding step, so the input is returned untouched
    when neither list is configured.
    """

    def matches_any(tag, patterns):
        return any(fnmatch.fnmatch(tag, pattern) for pattern in patterns)

    kept = tag_list

    wanted = self.remote.include_tags
    if wanted:
        kept = [tag for tag in kept if matches_any(tag, wanted)]

    unwanted = self.remote.exclude_tags
    if unwanted:
        kept = [tag for tag in kept if not matches_any(tag, unwanted)]

    return kept

async def get_paginated_tag_list(self, rel_link, repo_name):
"""
Handle registries that have pagination enabled.
Expand Down
45 changes: 43 additions & 2 deletions pulp_container/app/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import hashlib
import fnmatch
import re
import subprocess
import gnupg
Expand All @@ -11,12 +12,16 @@
from jsonschema import Draft7Validator, validate, ValidationError
from django.core.files.storage import default_storage as storage
from django.db import IntegrityError
from functools import partial
from rest_framework.exceptions import Throttled

from pulpcore.plugin.models import Artifact, Task

from pulp_container.constants import MANIFEST_MEDIA_TYPES, MEDIA_TYPE
from pulp_container.app.exceptions import ManifestInvalid
from pulp_container.constants import (
MANIFEST_MEDIA_TYPES,
MEDIA_TYPE,
)
from pulp_container.app.exceptions import ManifestInvalid, RepositoryNotFound
from pulp_container.app.json_schemas import (
OCI_INDEX_SCHEMA,
OCI_MANIFEST_SCHEMA,
Expand Down Expand Up @@ -309,3 +314,39 @@ def get_content_data(saved_artifact):
raw_data = file.read()
content_data = json.loads(raw_data)
return content_data, raw_data


def include(x, patterns):
    """
    Return True if `x` matches at least one of the shell-style wildcard `patterns`.

    An empty `patterns` iterable never matches, so the result is False.
    """
    return any(fnmatch.fnmatch(x, pattern) for pattern in patterns)


def exclude(x, patterns):
    """
    Return True if `x` matches none of the shell-style wildcard `patterns`.

    Note the polarity: True means that `x` survives the exclusion filter (it should be kept),
    False means that `x` is excluded.
    """
    return not include(x, patterns)


def filter_resource(element, include_patterns, exclude_patterns):
    """
    Check whether `element` (e.g., a repository name or a tag) passes the include/exclude filters.

    An empty or None pattern list disables the corresponding filter. This mirrors the behaviour
    of `filter_resources`: defining only `exclude_patterns` still admits every element that does
    not match one of them, instead of rejecting everything because no include pattern matched.

    Args:
        element: The string to test.
        include_patterns: Wildcard patterns of allowed elements, or None/empty for "allow all".
        exclude_patterns: Wildcard patterns of rejected elements, or None/empty for "reject none".

    Returns:
        True if the element is allowed, False otherwise.
    """
    if include_patterns and not include(element, include_patterns):
        return False
    if exclude_patterns and not exclude(element, exclude_patterns):
        return False
    return True


def filter_resources(element_list, include_patterns, exclude_patterns):
    """
    Return the elements (tags or repositories) that are allowed to be pulled/synced.

    Every element is checked with `filter_resource`, so filtering a single element and filtering
    a whole collection always follow the same rules. The input order is preserved.

    Args:
        element_list: An iterable of strings to filter.
        include_patterns: Wildcard patterns of allowed elements, or None/empty for "allow all".
        exclude_patterns: Wildcard patterns of rejected elements, or None/empty for "reject none".

    Returns:
        A new list with the allowed elements.
    """
    return [
        element
        for element in element_list
        if filter_resource(element, include_patterns, exclude_patterns)
    ]
19 changes: 2 additions & 17 deletions pulp_container/tests/functional/api/test_pull_through_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,6 @@
)


@pytest.fixture
def pull_through_distribution(
    gen_object_with_cleanup,
    container_pull_through_remote_api,
    container_pull_through_distribution_api,
):
    """Create a pull-through remote and return a pull-through distribution that uses it."""
    remote_body = {"name": str(uuid4()), "url": REGISTRY_V2_FEED_URL}
    remote = gen_object_with_cleanup(container_pull_through_remote_api, remote_body)

    # Random name and base_path; the distribution is linked to the remote by its href.
    distribution_body = {
        "name": str(uuid4()),
        "base_path": str(uuid4()),
        "remote": remote.pulp_href,
    }
    return gen_object_with_cleanup(container_pull_through_distribution_api, distribution_body)


@pytest.fixture
def pull_and_verify(
add_to_cleanup,
Expand All @@ -42,6 +25,7 @@ def pull_and_verify(
):
def _pull_and_verify(images, pull_through_distribution):
tags_to_verify = []
pull_through_distribution = pull_through_distribution()
for version, image_path in enumerate(images, start=1):
remote_image_path = f"{REGISTRY_V2}/{image_path}"
local_image_path = f"{pull_through_distribution.base_path}/{image_path}"
Expand Down Expand Up @@ -113,6 +97,7 @@ def test_conflicting_names_and_paths(
local_registry,
monitor_task,
):
pull_through_distribution = pull_through_distribution()
local_image_path = f"{pull_through_distribution.base_path}/{str(uuid4())}"

remote = container_remote_factory(name=local_image_path)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import subprocess
import pytest
import re

from uuid import uuid4

from pulp_container.tests.functional.constants import (
REGISTRY_V2,
REGISTRY_V2_FEED_URL,
PULP_HELLO_WORLD_REPO,
PULP_FIXTURE_1,
)


@pytest.fixture
def pull_and_verify(
    capfd,
    delete_orphans_pre,
    local_registry,
    registry_client,
):
    """
    Provide a helper that pulls images through a filtered pull-through distribution.

    The returned callable creates a distribution via the `pull_through_distribution` factory
    with the given `includes`/`excludes` and then pulls every image in `images`:

    * When `excludes` is set and the image path contains "fixture", the pull is expected to
      fail and the captured stderr must contain "Repository not found". This is a hard-coded
      convention of these tests: the exclude patterns used here are assumed to target the
      fixture repositories.
    * Otherwise, the image is pulled through Pulp and directly from the upstream registry,
      and both pulls must resolve to the same image Id.
    """

    def _pull_and_verify(images, pull_through_distribution, includes, excludes):
        distr = pull_through_distribution(includes, excludes)
        for image_path in images:
            remote_image_path = f"{REGISTRY_V2}/{image_path}"
            local_image_path = f"{distr.base_path}/{image_path}"

            if excludes and "fixture" in image_path:
                with pytest.raises(subprocess.CalledProcessError):
                    local_registry.pull(local_image_path)
                # Checked after the `raises` block; inside it, the assert would never run.
                assert "Repository not found" in capfd.readouterr().err
                continue

            local_registry.pull(local_image_path)
            local_image = local_registry.inspect(local_image_path)
            registry_client.pull(remote_image_path)
            remote_image = registry_client.inspect(remote_image_path)
            assert local_image[0]["Id"] == remote_image[0]["Id"]

    return _pull_and_verify


def test_no_filter(pull_through_distribution, pull_and_verify):
    """Pull two fixture tags through a remote created with `includes=None` and `excludes=[]`."""
    images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    pull_and_verify(images, pull_through_distribution, None, [])


def test_filter_exclude_with_regex(pull_through_distribution, pull_and_verify):
    """Pull two fixture tags through a remote whose `excludes` holds the wildcard `pulp*`."""
    images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    pull_and_verify(images, pull_through_distribution, [], ["pulp*"])


def test_filter_exclude(pull_through_distribution, pull_and_verify):
    """Pull two fixture tags through a remote whose `excludes` holds a literal repository name."""
    images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    pull_and_verify(images, pull_through_distribution, [], ["pulp/test-fixture-1"])


def test_filter_include_and_exclude(pull_through_distribution, pull_and_verify):
    """Pull fixture and hello-world images with both `includes` and `excludes` configured."""
    images = [f"{PULP_FIXTURE_1}:{tag}" for tag in ("manifest_a", "manifest_b")]
    images.append(f"{PULP_HELLO_WORLD_REPO}:linux")
    pull_and_verify(images, pull_through_distribution, ["*hello*"], ["*fixture*"])
25 changes: 25 additions & 0 deletions pulp_container/tests/functional/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,3 +428,28 @@ def _sync(repo, remote=None):
return monitor_task(sync_response.task)

return _sync


@pytest.fixture
def pull_through_distribution(
    gen_object_with_cleanup,
    container_pull_through_remote_api,
    container_pull_through_distribution_api,
):
    """
    Provide a factory that creates a pull-through remote and a distribution pointing to it.

    The factory accepts optional `includes` and `excludes` values that are passed verbatim to
    the remote, and returns the created distribution.
    """

    def _pull_through_distribution(includes=None, excludes=None):
        remote_body = {
            "name": str(uuid4()),
            "url": REGISTRY_V2_FEED_URL,
            "includes": includes,
            "excludes": excludes,
        }
        remote = gen_object_with_cleanup(container_pull_through_remote_api, remote_body)

        # Random name and base_path; the distribution is linked to the remote by its href.
        distribution_body = {
            "name": str(uuid4()),
            "base_path": str(uuid4()),
            "remote": remote.pulp_href,
        }
        return gen_object_with_cleanup(
            container_pull_through_distribution_api, distribution_body
        )

    return _pull_through_distribution
36 changes: 36 additions & 0 deletions staging_docs/admin/guides/pull-through-caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,39 @@ ensures a more reliable container deployment system in production environments.
generate a new repository version that incorporates both the "10" and "11" tags, automatically
removing the previous version. Repositories and their content remain manageable through standard
Pulp API endpoints. The repositories are read-only and public by default.


### Filtering the repositories

The `includes` and `excludes` fields of a pull-through remote restrict which upstream repositories
Pulp is allowed to pull from. Each field takes a list of wildcard patterns (for example, `*pulp*`).

```
# define a pull-through remote with the includes/excludes fields
REMOTE_HREF=$(http ${BASE_ADDR}/pulp/api/v3/remotes/container/pull-through/ name=docker-cache url=https://registry-1.docker.io includes:='["*pulp*"]' excludes:='["*molecule_debian*"]' | jq -r ".pulp_href")
# create a pull-through distribution linked to the initialized remote
http ${BASE_ADDR}/pulp/api/v3/distributions/container/pull-through/ remote=${REMOTE_HREF} name=docker-cache base_path=docker-cache
```

Pulling allowed content:

```
podman pull localhost:24817/docker-cache/pulp/test-fixture-1:manifest_a
```

Trying to pull from a repository that has *molecule_debian* in its name fails because it is
filtered out by the *excludes* definition:

```
podman pull localhost:24817/docker-cache/pulp/molecule_debian11
Error response from daemon: unknown: No repository found for the defined remote filters
```

Since we are *including* only repositories with \*pulp\* in their names (`includes=["*pulp*"]`),
the following image pull also fails:

```
podman pull localhost:24817/docker-cache/library/hello-world
Error response from daemon: unknown: No repository found for the defined remote filters
```

0 comments on commit 070a0bb

Please sign in to comment.