Skip to content

Commit

Permalink
Add includes/excludes fields to filter remotes
Browse files Browse the repository at this point in the history
This commit adds a feature to refine (include only and/or remove unwanted) remote
repositories pulled during pull-through caching.

closes: pulp#459
  • Loading branch information
git-hyagi authored and lubosmj committed Jul 4, 2024
1 parent a832b9f commit 5908f47
Show file tree
Hide file tree
Showing 11 changed files with 255 additions and 59 deletions.
2 changes: 2 additions & 0 deletions CHANGES/459.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added support for filtering remote repositories in pull-through caching using `includes` and
`excludes` fields. These fields can be set on pull-through caching remote objects.
24 changes: 24 additions & 0 deletions pulp_container/app/migrations/0040_add_remote_repo_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 4.2.13 on 2024-06-28 10:34

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('container', '0039_manifest_data'),
]

operations = [
migrations.AddField(
model_name='containerpullthroughremote',
name='excludes',
field=django.contrib.postgres.fields.ArrayField(base_field=models.TextField(null=True), null=True, size=None),
),
migrations.AddField(
model_name='containerpullthroughremote',
name='includes',
field=django.contrib.postgres.fields.ArrayField(base_field=models.TextField(null=True), null=True, size=None),
),
]
5 changes: 5 additions & 0 deletions pulp_container/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,11 @@ class ContainerPullThroughRemote(Remote, AutoAddObjPermsMixin):
from within a single instance of this remote.
"""

TYPE = "pull-through"

includes = fields.ArrayField(models.TextField(null=True), null=True)
excludes = fields.ArrayField(models.TextField(null=True), null=True)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"
permissions = [
Expand Down
36 changes: 21 additions & 15 deletions pulp_container/app/registry_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from django.core.files.base import ContentFile, File
from django.db import IntegrityError, transaction
from django.db.models import F, Value
from django.forms.models import model_to_dict
from django.shortcuts import get_object_or_404

from django.conf import settings
Expand Down Expand Up @@ -82,6 +83,7 @@
from pulp_container.app.utils import (
determine_media_type,
extract_data_from_signature,
filter_resource,
has_task_completed,
validate_manifest,
)
Expand All @@ -97,13 +99,15 @@
log = logging.getLogger(__name__)

IGNORED_PULL_THROUGH_REMOTE_ATTRIBUTES = [
"remote_ptr_id",
"remote_ptr",
"pulp_type",
"pulp_last_updated",
"pulp_created",
"pulp_id",
"url",
"name",
"includes",
"excludes",
"pulp_domain",
]


Expand Down Expand Up @@ -309,18 +313,29 @@ def get_pull_through_drv(self, path):
if not pull_through_cache_distribution:
raise RepositoryNotFound(name=path)

upstream_name = path.split(pull_through_cache_distribution.base_path, maxsplit=1)[1].strip(
"/"
)
pull_through_remote = models.ContainerPullThroughRemote.objects.get(
pk=pull_through_cache_distribution.remote_id
)
if not filter_resource(
upstream_name, pull_through_remote.includes, pull_through_remote.excludes
):
raise RepositoryNotFound(name=path)

try:
with transaction.atomic():
repository, _ = models.ContainerRepository.objects.get_or_create(
name=path, retain_repo_versions=1
)

remote_data = _get_pull_through_remote_data(pull_through_cache_distribution)
upstream_name = path.split(pull_through_cache_distribution.base_path, maxsplit=1)[1]
remote_data = model_to_dict(
pull_through_remote, exclude=IGNORED_PULL_THROUGH_REMOTE_ATTRIBUTES
)
remote, _ = models.ContainerRemote.objects.get_or_create(
name=path,
upstream_name=upstream_name.strip("/"),
url=pull_through_cache_distribution.remote.url,
upstream_name=upstream_name,
**remote_data,
)

Expand Down Expand Up @@ -389,15 +404,6 @@ def create_dr(self, path, request):
return distribution, repository


def _get_pull_through_remote_data(root_cache_distribution):
remote_data = models.ContainerPullThroughRemote.objects.filter(
pk=root_cache_distribution.remote_id
).values()[0]
for attr in IGNORED_PULL_THROUGH_REMOTE_ATTRIBUTES:
remote_data.pop(attr, None)
return remote_data


class BearerTokenView(APIView):
"""
Hand out anonymous or authenticated bearer tokens.
Expand Down
26 changes: 25 additions & 1 deletion pulp_container/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,33 @@ class ContainerPullThroughRemoteSerializer(RemoteSerializer):
"""

policy = serializers.ChoiceField(choices=[Remote.ON_DEMAND], default=Remote.ON_DEMAND)
includes = serializers.ListField(
child=serializers.CharField(max_length=255),
allow_null=True,
required=False,
help_text=_(
"""
A list of remotes to include during pull-through caching.
Wildcards *, ? are recognized.
'includes' is evaluated before 'excludes'.
"""
),
)
excludes = serializers.ListField(
child=serializers.CharField(max_length=255),
allow_null=True,
required=False,
help_text=_(
"""
A list of remotes to exclude during pull-through caching.
Wildcards *, ? are recognized.
'excludes' is evaluated after 'includes'.
"""
),
)

class Meta:
fields = RemoteSerializer.Meta.fields
fields = RemoteSerializer.Meta.fields + ("includes", "excludes")
model = models.ContainerPullThroughRemote


Expand Down
28 changes: 4 additions & 24 deletions pulp_container/app/tasks/sync_stages.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import aiohttp
import asyncio
import base64
import fnmatch
import hashlib
import json
import logging
Expand Down Expand Up @@ -34,6 +33,7 @@
determine_media_type,
validate_manifest,
calculate_digest,
filter_resources,
get_content_data,
)

Expand Down Expand Up @@ -118,7 +118,9 @@ async def run(self):
repo_name = self.remote.namespaced_upstream_name
tag_list_url = "/v2/{name}/tags/list".format(name=repo_name)
tag_list = await self.get_paginated_tag_list(tag_list_url, repo_name)
tag_list = self.filter_tags(tag_list)
tag_list = filter_resources(
tag_list, self.remote.include_tags, self.remote.exclude_tags
)
await pb.aincrement()

for tag_name in tag_list:
Expand Down Expand Up @@ -303,28 +305,6 @@ async def resolve_flush(self):
await self.put(signature_dc)
self.signature_dcs.clear()

def filter_tags(self, tag_list):
"""
Filter tags by a list of included and excluded tags.
"""
include_tags = self.remote.include_tags
if include_tags:
tag_list = [
tag
for tag in tag_list
if any(fnmatch.fnmatch(tag, pattern) for pattern in include_tags)
]

exclude_tags = self.remote.exclude_tags
if exclude_tags:
tag_list = [
tag
for tag in tag_list
if not any(fnmatch.fnmatch(tag, pattern) for pattern in exclude_tags)
]

return tag_list

async def get_paginated_tag_list(self, rel_link, repo_name):
"""
Handle registries that have pagination enabled.
Expand Down
35 changes: 34 additions & 1 deletion pulp_container/app/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import hashlib
import fnmatch
import re
import subprocess
import gnupg
Expand All @@ -11,11 +12,15 @@
from jsonschema import Draft7Validator, validate, ValidationError
from django.core.files.storage import default_storage as storage
from django.db import IntegrityError
from functools import partial
from rest_framework.exceptions import Throttled

from pulpcore.plugin.models import Artifact, Task

from pulp_container.constants import MANIFEST_MEDIA_TYPES, MEDIA_TYPE
from pulp_container.constants import (
MANIFEST_MEDIA_TYPES,
MEDIA_TYPE,
)
from pulp_container.app.exceptions import ManifestInvalid
from pulp_container.app.json_schemas import (
OCI_INDEX_SCHEMA,
Expand Down Expand Up @@ -309,3 +314,31 @@ def get_content_data(saved_artifact):
raw_data = file.read()
content_data = json.loads(raw_data)
return content_data, raw_data


def include(x, patterns):
return any(fnmatch.fnmatch(x, pattern) for pattern in patterns)


def exclude(x, patterns):
return not include(x, patterns)


def filter_resource(element, include_patterns, exclude_patterns):
"""
Returns true if element matches {include,exclude}_patterns filters.
"""
if not (include_patterns or exclude_patterns):
return True
return include(element, include_patterns or []) and exclude(element, exclude_patterns or [])


def filter_resources(element_list, include_patterns, exclude_patterns):
"""
Returns a list of elements based on filter parameters ({include,exclude}_patterns).
"""
if include_patterns:
element_list = filter(partial(include, patterns=include_patterns), element_list)
if exclude_patterns:
element_list = filter(partial(exclude, patterns=exclude_patterns), element_list)
return list(element_list)
20 changes: 2 additions & 18 deletions pulp_container/tests/functional/api/test_pull_through_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,11 @@

from pulp_container.tests.functional.constants import (
REGISTRY_V2,
REGISTRY_V2_FEED_URL,
PULP_HELLO_WORLD_REPO,
PULP_FIXTURE_1,
)


@pytest.fixture
def pull_through_distribution(
gen_object_with_cleanup,
container_pull_through_remote_api,
container_pull_through_distribution_api,
):
remote = gen_object_with_cleanup(
container_pull_through_remote_api,
{"name": str(uuid4()), "url": REGISTRY_V2_FEED_URL},
)
distribution = gen_object_with_cleanup(
container_pull_through_distribution_api,
{"name": str(uuid4()), "base_path": str(uuid4()), "remote": remote.pulp_href},
)
return distribution


@pytest.fixture
def pull_and_verify(
add_to_cleanup,
Expand All @@ -42,6 +24,7 @@ def pull_and_verify(
):
def _pull_and_verify(images, pull_through_distribution):
tags_to_verify = []
pull_through_distribution = pull_through_distribution()
for version, image_path in enumerate(images, start=1):
remote_image_path = f"{REGISTRY_V2}/{image_path}"
local_image_path = f"{pull_through_distribution.base_path}/{image_path}"
Expand Down Expand Up @@ -113,6 +96,7 @@ def test_conflicting_names_and_paths(
local_registry,
monitor_task,
):
pull_through_distribution = pull_through_distribution()
local_image_path = f"{pull_through_distribution.base_path}/{str(uuid4())}"

remote = container_remote_factory(name=local_image_path)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import pytest
import subprocess

from pulp_container.tests.functional.constants import (
REGISTRY_V2,
PULP_HELLO_WORLD_REPO,
PULP_FIXTURE_1,
)


@pytest.fixture
def pull_and_verify(
capfd,
local_registry,
registry_client,
):
def _pull_and_verify(images, pull_through_distribution, includes, excludes, expected):
distr = pull_through_distribution(includes, excludes)
for image_path in images:
remote_image_path = f"{REGISTRY_V2}/{image_path}"
local_image_path = f"{distr.base_path}/{image_path}"

if image_path not in expected:
with pytest.raises(subprocess.CalledProcessError):
local_registry.pull(local_image_path)
assert "Repository not found" in capfd.readouterr().err
else:
local_registry.pull(local_image_path)
local_image = local_registry.inspect(local_image_path)
registry_client.pull(remote_image_path)
remote_image = registry_client.inspect(remote_image_path)
assert local_image[0]["Id"] == remote_image[0]["Id"]

return _pull_and_verify


@pytest.mark.parametrize(
"images, includes, excludes, expected",
[
(
[f"{PULP_FIXTURE_1}:manifest_a", f"{PULP_FIXTURE_1}:manifest_b"],
None,
[],
[f"{PULP_FIXTURE_1}:manifest_a", f"{PULP_FIXTURE_1}:manifest_b"],
),
([f"{PULP_FIXTURE_1}:manifest_a", f"{PULP_FIXTURE_1}:manifest_b"], [], ["pulp*"], []),
(
[f"{PULP_FIXTURE_1}:manifest_a", f"{PULP_FIXTURE_1}:manifest_b"],
[],
["pulp/test-fixture-1"],
[],
),
(
[
f"{PULP_FIXTURE_1}:manifest_a",
f"{PULP_FIXTURE_1}:manifest_b",
f"{PULP_HELLO_WORLD_REPO}:linux",
],
["*hello*"],
["*fixture*"],
[f"{PULP_HELLO_WORLD_REPO}:linux"],
),
(
["custom_namespace/custom_repo:latest"],
["*pulp*"],
None,
[],
),
],
)
def test_includes_excludes_filter(
images,
includes,
excludes,
expected,
pull_through_distribution,
pull_and_verify,
delete_orphans_pre,
):
pull_and_verify(images, pull_through_distribution, includes, excludes, expected)
Loading

0 comments on commit 5908f47

Please sign in to comment.