Skip to content

Commit

Permalink
fix: add training-operator aggregation ClusterRoles (#130)
Browse files Browse the repository at this point in the history
* fix: add training-operator aggregation ClusterRoles

Fixes #108

* tests: adds integration test for verifying jobs can be applied by users

* ci: enable tox environment for testing with profiles

Co-authored-by: Phoevos Kalemkeris <[email protected]>
  • Loading branch information
DnPlas and phoevos authored Sep 8, 2023
1 parent 36de342 commit 93f53e8
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 4 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/integrate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ jobs:
integration-test:
name: Integration
runs-on: ubuntu-20.04
strategy:
matrix:
tox-environments:
- charm-integration
- integration-with-profiles
steps:
- name: Check out code
uses: actions/checkout@v3
Expand All @@ -65,7 +70,7 @@ jobs:
juju-channel: 2.9/stable

- name: Run integration tests
run: tox -e integration -- --model testing
run: tox -e ${{ matrix.tox-environments }} -- --model testing

- name: Capture k8s resources on failure
run: |
Expand Down
82 changes: 82 additions & 0 deletions src/templates/auth_manifests.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,85 @@ rules:
- podgroups
verbs:
- "*"
# Source manifests/apps/training-operator/upstream/overlays/kubeflow/kubeflow-training-roles.yaml
---
# Aggregating ClusterRole for training admins. It declares no rules of its own
# (rules: []); the aggregationRule selects every ClusterRole labelled
# rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true".
# Its own aggregate-to-kubeflow-admin label is presumably matched by a
# kubeflow-admin aggregating role defined outside this template - verify.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kubeflow-training-admin
labels:
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
aggregationRule:
clusterRoleSelectors:
- matchLabels:
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true"
rules: []
---
# Edit ClusterRole for training jobs. The aggregate-to-kubeflow-training-admin
# label makes it match the selector of kubeflow-training-admin; the
# aggregate-to-kubeflow-edit label is presumably matched by a kubeflow-edit
# aggregating role defined outside this template - verify.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kubeflow-training-edit
labels:
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true"
rules:
# Full read/write access to the job resources themselves.
- apiGroups:
- kubeflow.org
resources:
- mpijobs
- tfjobs
- pytorchjobs
- mxjobs
- xgboostjobs
- paddlejobs
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
# The status subresources are read-only (get only).
- apiGroups:
- kubeflow.org
resources:
- mpijobs/status
- tfjobs/status
- pytorchjobs/status
- mxjobs/status
- xgboostjobs/status
- paddlejobs/status
verbs:
- get
---
# Read-only ClusterRole for training jobs. Unlike kubeflow-training-edit it
# carries only the aggregate-to-kubeflow-view label, so it is not selected by
# the kubeflow-training-admin aggregationRule.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kubeflow-training-view
labels:
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
rules:
# Read access (get/list/watch) to the job resources.
- apiGroups:
- kubeflow.org
resources:
- mpijobs
- tfjobs
- pytorchjobs
- mxjobs
- xgboostjobs
- paddlejobs
verbs:
- get
- list
- watch
# Read access (get only) to the status subresources.
- apiGroups:
- kubeflow.org
resources:
- mpijobs/status
- tfjobs/status
- pytorchjobs/status
- mxjobs/status
- xgboostjobs/status
- paddlejobs/status
verbs:
- get
11 changes: 11 additions & 0 deletions tests/integration/profile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.

# Profile applied by the integration tests to simulate a user. The metadata
# name must stay in sync with PROFILE_NAME in test_charm_with_profile.py.
# NOTE(review): the owner name on the last line appears mangled by e-mail
# obfuscation in this rendering - restore the real address before use.
apiVersion: kubeflow.org/v1
kind: Profile
metadata:
name: profile-example
spec:
owner:
kind: User
name: [email protected]
138 changes: 138 additions & 0 deletions tests/integration/test_charm_with_profile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
import glob
import logging
from pathlib import Path

import lightkube
import pytest
import yaml
from lightkube import codecs
from lightkube.generic_resource import create_global_resource
from pytest_operator.plugin import OpsTest
from tenacity import RetryError, Retrying, stop_after_attempt, stop_after_delay, wait_exponential

# Module-level logger for the integration tests in this file.
log = logging.getLogger(__name__)

# Application name under which the charm under test is deployed.
APP_NAME = "training-operator"

# Name of the Profile applied by the tests and the namespace used for the
# authorization checks.
PROFILE_NAME = "profile-example"
PROFILE_NAMESPACE = "profile-example"

# Paths are resolved against the current working directory at import time, so
# the tests are expected to be launched from the repository root.
basedir = Path("./").absolute()
PROFILE_FILE_PATH = basedir / "tests/integration/profile.yaml"

# Parsed contents of the Profile manifest and of the charm metadata.
PROFILE_FILE = yaml.safe_load(PROFILE_FILE_PATH.read_text())
METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())


@pytest.mark.abort_on_fail
async def test_build_and_deploy(ops_test: OpsTest):
    """Build the charm, deploy it with its Profile dependencies and wait for active.

    The charm's OCI image is taken from the ``upstream-source`` entry of the
    ``training-operator-image`` resource in the charm metadata.
    """
    built_charm = await ops_test.build_charm(".")
    oci_image = METADATA["resources"]["training-operator-image"]["upstream-source"]

    await ops_test.model.deploy(
        built_charm,
        resources={"training-operator-image": oci_image},
        application_name=APP_NAME,
        trust=True,
    )

    # kubeflow-roles and kubeflow-profiles are deployed so a Profile can be created
    for dependency in ("kubeflow-roles", "kubeflow-profiles"):
        await ops_test.model.deploy(
            entity_url=dependency,
            channel="latest/edge",
            trust=True,
        )

    # Every application must settle in "active" within ten minutes.
    await ops_test.model.wait_for_idle(
        status="active",
        raise_on_blocked=True,
        raise_on_error=True,
        timeout=60 * 10,
    )


@pytest.mark.parametrize("example", glob.glob("examples/*.yaml"))
@pytest.mark.abort_on_fail
async def test_authorization_for_creating_resources(
    example, ops_test: OpsTest, lightkube_client, apply_profile
):
    """Assert the Profile's default-editor service account may create each *Job kind.

    The test runs once per manifest found under ``examples/``; only the
    manifest's ``kind`` is used, the job itself is never applied. The
    ``apply_profile`` fixture is requested so the Profile exists beforehand.
    """
    manifest = yaml.safe_load(Path(example).read_text())
    impersonated_account = f"system:serviceaccount:{PROFILE_NAMESPACE}:default-editor"

    # Ask the API server whether the impersonated account may create this kind.
    command = [
        "kubectl",
        "auth",
        "can-i",
        "create",
        str(manifest["kind"]),
        f"--as={impersonated_account}",
        f"--namespace={PROFILE_NAMESPACE}",
    ]
    _, answer, _ = await ops_test.run(
        *command,
        check=True,
        fail_msg="Failed to execute kubectl auth",
    )

    assert answer.strip() == "yes"


def apply_manifests(lightkube_client: lightkube.Client, yaml_file_path: Path):
    """Apply every resource defined in a manifest file.

    Args:
        lightkube_client (lightkube.Client): an instance of lightkube.Client to
            use for applying resources.
        yaml_file_path (Path): the path to the resource yaml file.

    Returns:
        The last resource object loaded from the file (the parsed object, not
        the API server's response), or None when the file defines no resources.
    """
    resources = codecs.load_all_yaml(yaml_file_path.read_text())

    # Track the last object explicitly so an empty manifest returns None
    # instead of failing on an unbound loop variable.
    last_resource = None
    for resource in resources:
        lightkube_client.apply(
            obj=resource,
            name=resource.metadata.name,
        )
        last_resource = resource
    return last_resource


@pytest.fixture(scope="module")
def lightkube_client() -> lightkube.Client:
    """Provide one lightkube Client, shared by the whole module, for K8s API calls."""
    # NOTE(review): the field manager name looks inherited from another charm's
    # tests; confirm whether it should match this charm.
    return lightkube.Client(field_manager="kfp-operators")


@pytest.fixture(scope="module")
def apply_profile(lightkube_client):
    """Apply a Profile simulating a user and delete it again on teardown.

    Setup applies the Profile manifest and then polls the API until the Profile
    can be read back. Teardown deletes every resource defined in the manifest.

    Raises:
        RetryError: if the Profile still cannot be read once the retry budget
            (10 attempts or 30 seconds, whichever comes first) is exhausted.
    """
    # Generic resource class so lightkube can address the Profile CRD.
    profile_resource = create_global_resource(
        group="kubeflow.org", version="v1", kind="Profile", plural="profiles"
    )

    # Apply Profile first
    apply_manifests(lightkube_client, PROFILE_FILE_PATH)

    # Allow time for the Profile to be created. `reraise` is left at its
    # default so that exhausting the retries raises RetryError; with
    # `reraise=True` the handler below could never run.
    try:
        for attempt in Retrying(
            stop=(stop_after_attempt(10) | stop_after_delay(30)),
            wait=wait_exponential(multiplier=1, min=5, max=10),
        ):
            with attempt:
                lightkube_client.get(profile_resource, name=PROFILE_NAME)
    except RetryError:
        # Log the reason, then fail the fixture: the dependent tests cannot
        # pass without the Profile.
        log.error("Profile %s not found.", PROFILE_NAME)
        raise

    yield

    # Teardown: delete every resource defined in the Profile manifest.
    for obj in codecs.load_all_yaml(PROFILE_FILE_PATH.read_text()):
        lightkube_client.delete(
            res=type(obj),
            name=obj.metadata.name,
            namespace=obj.metadata.namespace,
        )
14 changes: 11 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ max-line-length = 100
[tox]
skipsdist = True
skip_missing_interpreters = True
envlist = fmt, lint, unit, integration
envlist = fmt, lint, unit, charm-integration, integration-with-profiles

[vars]
all_path = {[vars]src_path} {[vars]tst_path}
Expand Down Expand Up @@ -73,8 +73,16 @@ deps =
-r requirements-unit.txt
description = Run unit tests

[testenv:integration]
commands = pytest -v --tb native --asyncio-mode=auto {[vars]tst_path}integration --log-cli-level=INFO -s {posargs}
# Runs only integration/test_charm.py under {[vars]tst_path}; anything after
# "--" on the tox command line is forwarded to pytest through {posargs}.
[testenv:charm-integration]
commands = pytest -v --tb native --asyncio-mode=auto \
{[vars]tst_path}integration/test_charm.py --log-cli-level=INFO -s {posargs}
deps =
-r requirements-integration.txt
description = Run integration tests

# Runs only integration/test_charm_with_profile.py under {[vars]tst_path}, kept
# in a separate environment from charm-integration; {posargs} is forwarded to
# pytest.
[testenv:integration-with-profiles]
commands = pytest -v --tb native --asyncio-mode=auto \
{[vars]tst_path}integration/test_charm_with_profile.py --log-cli-level=INFO -s {posargs}
deps =
-r requirements-integration.txt
description = Run integration tests with profiles

0 comments on commit 93f53e8

Please sign in to comment.