From d48985e386e4846394672d4912cceb9d65234639 Mon Sep 17 00:00:00 2001 From: misohu Date: Thu, 26 Sep 2024 10:49:42 +0200 Subject: [PATCH] Add tox for running MLflow UATs --- .github/workflows/deploy-eks.yaml | 18 +++---- tests/integration/test_bundle_deployment.py | 58 +++++++++++++-------- 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/.github/workflows/deploy-eks.yaml b/.github/workflows/deploy-eks.yaml index ee2fba9d..ae5a01a8 100644 --- a/.github/workflows/deploy-eks.yaml +++ b/.github/workflows/deploy-eks.yaml @@ -119,18 +119,7 @@ jobs: juju add-model kubeflow - name: Test bundle deployment - run: | - juju deploy kubeflow --channel=$KUBEFLOW_CHANNEL --trust && - juju deploy ./releases/2.15/stable/mlflow/bundle.yaml --trust && - juju deploy resource-dispatcher --channel=$RESOURCE_DISPATCHER_CHANNEL --trust && - juju integrate mlflow-server:secrets resource-dispatcher:secrets && - juju integrate mlflow-server:pod-defaults resource-dispatcher:pod-defaults && - juju integrate mlflow-minio:object-storage kserve-controller:object-storage && - juju integrate kserve-controller:service-accounts resource-dispatcher:service-accounts && - juju integrate kserve-controller:secrets resource-dispatcher:secrets && - juju integrate mlflow-server:ingress istio-pilot:ingress && - juju integrate mlflow-server:dashboard-links kubeflow-dashboard:links - + run: | tox -vve test_bundle_deployment-${{ matrix.bundle_version }} -- --model kubeflow --keep-models -vv -s - name: Run Kubeflow UATs @@ -139,6 +128,11 @@ jobs: cd ~/charmed-kubeflow-uats git checkout ${{ env.UATS_BRANCH }} tox -e mlflow-remote + + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + timeout-minutes: 300 + if: failure() # On failure, capture debugging resources - name: Save debug artifacts diff --git a/tests/integration/test_bundle_deployment.py b/tests/integration/test_bundle_deployment.py index f636e3b1..e8952bc7 100644 --- a/tests/integration/test_bundle_deployment.py +++ b/tests/integration/test_bundle_deployment.py @@ -20,29 +20,43 @@ def lightkube_client() -> lightkube.Client: def bundle_path() -> str: return os.environ.get("BUNDLE_PATH").replace("\"", "") -def run_juju_commands_in_one(bundle_path: str, kubeflow_channel: str, resource_dispatcher_channel: str): - """Helper function to run all juju commands in one subprocess.""" - commands = f""" - juju deploy kubeflow --channel={kubeflow_channel} --trust && - juju deploy {bundle_path} --trust && - juju deploy resource-dispatcher --channel={resource_dispatcher_channel} --trust && - juju integrate mlflow-server:secrets resource-dispatcher:secrets && - juju integrate mlflow-server:pod-defaults resource-dispatcher:pod-defaults && - juju integrate mlflow-minio:object-storage kserve-controller:object-storage && - juju integrate kserve-controller:service-accounts resource-dispatcher:service-accounts && - juju integrate kserve-controller:secrets resource-dispatcher:secrets && - juju integrate mlflow-server:ingress istio-pilot:ingress && - juju integrate mlflow-server:dashboard-links kubeflow-dashboard:links - """ - - # Execute all commands in one subprocess - subprocess.run(commands, shell=True, check=True) +async def deploy_bundle(ops_test: OpsTest, bundle_path, trust: bool) -> None: + """Deploy a bundle from file using juju CLI.""" + run_args = ["juju", "deploy", "-m", ops_test.model_full_name, f"{bundle_path}"] + if trust: + run_args.append("--trust") + retcode, stdout, stderr = await ops_test.run(*run_args) + print(stdout) + assert retcode == 0, f"Deploy failed: {(stderr or stdout).strip()}" class TestCharm: @pytest.mark.abort_on_fail async def test_bundle_deployment_works(self, ops_test: OpsTest, lightkube_client, bundle_path): - # Run all Juju commands in a single subprocess call - # run_juju_commands_in_one(bundle_path, KUBEFLOW_CHANNEL, RESOURCE_DISPATCHER_CHANNEL) + # Deploy Kubeflow with channel and trust + await ops_test.model.deploy( + entity_url="kubeflow", + channel=KUBEFLOW_CHANNEL, + trust=True, + ) + + # Deploy the bundle path + await deploy_bundle(ops_test, bundle_path, trust=True) + + # Deploy resource-dispatcher with its channel and trust + await ops_test.model.deploy( + entity_url="resource-dispatcher", + channel=RESOURCE_DISPATCHER_CHANNEL, + trust=True, + ) + + # Relate services as per Juju integrations + await ops_test.model.relate("mlflow-server:secrets", "resource-dispatcher:secrets") + await ops_test.model.relate("mlflow-server:pod-defaults", "resource-dispatcher:pod-defaults") + await ops_test.model.relate("mlflow-minio:object-storage", "kserve-controller:object-storage") + await ops_test.model.relate("kserve-controller:service-accounts", "resource-dispatcher:service-accounts") + await ops_test.model.relate("kserve-controller:secrets", "resource-dispatcher:secrets") + await ops_test.model.relate("mlflow-server:ingress", "istio-pilot:ingress") + await ops_test.model.relate("mlflow-server:dashboard-links", "kubeflow-dashboard:links") # Wait for the model to become active and idle await ops_test.model.wait_for_idle( @@ -61,9 +75,7 @@ async def test_bundle_deployment_works(self, ops_test: OpsTest, lightkube_client assert "Password" in result_text def get_public_url(lightkube_client: lightkube.Client, bundle_name: str): - """Extracts public url from service istio-ingressgateway-workload for EKS deployment. - As a next step, this could be generalized in order for the above test to run in MicroK8s as well. - """ + """Extracts public URL from service istio-ingressgateway-workload.""" ingressgateway_svc = lightkube_client.get( Service, "istio-ingressgateway-workload", namespace=bundle_name ) @@ -72,7 +84,7 @@ def get_public_url(lightkube_client: lightkube.Client, bundle_name: str): return public_url async def fetch_response(url, headers=None): - """Fetch provided URL and return pair - status and text (int, string).""" + """Fetch provided URL and return (status, text).""" result_status = 0 result_text = "" async with aiohttp.ClientSession() as session: