From c3f66b3a655338cdaad2a7e1fad35ea016697be5 Mon Sep 17 00:00:00 2001 From: shiva kumar Date: Thu, 7 Nov 2024 11:44:50 +0530 Subject: [PATCH] ubuntu24.04 ci pipeline fix ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 
ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar 
ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci 
pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar ubuntu24.04 ci pipeline fix ubuntu24.04 ci pipeline fix ubuntu24.04 ci pipeline fix Signed-off-by: shiva kumar --- .common-ci.yml | 17 ++++- .github/workflows/ci.yaml | 2 +- .github/workflows/image.yaml | 12 ++-- .github/workflows/precompiled.yaml | 33 +++++++-- .gitlab-ci.yml | 9 +++ .nvidia-ci.yml | 67 +++++++++++++++++++ Makefile | 2 +- base/generate-ci-config | 3 + tests/cases/nvidia-driver.sh | 3 + ...olodeck.yaml => holodeck_ubuntu22.04.yaml} | 0 tests/holodeck_ubuntu24.04.yaml | 33 +++++++++ tests/scripts/.definitions.sh | 4 +- tests/scripts/checks.sh | 15 +++++ tests/scripts/ci-precompiled-helpers.sh | 2 +- tests/scripts/install-operator.sh | 7 ++ tests/scripts/kernel-upgrade-helper.sh | 33 +++++++++ ubuntu24.04/install.sh | 18 +++-- ubuntu24.04/nvidia-driver | 5 +- ubuntu24.04/precompiled/Dockerfile | 56 +++++++++++----- ubuntu24.04/precompiled/nvidia-driver | 6 ++ 20 files changed, 284 insertions(+), 43 deletions(-) rename tests/{holodeck.yaml => holodeck_ubuntu22.04.yaml} (100%) create mode 100644 tests/holodeck_ubuntu24.04.yaml diff --git a/.common-ci.yml b/.common-ci.yml index fbaed745..2899b256 100644 --- a/.common-ci.yml +++ b/.common-ci.yml @@ -73,19 +73,19 @@ trigger-pipeline: .driver-versions: parallel: matrix: - - DRIVER_VERSION: [535.216.01, 550.127.05] + - DRIVER_VERSION: [535.216.01, 550.127.05, 565.57.01] # Define the driver versions for jobs that can be run in parallel for ubuntu24.04 .driver-versions-ubuntu24.04: 
parallel: matrix: - - DRIVER_VERSION: [535.216.01, 550.127.05] + - DRIVER_VERSION: [535.216.01, 550.127.05, 565.57.01] # Define the matrix of precompiled jobs that can be run in parallel for ubuntu22.04 .driver-versions-precompiled-ubuntu22.04: parallel: matrix: - - DRIVER_BRANCH: [535, 550] + - DRIVER_BRANCH: [535, 550, 565] KERNEL_FLAVOR: [aws, azure, generic, nvidia, oracle] # Define the distribution targets @@ -312,3 +312,14 @@ release:staging-precompiled-ubuntu22.04: - .release:staging-precompiled needs: - image-precompiled-ubuntu22.04 + +# Precompiled Ubuntu24.04 release +release:staging-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: jammy + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .release:staging-precompiled + needs: + - image-precompiled-ubuntu24.04 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a46e34d3..fa5b321c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -40,7 +40,7 @@ jobs: aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} - holodeck_config: "tests/holodeck.yaml" + holodeck_config: "tests/holodeck_ubuntu22.04.yaml" - name: Get public dns name id: get_public_dns_name diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml index fd3d293b..e96b3c1b 100644 --- a/.github/workflows/image.yaml +++ b/.github/workflows/image.yaml @@ -21,12 +21,10 @@ on: - opened - synchronize branches: - - main - - release-* + - main-no push: branches: - - main - - release-* + - main-no jobs: image: @@ -91,6 +89,7 @@ jobs: IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/driver VERSION: ${COMMIT_SHORT_SHA} run: | + echo "DRIVER_VERSIONS=${{ matrix.driver }} make build-${{ matrix.dist }}-${{ matrix.driver }}" DRIVER_VERSIONS=${{ matrix.driver }} make build-${{ matrix.dist }}-${{ matrix.driver }} pre-compiled: @@ -156,6 +155,7 @@ jobs: VERSION: ${COMMIT_SHORT_SHA} 
BASE_TARGET: jammy run: | + echo "make DRIVER_BRANCH=${{ matrix.driver }} KERNEL_FLAVOR=${{ matrix.flavor }} build-base-${BASE_TARGET}" make DRIVER_BRANCH=${{ matrix.driver }} KERNEL_FLAVOR=${{ matrix.flavor }} build-base-${BASE_TARGET} trap "docker rm -f base-${BASE_TARGET}-${{ matrix.flavor }}" EXIT @@ -170,7 +170,9 @@ jobs: IMAGE_NAME: ghcr.io/${LOWERCASE_REPO_OWNER}/driver VERSION: ${COMMIT_SHORT_SHA} PRECOMPILED: "true" + # SHIVA -- add here ubuntu24.04 DIST: signed_ubuntu22.04 run: | - source kernel_version.txt && \ + source kernel_version.txt + echo "make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver }} build-${DIST}-${DRIVER_VERSION}" make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver }} build-${DIST}-${DRIVER_VERSION} diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml index 769ce505..aa1a0708 100644 --- a/.github/workflows/precompiled.yaml +++ b/.github/workflows/precompiled.yaml @@ -15,9 +15,20 @@ # Run this workflow on a schedule name: Precompiled images +# on: +# schedule: +# - cron: '00 09 * * *' # scheduled job + on: - schedule: - - cron: '00 09 * * *' + pull_request: + types: + - opened + - synchronize + branches: + - ci-precompile-ubuntu24.04 + push: + branches: + - ci-precompile-ubuntu24.04 jobs: set-driver-version-matrix: @@ -33,17 +44,25 @@ jobs: id: extract_driver_branch run: | # get driver_branch - DRIVER_BRANCH=("535" "550") + # DRIVER_BRANCH=("535" "550" "565") + # DRIVER_BRANCH=("535" "550") + DRIVER_BRANCH=("550") + # DRIVER_BRANCH=("565") driver_branch_json=$(printf '%s\n' "${DRIVER_BRANCH[@]}" | jq -R . | jq -cs .) 
echo "driver_branch=$driver_branch_json" >> $GITHUB_OUTPUT # get kernel flavors - KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle") + # KERNEL_FLAVORS=("aws" "azure" "generic" "nvidia" "oracle") + # KERNEL_FLAVORS=("azure" "generic" "nvidia" "oracle") + # KERNEL_FLAVORS=("aws") + KERNEL_FLAVORS=("generic") kernel_flavors_json=$(printf '%s\n' "${KERNEL_FLAVORS[@]}" | jq -R . | jq -cs .) echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT # get ubuntu distributions - DIST=("ubuntu22.04") + # DIST=("ubuntu22.04" "ubuntu24.04") + # DIST=("ubuntu22.04") + DIST=("ubuntu24.04") dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .) echo "dist=$dist_json" >> $GITHUB_OUTPUT @@ -201,7 +220,7 @@ jobs: aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} - holodeck_config: "tests/holodeck.yaml" + holodeck_config: "tests/holodeck_${{ matrix.dist }}.yaml" - name: Get public dns name id: get_public_dns_name @@ -296,7 +315,7 @@ jobs: matrix: driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }} kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }} - dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} + dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} steps: - name: Check out code uses: actions/checkout@v4 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d05de9e0..6845842b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -120,3 +120,12 @@ image-precompiled-ubuntu22.04: extends: - .driver-versions-precompiled-ubuntu22.04 - .image-build-precompiled + +image-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: jammy + CVE_UPDATES: "curl libc6" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .image-build-precompiled diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml index 04470fd7..8a86b90f 100644 --- a/.nvidia-ci.yml +++ 
b/.nvidia-ci.yml @@ -77,6 +77,30 @@ variables: - !reference [.image-pull-rules, rules] +.image-pull-ubuntu24.04: + # Perform for each DRIVER_VERSION + extends: + - .driver-versions + - .image-pull-generic + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" + when: never + - !reference [.image-pull-rules, rules] + +image-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: jammy + PRECOMPILED: "true" + CVE_UPDATES: "curl libc6" + rules: + - when: delayed + start_in: 30 minutes + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .image-pull-generic + + .image-pull-ubuntu22.04: # Perform for each DRIVER_VERSION extends: @@ -196,6 +220,18 @@ image-rhel8: - if: $CI_PIPELINE_SOURCE == "merge_request_event" - !reference [.pipeline-trigger-rules, rules] +.scan-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: jammy + PRECOMPILED: "true" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .scan-generic + rules: + - !reference [.scan-rules-common, rules] + - when: always + .scan-precompiled-ubuntu22.04: variables: DIST: signed_ubuntu22.04 @@ -312,6 +348,19 @@ release:ngc-ubuntu24.04: - .dist-ubuntu24.04 - .driver-versions-ubuntu24.04 +release:ngc-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: jammy + PRECOMPILED: "true" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .release-generic + - .release:ngc-variables + rules: + # Only run NGC release job on scheduled pipelines + - if: $CI_PIPELINE_SOURCE == "schedule" + release:ngc-precompiled-ubuntu22.04: variables: DIST: signed_ubuntu22.04 @@ -439,6 +488,23 @@ release:ngc-rhel8.10: - 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"' - ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia +sign:ngc-precompiled-ubuntu24.04: + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .dist-ubuntu22.04 + - 
.release-generic + - .release:ngc-variables + - .sign:ngc + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: jammy + PRECOMPILED: "true" + needs: + - release:ngc-precompiled-ubuntu24.04 + rules: + # Only run NGC release job on scheduled pipelines + - if: $CI_PIPELINE_SOURCE == "schedule" + sign:ngc-precompiled-ubuntu22.04: extends: - .driver-versions-precompiled-ubuntu22.04 @@ -463,6 +529,7 @@ sign:ngc-ubuntu-rhel-rhcos: matrix: - SIGN_JOB_NAME: ["ubuntu"] VERSION: ["24.04"] + DRIVER_VERSION: ["535.216.01", "550.127.05", "565.57.01"] - SIGN_JOB_NAME: ["ubuntu"] VERSION: ["22.04"] DRIVER_VERSION: ["535.216.01", "550.127.05", "565.57.01"] diff --git a/Makefile b/Makefile index 468af6cc..e1fd6729 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST) OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG) ##### Public rules ##### -DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos +DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS)) BASE_FROM := jammy focal PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS)) diff --git a/base/generate-ci-config b/base/generate-ci-config index 1fc863e0..9fe854f8 100755 --- a/base/generate-ci-config +++ b/base/generate-ci-config @@ -24,6 +24,9 @@ DRIVER_VERSION=$(apt-cache show nvidia-utils-${DRIVER_BRANCH}-server |grep Versi # Latest supported kernel SK=$(echo $SUPPORTED_KERNELS | awk '{print $NF}') +# SHIVA +#SK=$(echo $SUPPORTED_KERNELS | awk '{print $(NF-1)}') + # Write to file echo "export KERNEL_VERSION=$SK DRIVER_VERSION=$DRIVER_VERSION DRIVER_VERSIONS=$DRIVER_VERSION" > /var/kernel_version.txt echo "KERNEL_VERSION=$SK DRIVER_VERSION=$DRIVER_VERSION DRIVER_VERSIONS=$DRIVER_VERSION" diff --git 
a/tests/cases/nvidia-driver.sh b/tests/cases/nvidia-driver.sh index 8ddafad0..f77d928a 100755 --- a/tests/cases/nvidia-driver.sh +++ b/tests/cases/nvidia-driver.sh @@ -8,9 +8,12 @@ fi # export gpu-operator options export TEST_CASE_ARGS="$1" +echo "uploading image SHIVA $2" if [[ $# -eq 2 ]]; then export IMAGE_PATH="$2" + echo "uploading image SHIVA 1" sudo ctr -n k8s.io images import "$IMAGE_PATH" + echo "uploading image SHIVA 2" fi SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )"/../scripts && pwd )" diff --git a/tests/holodeck.yaml b/tests/holodeck_ubuntu22.04.yaml similarity index 100% rename from tests/holodeck.yaml rename to tests/holodeck_ubuntu22.04.yaml diff --git a/tests/holodeck_ubuntu24.04.yaml b/tests/holodeck_ubuntu24.04.yaml new file mode 100644 index 00000000..8f9f9922 --- /dev/null +++ b/tests/holodeck_ubuntu24.04.yaml @@ -0,0 +1,33 @@ +apiVersion: holodeck.nvidia.com/v1alpha1 +kind: Environment +metadata: + name: HOLODECK_NAME + description: "end-to-end test infrastructure" +spec: + provider: aws + auth: + keyName: cnt-ci + privateKey: HOLODECK_PRIVATE_KEY + instance: + type: g4dn.xlarge + region: us-west-1 + ingressIpRanges: + - 18.190.12.32/32 + - 3.143.46.93/32 + - 52.15.119.136/32 + - 35.155.108.162/32 + - 35.162.190.51/32 + - 54.201.61.24/32 + image: + architecture: amd64 + imageId: ami-0ce2cb35386fc22e9 + #imageId: ami-0da424eb883458071 + containerRuntime: + install: true + name: containerd + version: 1.7.22 + kubernetes: + install: true + installer: kubeadm + version: v1.30.0 + crictlVersion: v1.30.0 diff --git a/tests/scripts/.definitions.sh b/tests/scripts/.definitions.sh index 945bb04c..3feb70cc 100644 --- a/tests/scripts/.definitions.sh +++ b/tests/scripts/.definitions.sh @@ -16,8 +16,8 @@ CASES_DIR="$( cd "${TEST_DIR}/cases" && pwd )" : ${HELM_NVIDIA_REPO:="https://helm.ngc.nvidia.com/nvidia"} -: ${DAEMON_POD_STATUS_TIME_OUT:="15m"} -: ${POD_STATUS_TIME_OUT:="2m"} +: ${DAEMON_POD_STATUS_TIME_OUT:="600m"} +: 
${POD_STATUS_TIME_OUT:="600m"} : ${LOG_DIR:="/tmp/logs"} diff --git a/tests/scripts/checks.sh b/tests/scripts/checks.sh index c30b2d4f..162f301f 100755 --- a/tests/scripts/checks.sh +++ b/tests/scripts/checks.sh @@ -15,6 +15,21 @@ check_pod_ready() { else # print status of pod kubectl get pods -n ${TEST_NAMESPACE} + echo "SHIVA AAAAAAAAA CHECK logs" + # sudo ctr -n k8s.io images ls + echo "SHIVA AAAAAAAAA CHECK=============" + kubectl describe pod $(kubectl get pods -n ${TEST_NAMESPACE} --no-headers | grep "nvidia-driver-daemonset" | awk '{print $1}') -n ${TEST_NAMESPACE} + echo "SHIVA AAAAAAAAA CHECK@@@@@@@@@@@@@" + kubectl get pods -n ${TEST_NAMESPACE} --no-headers + echo "SHIVA AAAAAAAAA CHECK@@@@@@@@@@@@@" + kubectl get pods -n ${TEST_NAMESPACE} --no-headers | grep "nvidia-driver-daemonset" + echo "SHIVA AAAAAAAAA CHECK@@@@@@@@@@@@@" + kubectl get pods -n ${TEST_NAMESPACE} --no-headers | grep "nvidia-driver-daemonset" | awk '{print $1}' + echo "SHIVA AAAAAAAAA CHECK@@@@@@@@@@@@@" + kubectl logs $(kubectl get pods -n ${TEST_NAMESPACE} --no-headers | grep "nvidia-driver-daemonset" | awk '{print $1}') -n ${TEST_NAMESPACE} + echo "SHIVA AAAAAAAAA CHECK=============" + sudo uname -r + echo "SHIVA AAAAAAAAA CHECK****************" fi return 1 diff --git a/tests/scripts/ci-precompiled-helpers.sh b/tests/scripts/ci-precompiled-helpers.sh index 2efa9a75..f45539b3 100644 --- a/tests/scripts/ci-precompiled-helpers.sh +++ b/tests/scripts/ci-precompiled-helpers.sh @@ -1,6 +1,6 @@ get_kernel_versions_to_test() { if [[ "$#" -ne 4 ]]; then - echo " Error:$0 must be called with BASE_TARGET DRIVER_BRANCHES DRIVER_BRANCHES DIST" >&2 + echo " Error:$0 must be called with BASE_TARGET KERNEL_FLAVORS DRIVER_BRANCHES DIST" >&2 exit 1 fi diff --git a/tests/scripts/install-operator.sh b/tests/scripts/install-operator.sh index 2b4bcbaf..eac49867 100755 --- a/tests/scripts/install-operator.sh +++ b/tests/scripts/install-operator.sh @@ -9,6 +9,13 @@ echo "Checking current kernel version..." 
CURRENT_KERNEL=$(uname -r) echo "Current kernel version: $CURRENT_KERNEL" +sudo apt-get autoremove --purge -y +sudo dpkg --list | grep linux-image +sudo dpkg --list | awk '/linux-image-[0-9]/{print $2}' | grep -v $(uname -r) +sudo apt-get purge $(dpkg --list | awk '/linux-image-[0-9]/{print $2}' | grep -v $(uname -r)) -y +sudo apt-get autoremove --purge -y +sudo apt-get autoclean -y + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" source ${SCRIPT_DIR}/.definitions.sh diff --git a/tests/scripts/kernel-upgrade-helper.sh b/tests/scripts/kernel-upgrade-helper.sh index a8926397..017e3ab4 100755 --- a/tests/scripts/kernel-upgrade-helper.sh +++ b/tests/scripts/kernel-upgrade-helper.sh @@ -23,6 +23,19 @@ if [ "${CURRENT_KERNEL}" != ${KERNEL_VERSION} ]; then export EDITOR=/bin/true echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections + sudo apt clean -y || true + sudo apt update -y || true + sudo apt-key del 7fa2af80 && \ + sudo apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub" + # upgrade on ubuntu24.04 OS fails , update /etc/apt/sources.list + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main universe" | sudo tee /etc/apt/sources.list + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates main universe" | sudo tee -a /etc/apt/sources.list + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-security main universe" | sudo tee -a /etc/apt/sources.list + sudo usermod -o -u 0 -g 0 _apt + + sudo apt-key del 7fa2af80 && \ + sudo apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub" + sudo apt clean -y || true sudo apt-get update -y || true # The removal of the currently running kernel (apt remove linux-image-*) sometimes works and sometimes does not. 
@@ -33,6 +46,26 @@ if [ "${CURRENT_KERNEL}" != ${KERNEL_VERSION} ]; then sudo rm -rf /boot/*.old #install new kernel + # upgrade on ubuntu24.04 OS fails , update /etc/apt/sources.list + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main universe" | sudo tee /etc/apt/sources.list + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates main universe" | sudo tee -a /etc/apt/sources.list + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-security main universe" | sudo tee -a /etc/apt/sources.list + sudo usermod -o -u 0 -g 0 _apt + + sudo apt-key del 7fa2af80 && \ + sudo apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub" + + echo "SHIVAAAAAAAAAAAAAAAAA" + sudo uname -r + sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32 + sudo apt clean -y || true + sudo apt-get update -y || true + sudo dpkg --configure -a + echo "SHIVAAAAAAAAAAAAAAAAA kumar" + sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32 + sudo apt clean -y || true + sudo apt-get update -y || true + sudo apt-get install --allow-downgrades linux-image-${KERNEL_VERSION} linux-headers-${KERNEL_VERSION} linux-modules-${KERNEL_VERSION} -y || exit 1 if [ $? -ne 0 ]; then echo "Kernel upgrade failed." 
diff --git a/ubuntu24.04/install.sh b/ubuntu24.04/install.sh index 8b36d4bd..d5e65704 100755 --- a/ubuntu24.04/install.sh +++ b/ubuntu24.04/install.sh @@ -37,14 +37,20 @@ dep_install () { repo_setup () { if [ "$TARGETARCH" = "amd64" ]; then - echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \ - echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \ - echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \ + # echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \ + # echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \ + # echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main universe" > /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-security main universe" >> /etc/apt/sources.list && \ usermod -o -u 0 -g 0 _apt elif [ "$TARGETARCH" = "arm64" ]; then - echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports noble main universe" > /etc/apt/sources.list && \ - echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports noble-updates main universe" >> /etc/apt/sources.list && \ - echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports noble-security main universe" >> /etc/apt/sources.list && \ + #echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports noble main universe" > /etc/apt/sources.list && \ + #echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports noble-updates main universe" >> /etc/apt/sources.list && \ + #echo "deb [arch=arm64] 
http://ports.ubuntu.com/ubuntu-ports noble-security main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports jammy main universe" > /etc/apt/sources.list && \ + echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports jammy-updates main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports jammy-security main universe" >> /etc/apt/sources.list && \ usermod -o -u 0 -g 0 _apt else echo "TARGETARCH doesn't match a known arch target" diff --git a/ubuntu24.04/nvidia-driver b/ubuntu24.04/nvidia-driver index aedeeea2..83453cbb 100755 --- a/ubuntu24.04/nvidia-driver +++ b/ubuntu24.04/nvidia-driver @@ -16,7 +16,10 @@ NVIDIA_MODESET_MODULE_PARAMS=() NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} -OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false} +# TODO default open kernel module for ubuntu24.04 +# closed nvidia kernel modules gives error of dpkg configure +# OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false} +OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-true} [[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel export DEBIAN_FRONTEND=noninteractive diff --git a/ubuntu24.04/precompiled/Dockerfile b/ubuntu24.04/precompiled/Dockerfile index 25e69bb1..f4d312e6 100644 --- a/ubuntu24.04/precompiled/Dockerfile +++ b/ubuntu24.04/precompiled/Dockerfile @@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND=noninteractive ARG DRIVER_BRANCH=550 ENV DRIVER_BRANCH=$DRIVER_BRANCH -ARG DRIVER_VERSION=550.90.12 +ARG DRIVER_VERSION=550.127.05 ENV DRIVER_VERSION=$DRIVER_VERSION ARG KERNEL_VERSION=6.8.0-44-generic @@ -19,7 +19,7 @@ RUN apt-key del 7fa2af80 && \ apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub" RUN dpkg --add-architecture i386 && \ - apt-get update && apt-get install -y --no-install-recommends \ + apt-get 
purge -y && apt-get update && apt-get install -y --no-install-recommends \ apt-utils \ build-essential \ ca-certificates \ @@ -31,11 +31,17 @@ RUN dpkg --add-architecture i386 && \ pkg-config && \ rm -rf /var/lib/apt/lists/* -RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \ - echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \ - echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \ - echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-updates main restricted" >> /etc/apt/sources.list && \ - echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-security main restricted" >> /etc/apt/sources.list && \ + +# RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \ +# echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \ +# echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \ +# usermod -o -u 0 -g 0 _apt + +RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main universe" > /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-security main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu jammy-updates main restricted" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu jammy-security main restricted" >> /etc/apt/sources.list && \ usermod -o -u 0 -g 0 _apt RUN curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \ @@ -48,21 +54,38 @@ RUN if [ -n "${CVE_UPDATES}" ]; then \ rm -rf 
/var/lib/apt/lists/*; \ fi +RUN echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/ /" | tee /etc/apt/sources.list.d/cuda.list; \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub; usermod -o -u 0 -g 0 _apt + # update pkg cache and install pkgs for userspace driver libs -RUN apt-get update && apt-get install -y --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server \ - nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \ - libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 && \ +RUN apt-get update -y; \ + FABRIC_PACKAGE="nvidia-fabricmanager-${DRIVER_BRANCH}"; \ + LIBNSCQ_PACKAGE="libnvidia-nscq-${DRIVER_BRANCH}"; \ + echo "SHIVA111"; \ + apt search nvidia-kernel; \ + apt search nvidia; \ + apt list *nvidia*; \ + echo "SHIVA222"; \ + apt-get install -y --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server \ + ${FABRIC_PACKAGE} \ + ${LIBNSCQ_PACKAGE} && \ apt-get purge -y \ - libnvidia-egl-wayland1 \ - nvidia-dkms-${DRIVER_BRANCH}-server \ - nvidia-kernel-source-${DRIVER_BRANCH}-server \ - xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server && \ - rm -rf /var/lib/apt/lists/*; + libnvidia-egl-wayland1 \ + nvidia-dkms-${DRIVER_BRANCH}-server \ + nvidia-kernel-source-${DRIVER_BRANCH}-server \ + xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server && \ + rm -rf /var/lib/apt/lists/*; + # update pkg cache and download pkgs for driver module installation during runtime. # this is done to avoid shipping .ko files. # avoid cleaning the cache after this to retain these packages during runtime. 
-RUN apt-get update && apt-get install --download-only --no-install-recommends -y linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \ +# RUN apt search linux-objects-nvidia* ; \ +# apt search linux-signatures-nvidia*; + +# RUN apt update && apt-get update && apt search linux-modules* + +RUN apt-get purge -y && apt update && apt-get update && apt-get install --download-only --no-install-recommends -y linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \ linux-signatures-nvidia-${KERNEL_VERSION} \ linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \ # add support for nvidia open source driver packages during runtime @@ -75,4 +98,5 @@ WORKDIR /drivers # Remove cuda repository to avoid GPG errors RUN rm -f /etc/apt/sources.list.d/cuda* +RUN apt-get purge -y ENTRYPOINT ["nvidia-driver", "init"] diff --git a/ubuntu24.04/precompiled/nvidia-driver b/ubuntu24.04/precompiled/nvidia-driver index 97369be2..39f6526a 100755 --- a/ubuntu24.04/precompiled/nvidia-driver +++ b/ubuntu24.04/precompiled/nvidia-driver @@ -211,6 +211,12 @@ _unload_driver() { # Link and install the kernel modules from a precompiled packages _install_driver() { + # apt search linux-objects-nvidia* ; \ + # apt search linux-signatures-nvidia*; + + # apt update && apt-get update && apt search linux-objects-nvidia* ; \ + # apt search linux-signatures-nvidia*; + if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then echo "Installing Open NVIDIA driver kernel modules..." apt-get install --no-install-recommends -y \