From 2628e066c68511e3fd4eb7869467ed0c27c439ff Mon Sep 17 00:00:00 2001 From: Misha Sakhnov Date: Tue, 22 Oct 2024 18:36:48 +0200 Subject: [PATCH] ci: build images for arm and amd Signed-off-by: Misha Sakhnov --- .github/workflows/build-images.yaml | 148 +++++++++++++++---------- .github/workflows/build-test-vm.yaml | 61 ++++++---- .github/workflows/check-ca-builds.yaml | 9 +- .github/workflows/e2e-test.yaml | 43 ++++--- .github/workflows/vm-kernel.yaml | 28 ++++- Makefile | 24 +++- cluster-autoscaler/Dockerfile | 10 +- tests/e2e/vm-neonvmd/00-create-vm.yaml | 1 + 8 files changed, 212 insertions(+), 112 deletions(-) diff --git a/.github/workflows/build-images.yaml b/.github/workflows/build-images.yaml index 3784c651e..f5a70cd7f 100644 --- a/.github/workflows/build-images.yaml +++ b/.github/workflows/build-images.yaml @@ -31,21 +31,38 @@ on: type: boolean required: false outputs: - controller: + + controller-amd64: + description: 'neonvm-controller image' + value: ${{ jobs.build.outputs.controller-amd64 }} + vxlan-controller-amd64: + description: 'neonvm-vxlan-controller image' + value: ${{ jobs.build.outputs.vxlan-controller-amd64 }} + runner-amd64: + description: 'neonvm-runner image' + value: ${{ jobs.build.outputs.runner-amd64 }} + scheduler-amd64: + description: 'autoscale-scheduler image' + value: ${{ jobs.build.outputs.scheduler-amd64 }} + autoscaler-agent-amd64: + description: 'autoscaler-agent image' + value: ${{ jobs.build.outputs.autoscaler-agent-amd64 }} + + controller-arm64: description: 'neonvm-controller image' - value: ${{ jobs.tags.outputs.controller }} - vxlan-controller: + value: ${{ jobs.build.outputs.controller-arm64 }} + vxlan-controller-arm64: description: 'neonvm-vxlan-controller image' - value: ${{ jobs.tags.outputs.vxlan-controller }} - runner: + value: ${{ jobs.build.outputs.vxlan-controller-arm64 }} + runner-arm64: description: 'neonvm-runner image' - value: ${{ jobs.tags.outputs.runner }} - scheduler: + value: ${{ jobs.build.outputs.runner-arm64 }} + scheduler-arm64: description: 'autoscale-scheduler image' - value: ${{ jobs.tags.outputs.scheduler }} - autoscaler-agent: + value: ${{ jobs.build.outputs.scheduler-arm64 }} + autoscaler-agent-arm64: description: 'autoscaler-agent image' - value: ${{ jobs.tags.outputs.autoscaler-agent }} + value: ${{ jobs.build.outputs.autoscaler-agent-arm64 }} env: IMG_CONTROLLER: "neondatabase/neonvm-controller" @@ -58,39 +75,11 @@ env: ECR_DEV: "369495373322.dkr.ecr.eu-central-1.amazonaws.com" ECR_PROD: "093970136003.dkr.ecr.eu-central-1.amazonaws.com" - # Why localhost? We use a local registry so that when docker/build-push-action tries to pull the - # image we built locally, it'll actually have a place to pull from. - # - # Otherwise, if we just try to use a local image, it fails trying to pull it from dockerhub. - # See https://github.com/moby/buildkit/issues/2343 for more information. - GO_BASE_IMG: "localhost:5000/neondatabase/autoscaling-go-base:dev" - # Default architecture to build. In future it would be changed to multi-arch build or separate builds for each arch - TARGET_ARCH: "amd64" - defaults: run: shell: bash -euo pipefail {0} jobs: - tags: - outputs: - controller: ${{ steps.show-tags.outputs.controller }} - vxlan-controller: ${{ steps.show-tags.outputs.vxlan-controller }} - runner: ${{ steps.show-tags.outputs.runner }} - scheduler: ${{ steps.show-tags.outputs.scheduler }} - autoscaler-agent: ${{ steps.show-tags.outputs.autoscaler-agent }} - cluster-autoscaler: ${{ steps.show-tags.outputs.cluster-autoscaler }} - runs-on: ubuntu-latest - steps: - - id: show-tags - run: | - echo "controller=${{ env.IMG_CONTROLLER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT - echo "vxlan-controller=${{ env.IMG_VXLAN_CONTROLLER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT - echo "runner=${{ env.IMG_RUNNER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT - echo "scheduler=${{ env.IMG_SCHEDULER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT - echo "autoscaler-agent=${{ env.IMG_AUTOSCALER_AGENT }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT - echo "cluster-autoscaler=${{ env.IMG_CLUSTER_AUTOSCALER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT - vm-kernel: # nb: use format(..) to catch both inputs.skip = true AND inputs.skip = 'true'. if: ${{ format('{0}', inputs.skip) != 'true' }} @@ -98,17 +87,45 @@ jobs: with: tag: ${{ inputs.kernel-image || inputs.tag }} return-image-for-tag: ${{ inputs.kernel-image }} + arch: ${{ matrix.arch }} + strategy: + matrix: + arch: [ 'amd64', 'arm64' ] secrets: inherit build: # nb: use format(..) to catch both inputs.skip = true AND inputs.skip = 'true'. if: ${{ format('{0}', inputs.skip) != 'true' }} - needs: [ tags, vm-kernel ] - runs-on: [ self-hosted, large ] + needs: [ vm-kernel ] + outputs: + controller-amd64: ${{ steps.tags.outputs.controller-amd64 }} + vxlan-controller-amd64: ${{ steps.tags.outputs.vxlan-controller-amd64 }} + runner-amd64: ${{ steps.tags.outputs.runner-amd64 }} + scheduler-amd64: ${{ steps.tags.outputs.scheduler-amd64 }} + autoscaler-agent-amd64: ${{ steps.tags.outputs.autoscaler-agent-amd64 }} + cluster-autoscaler-amd64: ${{ steps.tags.outputs.cluster-autoscaler-amd64 }} + controller-arm64: ${{ steps.tags.outputs.controller-arm64 }} + vxlan-controller-arm64: ${{ steps.tags.outputs.vxlan-controller-arm64 }} + runner-arm64: ${{ steps.tags.outputs.runner-arm64 }} + scheduler-arm64: ${{ steps.tags.outputs.scheduler-arm64 }} + autoscaler-agent-arm64: ${{ steps.tags.outputs.autoscaler-agent-arm64 }} + cluster-autoscaler-arm64: ${{ steps.tags.outputs.cluster-autoscaler-arm64 }} + env: + # Why localhost? We use a local registry so that when docker/build-push-action tries to pull the + # image we built locally, it'll actually have a place to pull from. + # + # Otherwise, if we just try to use a local image, it fails trying to pull it from dockerhub. + # See https://github.com/moby/buildkit/issues/2343 for more information. + GO_BASE_IMG: ${{ format('localhost:5000/neondatabase/autoscaling-go-base-{0}:dev', matrix.arch) }} permissions: contents: read # This is required for actions/checkout id-token: write # This is required for aws-actions/configure-aws-credentials + strategy: + matrix: + arch: [ arm64 ] + runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} + services: registry: image: registry:2 @@ -116,6 +133,16 @@ jobs: - 5000:5000 steps: + # tags converted to be a step and moved here to be in the same strategy context + - id: tags + run: | + echo "controller-${{matrix.arch}}=${{ env.IMG_CONTROLLER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT + echo "vxlan-controller-${{matrix.arch}}=${{ env.IMG_VXLAN_CONTROLLER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT + echo "runner-${{matrix.arch}}=${{ env.IMG_RUNNER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT + echo "scheduler-${{matrix.arch}}=${{ env.IMG_SCHEDULER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT + echo "autoscaler-agent-${{matrix.arch}}=${{ env.IMG_AUTOSCALER_AGENT }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT + echo "cluster-autoscaler-${{matrix.arch}}=${{ env.IMG_CLUSTER_AUTOSCALER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT + - uses: actions/checkout@v4 with: fetch-depth: 0 # fetch all, so that we also include tags @@ -164,7 +191,6 @@ jobs: registry: cache.neon.build username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }} password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }} - - name: Configure dev AWS credentials if: ${{ format('{0}', inputs.upload-to-ecr) == 'true' }} uses: aws-actions/configure-aws-credentials@v4 @@ -200,7 +226,8 @@ jobs: - name: Load VM kernel env: - IMAGE: ${{ needs.vm-kernel.outputs.image }} + IMAGE: ${{format(needs.vm-kernel.outputs.image_placeholder, matrix.arch)}} + # IMAGE: ${{format('neondatabase/vm-kernel-{0}:3372e65.11573472293', matrix.arch)}} run: | docker pull --quiet $IMAGE ID=$(docker create $IMAGE true) @@ -212,23 +239,23 @@ jobs: id: build-go-dependencies-image with: context: . - platforms: linux/amd64 + push: true file: Dockerfile.go-base cache-from: type=registry,ref=cache.neon.build/autoscaling-go-base:cache cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/autoscaling-go-base:cache,mode=max' || '' }} tags: ${{ env.GO_BASE_IMG }} - + - name: Build and push neonvm-runner image uses: docker/build-push-action@v6 with: context: . - platforms: linux/amd64 push: true + platforms: linux/${{ matrix.arch }} file: neonvm-runner/Dockerfile cache-from: type=registry,ref=cache.neon.build/neonvm-runner:cache cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-runner:cache,mode=max' || '' }} - tags: ${{ needs.tags.outputs.runner }} + tags: ${{ steps.tags.outputs[format('runner-{0}', matrix.arch)] }} build-args: | GO_BASE_IMG=${{ env.GO_BASE_IMG }} @@ -247,41 +274,41 @@ jobs: uses: docker/build-push-action@v6 with: context: . - platforms: linux/amd64 + platforms: linux/${{ matrix.arch }} push: true file: neonvm-controller/Dockerfile cache-from: type=registry,ref=cache.neon.build/neonvm-controller:cache cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-controller:cache,mode=max' || '' }} - tags: ${{ needs.tags.outputs.controller }} + tags: ${{ steps.tags.outputs[format('controller-{0}', matrix.arch)] }} build-args: | GO_BASE_IMG=${{ env.GO_BASE_IMG }} - VM_RUNNER_IMAGE=${{ needs.tags.outputs.runner }} + VM_RUNNER_IMAGE=${{ steps.tags.outputs.runner }} BUILDTAGS=${{ steps.controller-build-tags.outputs.buildtags }} - name: Build and push neonvm-vxlan-controller image uses: docker/build-push-action@v6 with: context: . - platforms: linux/amd64 + platforms: linux/${{ matrix.arch }} push: true file: neonvm-vxlan-controller/Dockerfile cache-from: type=registry,ref=cache.neon.build/neonvm-vxlan-controller:cache cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-vxlan-controller:cache,mode=max' || '' }} - tags: ${{ needs.tags.outputs.vxlan-controller }} + tags: ${{ steps.tags.outputs[format('vxlan-controller-{0}', matrix.arch)] }} build-args: | GO_BASE_IMG=${{ env.GO_BASE_IMG }} - TARGET_ARCH=${{ env.TARGET_ARCH }} + TARGET_ARCH=${{ matrix.arch }} - name: Build and push autoscale-scheduler image uses: docker/build-push-action@v6 with: context: . - platforms: linux/amd64 + platforms: linux/${{ matrix.arch }} push: true file: autoscale-scheduler/Dockerfile cache-from: type=registry,ref=cache.neon.build/autoscale-scheduler:cache cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/autoscale-scheduler:cache,mode=max' || '' }} - tags: ${{ needs.tags.outputs.scheduler }} + tags: ${{ steps.tags.outputs[format('scheduler-{0}', matrix.arch)]}} build-args: | GO_BASE_IMG=${{ env.GO_BASE_IMG }} GIT_INFO=${{ steps.get-git-info.outputs.info }}:${{ inputs.tag }} @@ -290,29 +317,30 @@ jobs: uses: docker/build-push-action@v6 with: context: . - platforms: linux/amd64 + platforms: linux/${{ matrix.arch }} push: true file: autoscaler-agent/Dockerfile cache-from: type=registry,ref=cache.neon.build/autoscaler-agent:cache cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/autoscaler-agent:cache,mode=max' || '' }} - tags: ${{ needs.tags.outputs.autoscaler-agent }} + tags: ${{ steps.tags.outputs[format('autoscaler-agent-{0}', matrix.arch)]}} build-args: | GO_BASE_IMG=${{ env.GO_BASE_IMG }} GIT_INFO=${{ steps.get-git-info.outputs.info }} - + - name: Build and push cluster-autoscaler image uses: docker/build-push-action@v6 if: ${{ format('{0}', inputs.build-cluster-autoscaler) == 'true' }} with: context: cluster-autoscaler - platforms: linux/amd64 + platforms: linux/${{ matrix.arch }} push: true - tags: ${{ needs.tags.outputs.cluster-autoscaler }} + target: ${{format('cluster_autoscaler_{0}', matrix.arch)}} + tags: ${{ steps.tags.outputs[format('cluster-autoscaler-{0}', matrix.arch)]}} cache-from: type=registry,ref=cache.neon.build/cluster-autoscaler-neonvm:cache cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/cluster-autoscaler-neonvm:cache,mode=max' || '' }} build-args: | CA_GIT_TAG=${{ steps.get-ca-tag.outputs.tag }} - + - name: Copy all images to ECR if: ${{ format('{0}', inputs.upload-to-ecr) == 'true' }} run: | diff --git a/.github/workflows/build-test-vm.yaml b/.github/workflows/build-test-vm.yaml index 265b807bc..dd1d94ca8 100644 --- a/.github/workflows/build-test-vm.yaml +++ b/.github/workflows/build-test-vm.yaml @@ -23,38 +23,42 @@ on: required: false default: false outputs: - vm-postgres-16-bullseye: + vm-postgres-16-bullseye-amd64: description: 'image name for postgres:16-bullseye, VM-ified' - value: ${{ jobs.tags.outputs.vm-postgres-16-bullseye }} - + value: ${{ jobs.build.outputs.vm-postgres-16-bullseye-amd64 }} + vm-postgres-16-bullseye-arm64: + description: 'image name for postgres:16-bullseye, VM-ified' + value: ${{ jobs.build.outputs.vm-postgres-16-bullseye-arm64 }} + env: IMG_POSTGRES_16_BULLSEYE: "neondatabase/vm-postgres-16-bullseye" # using image built in the same workflow - IMG_DAEMON: "daemon:dev" - TARGET_ARCH: "amd64" + IMG_DAEMON: "daemon" defaults: run: shell: bash -euo pipefail {0} jobs: - tags: - outputs: - vm-postgres-16-bullseye: ${{ steps.show-tags.outputs.vm-postgres-16-bullseye }} - daemon: ${{ steps.show-tags.outputs.daemon }} - runs-on: ubuntu-latest - steps: - - id: show-tags - run: | - echo "vm-postgres-16-bullseye=${{ env.IMG_POSTGRES_16_BULLSEYE }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT - echo "daemon=${{ env.IMG_DAEMON }}" | tee -a $GITHUB_OUTPUT - + build: # nb: use format(..) to catch both inputs.skip = true AND inputs.skip = 'true'. if: ${{ format('{0}', inputs.skip) != 'true' }} - needs: tags - runs-on: [ self-hosted, gen3, large ] + outputs: + vm-postgres-16-bullseye-arm64: ${{ steps.tags.outputs.vm-postgres-16-bullseye-arm64 }} + vm-postgres-16-bullseye-amd64: ${{ steps.tags.outputs.vm-postgres-16-bullseye-amd64 }} + daemon: ${{ steps.tags.outputs.daemon }} + strategy: + matrix: + arch: [ amd64, arm64 ] + # TODO: do we need gen3 runners? to clarify with dev exp team + runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} steps: + # tags converted to be a step and moved here to be in the same strategy contextt + - id: tags + run: | + echo "vm-postgres-16-bullseye-${{matrix.arch}}=${{ env.IMG_POSTGRES_16_BULLSEYE }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT + echo "daemon=${{ env.IMG_DAEMON }}-${{matrix.arch}}:dev" | tee -a $GITHUB_OUTPUT - uses: actions/checkout@v4 - uses: actions/setup-go@v5 with: @@ -63,7 +67,16 @@ jobs: cache: false # Sometimes setup-go gets stuck. Without this, it'll keep going until the job gets killed timeout-minutes: 10 - - run: make docker-build-daemon + - name: Build daemon image + uses: docker/build-push-action@v6 + with: + context: . + push: false + platforms: linux/${{ matrix.arch }} + file: neonvm-daemon/Dockerfile + cache-from: type=registry,ref=cache.neon.build/neonvm-daemon:cache + cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-daemon:cache,mode=max' || '' }} + tags: ${{ steps.tags.outputs.daemon }} - run: make bin/vm-builder - name: upload vm-builder @@ -71,7 +84,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: vm-builder - path: bin/vm-builder + path: ${{format('bin/vm-builder-{0}', matrix.arch)}} if-no-files-found: error retention-days: 2 @@ -84,9 +97,9 @@ jobs: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - - name: build ${{ needs.tags.outputs.vm-postgres-16-bullseye }} + - name: build ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }} run: | - ./bin/vm-builder -src postgres:16-bullseye -spec tests/e2e/image-spec.yaml -dst ${{ needs.tags.outputs.vm-postgres-16-bullseye }} -daemon-image ${{ needs.tags.outputs.daemon }} -target-arch linux/${TARGET_ARCH} - - name: docker push ${{ needs.tags.outputs.vm-postgres-16-bullseye }} + ./bin/vm-builder -src postgres:16-bullseye -spec tests/e2e/image-spec.yaml -dst ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }} -daemon-image ${{ steps.tags.outputs.daemon }} -target-arch linux/${{ matrix.arch }} + - name: docker push ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }} run: | - docker push ${{ needs.tags.outputs.vm-postgres-16-bullseye }} + docker push ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }} diff --git a/.github/workflows/check-ca-builds.yaml b/.github/workflows/check-ca-builds.yaml index 895db56db..6bcfe98b8 100644 --- a/.github/workflows/check-ca-builds.yaml +++ b/.github/workflows/check-ca-builds.yaml @@ -17,7 +17,11 @@ on: jobs: build-ca: - runs-on: [ self-hosted, gen3, small ] + strategy: + fail-fast: false + matrix: + arch: [ amd64, arm64 ] + runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} steps: - uses: actions/checkout@v4 @@ -41,8 +45,9 @@ jobs: uses: docker/build-push-action@v6 with: context: cluster-autoscaler - platforms: linux/amd64 + platforms: ${{format('linux/{0}', matrix.arch)}} push: false + target: ${{format('cluster_autoscaler_{0}', matrix.arch)}} file: cluster-autoscaler/Dockerfile cache-from: type=registry,ref=cache.neon.build/cluster-autoscaler-neonvm:cache build-args: | diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index 2d2aaf390..7555b1b95 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -50,7 +50,7 @@ jobs: test -n "$SHA" sha="${SHA::7}" echo "tag=$sha.$GITHUB_RUN_ID" | tee -a $GITHUB_OUTPUT - + build-images: needs: get-tag uses: ./.github/workflows/build-images.yaml @@ -77,9 +77,12 @@ jobs: strategy: fail-fast: false matrix: - cluster: - - ${{ inputs.cluster || 'k3d' }} - runs-on: [ self-hosted, gen3, large ] + cluster: [ k3d ] + # run tests on amd64 only, since scope of the PR is to build images, there is separate issue for e2e tests + arch: [ amd64 ] + + runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} + steps: - uses: actions/checkout@v4 with: @@ -109,18 +112,18 @@ jobs: - run: make render-release env: - IMG_CONTROLLER: ${{ needs.build-images.outputs.controller }} - IMG_VXLAN_CONTROLLER: ${{ needs.build-images.outputs.vxlan-controller }} - IMG_RUNNER: ${{ needs.build-images.outputs.runner }} - IMG_SCHEDULER: ${{ needs.build-images.outputs.scheduler }} - IMG_AUTOSCALER_AGENT: ${{ needs.build-images.outputs.autoscaler-agent }} + IMG_CONTROLLER: ${{ needs.build-images.outputs[format('controller-{0}', matrix.arch)] }} + IMG_VXLAN_CONTROLLER: ${{ needs.build-images.outputs[format('vxlan-controller-{0}', matrix.arch)] }} + IMG_RUNNER: ${{ needs.build-images.outputs[format('runner-{0}', matrix.arch)] }} + IMG_SCHEDULER: ${{ needs.build-images.outputs[format('scheduler-{0}', matrix.arch)] }} + IMG_AUTOSCALER_AGENT: ${{ needs.build-images.outputs[format('autoscaler-agent-{0}', matrix.arch)] }} - name: upload manifests # nb: use format(..) to catch both inputs.push-yamls = true AND inputs.push-yamls = 'true'. - if: ${{ format('{0}', inputs.push-yamls) == 'true' }} + if: ${{ format('{0}', inputs.push-yamls) == 'true'}} uses: actions/upload-artifact@v4 with: - name: rendered_manifests + name: ${{ format('rendered_manifests-{0}', matrix.arch) }} # nb: prefix before wildcard is removed from the uploaded files, so the artifact should # contain e.g. # - autoscale-scheduler.yaml @@ -167,9 +170,9 @@ jobs: run: | rendered () { echo "rendered_manifests/$1"; } - kubectl apply -f $(rendered multus-amd64.yaml) + kubectl apply -f $(rendered multus-${{ matrix.arch}}.yaml) kubectl -n kube-system rollout status daemonset kube-multus-ds - kubectl apply -f $(rendered whereabouts-amd64.yaml) + kubectl apply -f $(rendered whereabouts-${{matrix.arch}}.yaml) kubectl -n kube-system rollout status daemonset whereabouts kubectl apply -f $(rendered neonvm-runner-image-loader.yaml) kubectl -n neonvm-system rollout status daemonset neonvm-runner-image-loader @@ -186,7 +189,7 @@ jobs: - name: load e2e test vm image env: - TEST_IMAGE: ${{ needs.build-test-vm.outputs.vm-postgres-16-bullseye }} + TEST_IMAGE: ${{ needs.build-test-vm.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }} timeout-minutes: 2 run: | # Pull the docker image so we can re-tag it, because using a consistent tag inside the @@ -194,10 +197,18 @@ jobs: docker pull "$TEST_IMAGE" docker image tag "$TEST_IMAGE" "$IMG_E2E_TEST" make load-example-vms - + + - name: Patch e2e tests to not use acceleration in arm runners + if: matrix.arch == 'arm64' + run: make arm_patch_e2e + - run: make e2e timeout-minutes: 15 - + + - name: Setup tmate session + if: always() + uses: mxschmitt/action-tmate@v3 + - name: Get k8s logs and events if: always() run: | diff --git a/.github/workflows/vm-kernel.yaml b/.github/workflows/vm-kernel.yaml index 67d780fdb..ecbb96d91 100644 --- a/.github/workflows/vm-kernel.yaml +++ b/.github/workflows/vm-kernel.yaml @@ -28,13 +28,22 @@ on: type: boolean required: false default: false + arch: + description: 'Architecture to build the kernel for' + type: string + required: false + default: 'amd64' outputs: image: description: 'vm-kernel Docker image' value: ${{ jobs.setup-build-vm-kernel-image.outputs.image || jobs.build-vm-kernel-image.outputs.image }} + image_placeholder: + description: 'vm-kernel Docker image placeholder' + value: ${{ jobs.build-vm-kernel-image.outputs.image }} env: - VM_KERNEL_IMAGE: "neondatabase/vm-kernel" + VM_KERNEL_IMAGE: ${{format('neondatabase/vm-kernel-{0}', inputs.arch)}} + VM_KERNEL_IMAGE_PLACEHOLDER: 'neondatabase/vm-kernel-{0}' defaults: run: @@ -130,7 +139,9 @@ jobs: needs: setup-build-vm-kernel-image if: needs.setup-build-vm-kernel-image.outputs.image == '' outputs: + # image for backward compatibility image: ${{ steps.get-tags.outputs.canonical }}@${{ steps.build-linux-kernel.outputs.digest }} + placeholder: ${{ steps.get-tags.outputs.canonical_placeholder }}@${{ steps.build-linux-kernel.outputs.digest }} runs-on: [ self-hosted, gen3, large ] steps: @@ -162,7 +173,7 @@ jobs: echo VM_KERNEL_VERSION=$kernel_version >> $GITHUB_OUTPUT - - name: get docker tags + - name: get docker tags id: get-tags env: KERNEL_VERSION_TAG: ${{ inputs.tag || steps.get-kernel-version.outputs.VM_KERNEL_VERSION }} @@ -178,6 +189,8 @@ jobs: # `docker/build-push-action@v6` returns all ${TAGS} in metadata ("image.name" field), so it can't be used a image name right away. # Choose one of them as a "canonical" tag and use it to construct the job output (along with a digest provided by `docker/build-push-action@v6`). echo "canonical=${VM_KERNEL_IMAGE}:${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT + echo "canonical_placeholder=${VM_KERNEL_IMAGE_PLACEHOLDER}:${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT + - name: build linux kernel id: build-linux-kernel @@ -185,14 +198,17 @@ jobs: with: build-args: KERNEL_VERSION=${{ steps.get-kernel-version.outputs.VM_KERNEL_VERSION }} context: neonvm-kernel - platforms: linux/amd64 + platforms: ${{format('linux/{0}', inputs.arch)}} # neonvm-kernel/Dockerfile.kernel-builder has different targets for different architectures # so we need to specify the target explicitly - target: kernel_amd64 + target: ${{format('kernel_{0}', inputs.arch)}} # Push kernel image only for scheduled builds or if workflow_dispatch/workflow_call input is true push: true pull: true file: neonvm-kernel/Dockerfile.kernel-builder - cache-from: type=registry,ref=cache.neon.build/vm-kernel:cache - cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/vm-kernel:cache,mode=max' || '' }} + cache-from: ${{format('type=registry,ref=cache.neon.build/vm-kernel:{0}cache', inputs.arch)}} + cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/vm-kernel:{0}cache,mode=max', inputs.arch) || '' }} tags: ${{ steps.get-tags.outputs.tags }} + + + - run: git diff neonvm-kernel diff --git a/Makefile b/Makefile index 98ec4d6b4..ce55571ff 100644 --- a/Makefile +++ b/Makefile @@ -344,12 +344,10 @@ render-release: $(RENDERED) kustomize cd autoscaler-agent && $(KUSTOMIZE) edit set image autoscaler-agent=$(IMG_AUTOSCALER_AGENT) # Build: $(KUSTOMIZE) build neonvm/config/whereabouts-amd64 > $(RENDERED)/whereabouts-amd64.yaml - # TODO: I ain't sure if we need arm64 for render-release target - # $(KUSTOMIZE) build neonvm/config/whereabouts-arm64 > $(RENDERED)/whereabouts-arm64.yaml + $(KUSTOMIZE) build neonvm/config/whereabouts-arm64 > $(RENDERED)/whereabouts-arm64.yaml $(KUSTOMIZE) build neonvm/config/multus-aks > $(RENDERED)/multus-aks.yaml $(KUSTOMIZE) build neonvm/config/multus-eks > $(RENDERED)/multus-eks.yaml $(KUSTOMIZE) build neonvm/config/multus-amd64 > $(RENDERED)/multus-amd64.yaml - # TODO: I ain't sure if we need arm64 for render-release target $(KUSTOMIZE) build neonvm/config/multus-arm64 > $(RENDERED)/multus-arm64.yaml $(KUSTOMIZE) build neonvm/config > $(RENDERED)/neonvm.yaml $(KUSTOMIZE) build neonvm-controller > $(RENDERED)/neonvm-controller.yaml @@ -439,6 +437,16 @@ e2e: check-local-context e2e-tools ## Run e2e kuttl tests $(KUTTL) test --config tests/e2e/kuttl-test.yaml $(if $(CI),--skip-delete) rm -f kubeconfig +# arm doesn't support cpu hot plug and memory hot plug and CI runners are based on qemu so no kvm acceleration as well +arm_patch_e2e: yq + @find tests/e2e -name "*.yaml" | xargs -I{} ./bin/yq eval '(select(.kind == "VirtualMachine") | .spec.cpuScalingMode = "sysfsScaling") // .' -i {} + @find tests/e2e -name "*.yaml" | xargs -I{} ./bin/yq eval '(select(.kind == "VirtualMachine") | .spec.enableAcceleration = false) // .' -i {} + @find tests/e2e -name "*.yaml" | xargs -I{} ./bin/yq eval '(select(.kind == "VirtualMachine") | .status.memoryProvider = "VirtioMem") // .' -i {} + @find tests/e2e -name "*.yaml" | xargs -I{} ./bin/yq eval '(select(.kind == "VirtualMachine") | .spec.guest.memoryProvider = "VirtioMem") // .' -i {} + +revert_path_e2e: + git checkout tests/e2e + ##@ Local kind cluster .PHONY: kind-setup @@ -507,7 +515,12 @@ else ifeq ($(GOARCH), amd64) else $(error Unsupported architecture: $(GOARCH)) endif + +YQ_ARCH ?= $(GOARCH) +YQ ?= $(LOCALBIN)/yq + KUBECTL ?= $(LOCALBIN)/kubectl + KUBECTL_VERSION ?= v1.28.12 KIND ?= $(LOCALBIN)/kind @@ -545,6 +558,11 @@ kubectl: $(KUBECTL) ## Download kubectl locally if necessary. $(KUBECTL): $(LOCALBIN) @test -s $(LOCALBIN)/kubectl || { curl -sfSLo $(KUBECTL) https://dl.k8s.io/release/$(KUBECTL_VERSION)/bin/$(GOOS)/$(GOARCH)/kubectl && chmod +x $(KUBECTL); } +.PHONY: yq +yq: $(YQ) +$(YQ): + @test -s $(YQ) || { curl -sfSLo $(YQ) https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_linux_arm64 && chmod +x $(YQ); } + .PHONY: kuttl kuttl: $(KUTTL) ## Download kuttl locally if necessary. $(KUTTL): $(LOCALBIN) diff --git a/cluster-autoscaler/Dockerfile b/cluster-autoscaler/Dockerfile index d868e62e9..e14c39e54 100644 --- a/cluster-autoscaler/Dockerfile +++ b/cluster-autoscaler/Dockerfile @@ -28,7 +28,15 @@ RUN cd autoscaler/cluster-autoscaler \ # This is adapted from CA's Dockerfile.amd64, here: # https://github.com/kubernetes/autoscaler/blob/cluster-autoscaler-1.24.1/cluster-autoscaler/Dockerfile.amd64 -FROM gcr.io/distroless/static:nonroot-amd64 + +# NB: two build stages, one for each architecture, because I wasn't able to use variable substitution in FROM statements +FROM gcr.io/distroless/static:nonroot-amd64 as cluster_autoscaler_amd64 + +WORKDIR / +COPY --from=builder /workspace/cluster-autoscaler . +CMD ["/cluster-autoscaler"] + +FROM gcr.io/distroless/static:nonroot-arm64 as cluster_autoscaler_arm64 WORKDIR / COPY --from=builder /workspace/cluster-autoscaler . diff --git a/tests/e2e/vm-neonvmd/00-create-vm.yaml b/tests/e2e/vm-neonvmd/00-create-vm.yaml index 2bb3c345d..28ab41499 100644 --- a/tests/e2e/vm-neonvmd/00-create-vm.yaml +++ b/tests/e2e/vm-neonvmd/00-create-vm.yaml @@ -7,6 +7,7 @@ kind: VirtualMachine metadata: name: example spec: + cpuScalingMode: sysfsScaling schedulerName: autoscale-scheduler enableSSH: true guest: