From 2628e066c68511e3fd4eb7869467ed0c27c439ff Mon Sep 17 00:00:00 2001
From: Misha Sakhnov <misha@neon.tech>
Date: Tue, 22 Oct 2024 18:36:48 +0200
Subject: [PATCH] ci: build images for arm and amd

Signed-off-by: Misha Sakhnov <misha@neon.tech>
---
 .github/workflows/build-images.yaml    | 148 +++++++++++++++----------
 .github/workflows/build-test-vm.yaml   |  61 ++++++----
 .github/workflows/check-ca-builds.yaml |   9 +-
 .github/workflows/e2e-test.yaml        |  43 ++++---
 .github/workflows/vm-kernel.yaml       |  28 ++++-
 Makefile                               |  24 +++-
 cluster-autoscaler/Dockerfile          |  10 +-
 tests/e2e/vm-neonvmd/00-create-vm.yaml |   1 +
 8 files changed, 212 insertions(+), 112 deletions(-)

diff --git a/.github/workflows/build-images.yaml b/.github/workflows/build-images.yaml
index 3784c651e..f5a70cd7f 100644
--- a/.github/workflows/build-images.yaml
+++ b/.github/workflows/build-images.yaml
@@ -31,21 +31,38 @@ on:
         type: boolean
         required: false
     outputs:
-      controller:
+
+      controller-amd64:
+        description: 'neonvm-controller image'
+        value: ${{ jobs.build.outputs.controller-amd64 }}
+      vxlan-controller-amd64:
+        description: 'neonvm-vxlan-controller image'
+        value: ${{ jobs.build.outputs.vxlan-controller-amd64 }}
+      runner-amd64:
+        description: 'neonvm-runner image'
+        value: ${{ jobs.build.outputs.runner-amd64 }}
+      scheduler-amd64:
+        description: 'autoscale-scheduler image'
+        value: ${{ jobs.build.outputs.scheduler-amd64 }}
+      autoscaler-agent-amd64:
+        description: 'autoscaler-agent image'
+        value: ${{ jobs.build.outputs.autoscaler-agent-amd64 }}
+
+      controller-arm64:
         description: 'neonvm-controller image'
-        value: ${{ jobs.tags.outputs.controller }}
-      vxlan-controller:
+        value: ${{ jobs.build.outputs.controller-arm64 }}
+      vxlan-controller-arm64:
         description: 'neonvm-vxlan-controller image'
-        value: ${{ jobs.tags.outputs.vxlan-controller }}
-      runner:
+        value: ${{ jobs.build.outputs.vxlan-controller-arm64 }}
+      runner-arm64:
         description: 'neonvm-runner image'
-        value: ${{ jobs.tags.outputs.runner }}
-      scheduler:
+        value: ${{ jobs.build.outputs.runner-arm64 }}
+      scheduler-arm64:
         description: 'autoscale-scheduler image'
-        value: ${{ jobs.tags.outputs.scheduler }}
-      autoscaler-agent:
+        value: ${{ jobs.build.outputs.scheduler-arm64 }}
+      autoscaler-agent-arm64:
         description: 'autoscaler-agent image'
-        value: ${{ jobs.tags.outputs.autoscaler-agent }}
+        value: ${{ jobs.build.outputs.autoscaler-agent-arm64 }}        
 
 env:
   IMG_CONTROLLER:         "neondatabase/neonvm-controller"
@@ -58,39 +75,11 @@ env:
   ECR_DEV:                "369495373322.dkr.ecr.eu-central-1.amazonaws.com"
   ECR_PROD:               "093970136003.dkr.ecr.eu-central-1.amazonaws.com"
 
-  # Why localhost? We use a local registry so that when docker/build-push-action tries to pull the
-  # image we built locally, it'll actually have a place to pull from.
-  #
-  # Otherwise, if we just try to use a local image, it fails trying to pull it from dockerhub.
-  # See https://github.com/moby/buildkit/issues/2343 for more information.
-  GO_BASE_IMG: "localhost:5000/neondatabase/autoscaling-go-base:dev"
-  # Default architecture to build. In future it would be changed to multi-arch build or separate builds for each arch
-  TARGET_ARCH: "amd64"
-
 defaults:
   run:
     shell: bash -euo pipefail {0}
 
 jobs:
-  tags:
-    outputs:
-      controller:         ${{ steps.show-tags.outputs.controller }}
-      vxlan-controller:   ${{ steps.show-tags.outputs.vxlan-controller }}
-      runner:             ${{ steps.show-tags.outputs.runner }}
-      scheduler:          ${{ steps.show-tags.outputs.scheduler }}
-      autoscaler-agent:   ${{ steps.show-tags.outputs.autoscaler-agent }}
-      cluster-autoscaler: ${{ steps.show-tags.outputs.cluster-autoscaler }}
-    runs-on: ubuntu-latest
-    steps:
-      - id: show-tags
-        run: |
-          echo "controller=${{ env.IMG_CONTROLLER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
-          echo "vxlan-controller=${{ env.IMG_VXLAN_CONTROLLER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
-          echo "runner=${{ env.IMG_RUNNER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
-          echo "scheduler=${{ env.IMG_SCHEDULER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
-          echo "autoscaler-agent=${{ env.IMG_AUTOSCALER_AGENT }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
-          echo "cluster-autoscaler=${{ env.IMG_CLUSTER_AUTOSCALER }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
-
   vm-kernel:
     # nb: use format(..) to catch both inputs.skip = true AND inputs.skip = 'true'.
     if: ${{ format('{0}', inputs.skip) != 'true' }}
@@ -98,17 +87,45 @@ jobs:
     with:
       tag: ${{ inputs.kernel-image || inputs.tag }}
       return-image-for-tag: ${{ inputs.kernel-image }}
+      arch: ${{ matrix.arch }}
+    strategy:
+      matrix:
+        arch: [ 'amd64', 'arm64' ]  
     secrets: inherit
 
   build:
     # nb: use format(..) to catch both inputs.skip = true AND inputs.skip = 'true'.
     if: ${{ format('{0}', inputs.skip) != 'true' }}
-    needs: [ tags, vm-kernel ]
-    runs-on: [ self-hosted, large ]
+    needs: [ vm-kernel  ]
+    outputs:
+      controller-amd64:         ${{ steps.tags.outputs.controller-amd64 }}
+      vxlan-controller-amd64:   ${{ steps.tags.outputs.vxlan-controller-amd64 }}
+      runner-amd64:             ${{ steps.tags.outputs.runner-amd64 }}
+      scheduler-amd64:          ${{ steps.tags.outputs.scheduler-amd64 }}
+      autoscaler-agent-amd64:   ${{ steps.tags.outputs.autoscaler-agent-amd64 }}
+      cluster-autoscaler-amd64: ${{ steps.tags.outputs.cluster-autoscaler-amd64 }}  
+      controller-arm64:         ${{ steps.tags.outputs.controller-arm64 }}
+      vxlan-controller-arm64:   ${{ steps.tags.outputs.vxlan-controller-arm64 }}
+      runner-arm64:             ${{ steps.tags.outputs.runner-arm64 }}
+      scheduler-arm64:          ${{ steps.tags.outputs.scheduler-arm64 }}
+      autoscaler-agent-arm64:   ${{ steps.tags.outputs.autoscaler-agent-arm64 }}
+      cluster-autoscaler-arm64: ${{ steps.tags.outputs.cluster-autoscaler-arm64 }}    
+    env:
+      # Why localhost? We use a local registry so that when docker/build-push-action tries to pull the
+      # image we built locally, it'll actually have a place to pull from.
+      #
+      # Otherwise, if we just try to use a local image, it fails trying to pull it from dockerhub.
+      # See https://github.com/moby/buildkit/issues/2343 for more information. 
+      GO_BASE_IMG: ${{ format('localhost:5000/neondatabase/autoscaling-go-base-{0}:dev', matrix.arch) }}
     permissions:
       contents: read  # This is required for actions/checkout
       id-token: write # This is required for aws-actions/configure-aws-credentials
 
+    strategy:
+      matrix:
+        arch: [ amd64, arm64 ]  # both are required: callers read the *-amd64 and *-arm64 job outputs
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
+
     services:
       registry:
         image: registry:2
@@ -116,6 +133,16 @@ jobs:
           - 5000:5000
 
     steps:
+      # tags converted to be a step and moved here to be in the same strategy context
+      - id: tags
+        run: |
+          echo "controller-${{matrix.arch}}=${{ env.IMG_CONTROLLER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
+          echo "vxlan-controller-${{matrix.arch}}=${{ env.IMG_VXLAN_CONTROLLER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
+          echo "runner-${{matrix.arch}}=${{ env.IMG_RUNNER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
+          echo "scheduler-${{matrix.arch}}=${{ env.IMG_SCHEDULER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
+          echo "autoscaler-agent-${{matrix.arch}}=${{ env.IMG_AUTOSCALER_AGENT }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
+          echo "cluster-autoscaler-${{matrix.arch}}=${{ env.IMG_CLUSTER_AUTOSCALER }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
+
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0 # fetch all, so that we also include tags
@@ -164,7 +191,6 @@ jobs:
           registry: cache.neon.build
           username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
           password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
-
       - name: Configure dev AWS credentials
         if: ${{ format('{0}', inputs.upload-to-ecr) == 'true' }}
         uses: aws-actions/configure-aws-credentials@v4
@@ -200,7 +226,8 @@ jobs:
 
       - name: Load VM kernel
         env:
-          IMAGE: ${{ needs.vm-kernel.outputs.image }}
+          IMAGE: ${{format(needs.vm-kernel.outputs.image_placeholder, matrix.arch)}}
+          # IMAGE:  ${{format('neondatabase/vm-kernel-{0}:3372e65.11573472293', matrix.arch)}}
         run: |
           docker pull --quiet $IMAGE
           ID=$(docker create $IMAGE true)
@@ -212,23 +239,23 @@ jobs:
         id: build-go-dependencies-image
         with:
           context: .
-          platforms: linux/amd64
+          platforms: linux/${{ matrix.arch }}
           push: true
           file: Dockerfile.go-base
           cache-from: type=registry,ref=cache.neon.build/autoscaling-go-base:cache
           cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/autoscaling-go-base:cache,mode=max' || '' }}
           tags: ${{ env.GO_BASE_IMG }}
-
+      
       - name: Build and push neonvm-runner image
         uses: docker/build-push-action@v6
         with:
           context: .
-          platforms: linux/amd64
           push: true
+          platforms: linux/${{ matrix.arch }}
           file: neonvm-runner/Dockerfile
           cache-from: type=registry,ref=cache.neon.build/neonvm-runner:cache
           cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-runner:cache,mode=max' || '' }}
-          tags: ${{ needs.tags.outputs.runner }}
+          tags: ${{ steps.tags.outputs[format('runner-{0}', matrix.arch)] }}
           build-args: |
             GO_BASE_IMG=${{ env.GO_BASE_IMG }}
 
@@ -247,41 +274,41 @@ jobs:
         uses: docker/build-push-action@v6
         with:
           context: .
-          platforms: linux/amd64
+          platforms: linux/${{ matrix.arch }}
           push: true
           file: neonvm-controller/Dockerfile
           cache-from: type=registry,ref=cache.neon.build/neonvm-controller:cache
           cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-controller:cache,mode=max' || '' }}
-          tags: ${{ needs.tags.outputs.controller }}
+          tags: ${{ steps.tags.outputs[format('controller-{0}', matrix.arch)] }}
           build-args: |
             GO_BASE_IMG=${{ env.GO_BASE_IMG }}
-            VM_RUNNER_IMAGE=${{ needs.tags.outputs.runner }}
+            VM_RUNNER_IMAGE=${{ steps.tags.outputs[format('runner-{0}', matrix.arch)] }}
             BUILDTAGS=${{ steps.controller-build-tags.outputs.buildtags }}
 
       - name: Build and push neonvm-vxlan-controller image
         uses: docker/build-push-action@v6
         with:
           context: .
-          platforms: linux/amd64
+          platforms: linux/${{ matrix.arch }}
           push: true
           file: neonvm-vxlan-controller/Dockerfile
           cache-from: type=registry,ref=cache.neon.build/neonvm-vxlan-controller:cache
           cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-vxlan-controller:cache,mode=max' || '' }}
-          tags: ${{ needs.tags.outputs.vxlan-controller }}
+          tags: ${{ steps.tags.outputs[format('vxlan-controller-{0}', matrix.arch)] }}
           build-args: |
             GO_BASE_IMG=${{ env.GO_BASE_IMG }}
-            TARGET_ARCH=${{ env.TARGET_ARCH }}
+            TARGET_ARCH=${{ matrix.arch }}
 
       - name: Build and push autoscale-scheduler image
         uses: docker/build-push-action@v6
         with:
           context: .
-          platforms: linux/amd64
+          platforms: linux/${{ matrix.arch }} 
           push: true
           file: autoscale-scheduler/Dockerfile
           cache-from: type=registry,ref=cache.neon.build/autoscale-scheduler:cache
           cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/autoscale-scheduler:cache,mode=max' || '' }}
-          tags: ${{ needs.tags.outputs.scheduler }}
+          tags: ${{ steps.tags.outputs[format('scheduler-{0}', matrix.arch)]}}
           build-args: |
             GO_BASE_IMG=${{ env.GO_BASE_IMG }}
             GIT_INFO=${{ steps.get-git-info.outputs.info }}:${{ inputs.tag }}
@@ -290,29 +317,30 @@ jobs:
         uses: docker/build-push-action@v6
         with:
           context: .
-          platforms: linux/amd64
+          platforms: linux/${{ matrix.arch }}
           push: true
           file: autoscaler-agent/Dockerfile
           cache-from: type=registry,ref=cache.neon.build/autoscaler-agent:cache
           cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/autoscaler-agent:cache,mode=max' || '' }}
-          tags: ${{ needs.tags.outputs.autoscaler-agent }}
+          tags: ${{ steps.tags.outputs[format('autoscaler-agent-{0}', matrix.arch)]}}
           build-args: |
             GO_BASE_IMG=${{ env.GO_BASE_IMG }}
             GIT_INFO=${{ steps.get-git-info.outputs.info }}
-
+      
       - name: Build and push cluster-autoscaler image
         uses: docker/build-push-action@v6
         if: ${{ format('{0}', inputs.build-cluster-autoscaler) == 'true' }}
         with:
           context: cluster-autoscaler
-          platforms: linux/amd64
+          platforms: linux/${{ matrix.arch }}
           push: true
-          tags: ${{ needs.tags.outputs.cluster-autoscaler }}
+          target: ${{format('cluster_autoscaler_{0}', matrix.arch)}}
+          tags: ${{ steps.tags.outputs[format('cluster-autoscaler-{0}', matrix.arch)]}}
           cache-from: type=registry,ref=cache.neon.build/cluster-autoscaler-neonvm:cache
           cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/cluster-autoscaler-neonvm:cache,mode=max' || '' }}
           build-args: |
             CA_GIT_TAG=${{ steps.get-ca-tag.outputs.tag }}
-
+      
       - name: Copy all images to ECR
         if: ${{ format('{0}', inputs.upload-to-ecr) == 'true' }}
         run: |
diff --git a/.github/workflows/build-test-vm.yaml b/.github/workflows/build-test-vm.yaml
index 265b807bc..dd1d94ca8 100644
--- a/.github/workflows/build-test-vm.yaml
+++ b/.github/workflows/build-test-vm.yaml
@@ -23,38 +23,42 @@ on:
         required: false
         default: false
     outputs:
-      vm-postgres-16-bullseye:
+      vm-postgres-16-bullseye-amd64:
         description: 'image name for postgres:16-bullseye, VM-ified'
-        value: ${{ jobs.tags.outputs.vm-postgres-16-bullseye }}
-
+        value: ${{ jobs.build.outputs.vm-postgres-16-bullseye-amd64 }}
+      vm-postgres-16-bullseye-arm64:
+        description: 'image name for postgres:16-bullseye, VM-ified'
+        value: ${{ jobs.build.outputs.vm-postgres-16-bullseye-arm64 }}
+  
 env:
   IMG_POSTGRES_16_BULLSEYE: "neondatabase/vm-postgres-16-bullseye"
   # using image built in the same workflow
-  IMG_DAEMON: "daemon:dev"
-  TARGET_ARCH: "amd64"
+  IMG_DAEMON: "daemon"
 
 defaults:
   run:
     shell: bash -euo pipefail {0}
 
 jobs:
-  tags:
-    outputs:
-      vm-postgres-16-bullseye: ${{ steps.show-tags.outputs.vm-postgres-16-bullseye }}
-      daemon: ${{ steps.show-tags.outputs.daemon }}
-    runs-on: ubuntu-latest
-    steps:
-      - id: show-tags
-        run: |
-          echo "vm-postgres-16-bullseye=${{ env.IMG_POSTGRES_16_BULLSEYE }}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
-          echo "daemon=${{ env.IMG_DAEMON }}" | tee -a $GITHUB_OUTPUT
-
+  
   build:
     # nb: use format(..) to catch both inputs.skip = true AND inputs.skip = 'true'.
     if: ${{ format('{0}', inputs.skip) != 'true' }}
-    needs: tags
-    runs-on: [ self-hosted, gen3, large ]
+    outputs:
+      vm-postgres-16-bullseye-arm64: ${{ steps.tags.outputs.vm-postgres-16-bullseye-arm64 }}
+      vm-postgres-16-bullseye-amd64: ${{ steps.tags.outputs.vm-postgres-16-bullseye-amd64 }}
+      daemon: ${{ steps.tags.outputs.daemon }}    
+    strategy:
+      matrix:
+        arch: [ amd64, arm64 ]  
+    # TODO: do we need gen3 runners? to clarify with dev exp team    
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
     steps:
+      # tags converted to a step and moved here so that they share this job's strategy (matrix) context
+      - id: tags
+        run: |
+          echo "vm-postgres-16-bullseye-${{matrix.arch}}=${{ env.IMG_POSTGRES_16_BULLSEYE }}-${{matrix.arch}}:${{ inputs.tag }}" | tee -a $GITHUB_OUTPUT
+          echo "daemon=${{ env.IMG_DAEMON }}-${{matrix.arch}}:dev" | tee -a $GITHUB_OUTPUT      
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
         with:
@@ -63,7 +67,16 @@ jobs:
           cache: false
         # Sometimes setup-go gets stuck. Without this, it'll keep going until the job gets killed
         timeout-minutes: 10
-      - run: make docker-build-daemon
+      - name: Build daemon image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: false
+          platforms: linux/${{ matrix.arch }}
+          file: neonvm-daemon/Dockerfile
+          cache-from: type=registry,ref=cache.neon.build/neonvm-daemon:cache
+          cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/neonvm-daemon:cache,mode=max' || '' }}
+          tags: ${{ steps.tags.outputs.daemon }}
       - run: make bin/vm-builder
 
       - name: upload vm-builder
@@ -71,7 +84,7 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: vm-builder
-          path: bin/vm-builder
+          path: ${{format('bin/vm-builder-{0}', matrix.arch)}}
           if-no-files-found: error
           retention-days: 2
 
@@ -84,9 +97,9 @@ jobs:
           username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
           password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
 
-      - name: build ${{ needs.tags.outputs.vm-postgres-16-bullseye }}
+      - name: build ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }}
         run: |
-          ./bin/vm-builder -src postgres:16-bullseye -spec tests/e2e/image-spec.yaml -dst ${{ needs.tags.outputs.vm-postgres-16-bullseye }} -daemon-image ${{ needs.tags.outputs.daemon }} -target-arch linux/${TARGET_ARCH}
-      - name: docker push ${{ needs.tags.outputs.vm-postgres-16-bullseye }}
+          ./bin/vm-builder -src postgres:16-bullseye -spec tests/e2e/image-spec.yaml -dst ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }} -daemon-image ${{ steps.tags.outputs.daemon }} -target-arch linux/${{ matrix.arch }}
+      - name: docker push ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }}
         run: |
-          docker push ${{ needs.tags.outputs.vm-postgres-16-bullseye }}
+          docker push ${{ steps.tags.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }}
diff --git a/.github/workflows/check-ca-builds.yaml b/.github/workflows/check-ca-builds.yaml
index 895db56db..6bcfe98b8 100644
--- a/.github/workflows/check-ca-builds.yaml
+++ b/.github/workflows/check-ca-builds.yaml
@@ -17,7 +17,11 @@ on:
 
 jobs:
   build-ca:
-    runs-on: [ self-hosted, gen3, small ]
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [ amd64, arm64 ]
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
     steps:
       - uses: actions/checkout@v4
 
@@ -41,8 +45,9 @@ jobs:
         uses: docker/build-push-action@v6
         with:
           context: cluster-autoscaler
-          platforms: linux/amd64
+          platforms: ${{format('linux/{0}', matrix.arch)}}
           push: false
+          target: ${{format('cluster_autoscaler_{0}', matrix.arch)}}
           file: cluster-autoscaler/Dockerfile
           cache-from: type=registry,ref=cache.neon.build/cluster-autoscaler-neonvm:cache
           build-args: |
diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml
index 2d2aaf390..7555b1b95 100644
--- a/.github/workflows/e2e-test.yaml
+++ b/.github/workflows/e2e-test.yaml
@@ -50,7 +50,7 @@ jobs:
           test -n "$SHA"
           sha="${SHA::7}"
           echo "tag=$sha.$GITHUB_RUN_ID" | tee -a $GITHUB_OUTPUT
-
+  
   build-images:
     needs: get-tag
     uses: ./.github/workflows/build-images.yaml
@@ -77,9 +77,12 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        cluster:
-          - ${{ inputs.cluster || 'k3d' }}
-    runs-on: [ self-hosted, gen3, large ]
+        cluster: [ "${{ inputs.cluster || 'k3d' }}" ]  # keep honouring the `cluster` input; default to k3d
+        # Run e2e tests on amd64 only: this change just builds the images; arm64 e2e tests are tracked in a separate issue.
+        arch: [ amd64 ]
+    
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
+
     steps:
       - uses: actions/checkout@v4
         with:
@@ -109,18 +112,18 @@ jobs:
 
       - run: make render-release
         env:
-          IMG_CONTROLLER:       ${{ needs.build-images.outputs.controller }}
-          IMG_VXLAN_CONTROLLER: ${{ needs.build-images.outputs.vxlan-controller }}
-          IMG_RUNNER:           ${{ needs.build-images.outputs.runner }}
-          IMG_SCHEDULER:        ${{ needs.build-images.outputs.scheduler }}
-          IMG_AUTOSCALER_AGENT: ${{ needs.build-images.outputs.autoscaler-agent }}
+          IMG_CONTROLLER:       ${{ needs.build-images.outputs[format('controller-{0}', matrix.arch)] }}
+          IMG_VXLAN_CONTROLLER: ${{ needs.build-images.outputs[format('vxlan-controller-{0}', matrix.arch)] }}
+          IMG_RUNNER:           ${{ needs.build-images.outputs[format('runner-{0}', matrix.arch)] }}
+          IMG_SCHEDULER:        ${{ needs.build-images.outputs[format('scheduler-{0}', matrix.arch)] }}
+          IMG_AUTOSCALER_AGENT: ${{ needs.build-images.outputs[format('autoscaler-agent-{0}', matrix.arch)] }}
 
       - name: upload manifests
         # nb: use format(..) to catch both inputs.push-yamls = true AND inputs.push-yamls = 'true'.
-        if: ${{ format('{0}', inputs.push-yamls) == 'true' }}
+        if: ${{ format('{0}', inputs.push-yamls) == 'true'}}
         uses: actions/upload-artifact@v4
         with:
-          name: rendered_manifests
+          name: ${{ format('rendered_manifests-{0}', matrix.arch) }}
           # nb: prefix before wildcard is removed from the uploaded files, so the artifact should
           # contain e.g.
           #   - autoscale-scheduler.yaml
@@ -167,9 +170,9 @@ jobs:
         run: |
           rendered () { echo "rendered_manifests/$1"; }
 
-          kubectl apply -f $(rendered multus-amd64.yaml)
+          kubectl apply -f $(rendered multus-${{ matrix.arch}}.yaml)
           kubectl -n kube-system rollout status daemonset kube-multus-ds
-          kubectl apply -f $(rendered whereabouts-amd64.yaml)
+          kubectl apply -f $(rendered whereabouts-${{matrix.arch}}.yaml)
           kubectl -n kube-system rollout status daemonset whereabouts
           kubectl apply -f $(rendered neonvm-runner-image-loader.yaml)
           kubectl -n neonvm-system rollout status daemonset neonvm-runner-image-loader
@@ -186,7 +189,7 @@ jobs:
 
       - name: load e2e test vm image
         env:
-          TEST_IMAGE: ${{ needs.build-test-vm.outputs.vm-postgres-16-bullseye }}
+          TEST_IMAGE: ${{ needs.build-test-vm.outputs[format('vm-postgres-16-bullseye-{0}', matrix.arch)] }}
         timeout-minutes: 2
         run: |
           # Pull the docker image so we can re-tag it, because using a consistent tag inside the
@@ -194,10 +197,18 @@ jobs:
           docker pull "$TEST_IMAGE"
           docker image tag "$TEST_IMAGE" "$IMG_E2E_TEST"
           make load-example-vms
-
+      
+      - name: Patch e2e tests to not use acceleration in arm runners
+        if: matrix.arch == 'arm64'
+        run: make arm_patch_e2e
+      
       - run: make e2e
         timeout-minutes: 15
-
+      
+      - name: Setup tmate session
+        if: ${{ failure() && runner.debug == '1' }}  # debugging aid only: opens an SSH session, so never run it on regular builds
+        uses: mxschmitt/action-tmate@v3
+        timeout-minutes: 15
       - name: Get k8s logs and events
         if: always()
         run: |
diff --git a/.github/workflows/vm-kernel.yaml b/.github/workflows/vm-kernel.yaml
index 67d780fdb..ecbb96d91 100644
--- a/.github/workflows/vm-kernel.yaml
+++ b/.github/workflows/vm-kernel.yaml
@@ -28,13 +28,22 @@ on:
         type: boolean
         required: false
         default: false
+      arch: 
+        description: 'Architecture to build the kernel for'
+        type: string
+        required: false
+        default: 'amd64'
     outputs:
       image:
         description: 'vm-kernel Docker image'
         value: ${{ jobs.setup-build-vm-kernel-image.outputs.image || jobs.build-vm-kernel-image.outputs.image }}
+      image_placeholder:
+        description: 'vm-kernel Docker image placeholder'
+        value: ${{ jobs.build-vm-kernel-image.outputs.placeholder }}
 
 env:
-  VM_KERNEL_IMAGE: "neondatabase/vm-kernel"
+  VM_KERNEL_IMAGE: ${{format('neondatabase/vm-kernel-{0}', inputs.arch)}}
+  VM_KERNEL_IMAGE_PLACEHOLDER: 'neondatabase/vm-kernel-{0}'
 
 defaults:
   run:
@@ -130,7 +139,9 @@ jobs:
     needs: setup-build-vm-kernel-image
     if: needs.setup-build-vm-kernel-image.outputs.image == ''
     outputs:
+      # `image` is kept for backward compatibility: it pins this architecture's image by digest
       image: ${{ steps.get-tags.outputs.canonical }}@${{ steps.build-linux-kernel.outputs.digest }}
+      placeholder: ${{ steps.get-tags.outputs.canonical_placeholder }}  # tag only: a digest is valid for a single architecture's image
 
     runs-on: [ self-hosted, gen3, large ]
     steps:
@@ -162,7 +173,7 @@ jobs:
 
           echo VM_KERNEL_VERSION=$kernel_version >> $GITHUB_OUTPUT
 
-      - name: get docker tags
+      - name: get docker tags 
         id: get-tags
         env:
           KERNEL_VERSION_TAG: ${{ inputs.tag || steps.get-kernel-version.outputs.VM_KERNEL_VERSION }}
@@ -178,6 +189,8 @@ jobs:
           # `docker/build-push-action@v6` returns all ${TAGS} in metadata ("image.name" field), so it can't be used a image name right away.
           # Choose one of them as a "canonical" tag and use it to construct the job output (along with a digest provided by `docker/build-push-action@v6`).
           echo "canonical=${VM_KERNEL_IMAGE}:${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT
+          echo "canonical_placeholder=${VM_KERNEL_IMAGE_PLACEHOLDER}:${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT
+
 
       - name: build linux kernel
         id: build-linux-kernel
@@ -185,14 +198,17 @@ jobs:
         with:
           build-args: KERNEL_VERSION=${{ steps.get-kernel-version.outputs.VM_KERNEL_VERSION }}
           context: neonvm-kernel
-          platforms: linux/amd64
+          platforms: ${{format('linux/{0}', inputs.arch)}}
           # neonvm-kernel/Dockerfile.kernel-builder has different targets for different architectures
           # so we need to specify the target explicitly
-          target: kernel_amd64
+          target: ${{format('kernel_{0}', inputs.arch)}}
           # Push kernel image only for scheduled builds or if workflow_dispatch/workflow_call input is true
           push: true
           pull: true
           file: neonvm-kernel/Dockerfile.kernel-builder
-          cache-from: type=registry,ref=cache.neon.build/vm-kernel:cache
-          cache-to: ${{ github.ref_name == 'main' && 'type=registry,ref=cache.neon.build/vm-kernel:cache,mode=max' || '' }}
+          cache-from: ${{format('type=registry,ref=cache.neon.build/vm-kernel:{0}cache', inputs.arch)}}
+          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/vm-kernel:{0}cache,mode=max', inputs.arch) || '' }}
           tags: ${{ steps.get-tags.outputs.tags }}
+
+
+      - run: git diff neonvm-kernel
diff --git a/Makefile b/Makefile
index 98ec4d6b4..ce55571ff 100644
--- a/Makefile
+++ b/Makefile
@@ -344,12 +344,10 @@ render-release: $(RENDERED) kustomize
 	cd autoscaler-agent && $(KUSTOMIZE) edit set image autoscaler-agent=$(IMG_AUTOSCALER_AGENT)
 	# Build:
 	$(KUSTOMIZE) build neonvm/config/whereabouts-amd64 > $(RENDERED)/whereabouts-amd64.yaml
-	# TODO: I ain't sure if we need arm64 for render-release target
-	# $(KUSTOMIZE) build neonvm/config/whereabouts-arm64 > $(RENDERED)/whereabouts-arm64.yaml
+	$(KUSTOMIZE) build neonvm/config/whereabouts-arm64 > $(RENDERED)/whereabouts-arm64.yaml
 	$(KUSTOMIZE) build neonvm/config/multus-aks > $(RENDERED)/multus-aks.yaml
 	$(KUSTOMIZE) build neonvm/config/multus-eks > $(RENDERED)/multus-eks.yaml
 	$(KUSTOMIZE) build neonvm/config/multus-amd64 > $(RENDERED)/multus-amd64.yaml
-	# TODO: I ain't sure if we need arm64 for render-release target
 	$(KUSTOMIZE) build neonvm/config/multus-arm64 > $(RENDERED)/multus-arm64.yaml
 	$(KUSTOMIZE) build neonvm/config > $(RENDERED)/neonvm.yaml
 	$(KUSTOMIZE) build neonvm-controller > $(RENDERED)/neonvm-controller.yaml
@@ -439,6 +437,16 @@ e2e: check-local-context e2e-tools ## Run e2e kuttl tests
 	$(KUTTL) test --config tests/e2e/kuttl-test.yaml $(if $(CI),--skip-delete)
 	rm -f kubeconfig
 
+# arm doesn't support CPU or memory hotplug, and CI runners are QEMU-based (no KVM acceleration), so patch the e2e VM specs accordingly
+arm_patch_e2e: yq
+	@find tests/e2e -name "*.yaml" | xargs -I{} $(YQ) eval '(select(.kind == "VirtualMachine") | .spec.cpuScalingMode = "sysfsScaling") // .' -i {}
+	@find tests/e2e -name "*.yaml" | xargs -I{} $(YQ) eval '(select(.kind == "VirtualMachine") | .spec.enableAcceleration = false) // .' -i {}
+	@find tests/e2e -name "*.yaml" | xargs -I{} $(YQ) eval '(select(.kind == "VirtualMachine") | .status.memoryProvider = "VirtioMem") // .' -i {}
+	@find tests/e2e -name "*.yaml" | xargs -I{} $(YQ) eval '(select(.kind == "VirtualMachine") | .spec.guest.memoryProvider = "VirtioMem") // .' -i {}
+
+revert_path_e2e:
+	git checkout tests/e2e
+
 ##@ Local kind cluster
 
 .PHONY: kind-setup
@@ -507,7 +515,12 @@ else ifeq ($(GOARCH), amd64)
 else
     $(error Unsupported architecture: $(GOARCH))
 endif
+
+YQ_ARCH ?= $(GOARCH)
+YQ ?= $(LOCALBIN)/yq
+
 KUBECTL ?= $(LOCALBIN)/kubectl
+
 KUBECTL_VERSION ?= v1.28.12
 
 KIND ?= $(LOCALBIN)/kind
@@ -545,6 +558,11 @@ kubectl: $(KUBECTL) ## Download kubectl locally if necessary.
 $(KUBECTL): $(LOCALBIN)
 	@test -s $(LOCALBIN)/kubectl || { curl -sfSLo $(KUBECTL) https://dl.k8s.io/release/$(KUBECTL_VERSION)/bin/$(GOOS)/$(GOARCH)/kubectl && chmod +x $(KUBECTL); }
 
+.PHONY: yq
+yq: $(YQ) ## Download yq locally if necessary.
+$(YQ): $(LOCALBIN)
+	@test -s $(YQ) || { curl -sfSLo $(YQ) https://github.com/mikefarah/yq/releases/download/v4.44.3/yq_$(GOOS)_$(YQ_ARCH) && chmod +x $(YQ); }
+
 .PHONY: kuttl
 kuttl: $(KUTTL) ## Download kuttl locally if necessary.
 $(KUTTL): $(LOCALBIN)
diff --git a/cluster-autoscaler/Dockerfile b/cluster-autoscaler/Dockerfile
index d868e62e9..e14c39e54 100644
--- a/cluster-autoscaler/Dockerfile
+++ b/cluster-autoscaler/Dockerfile
@@ -28,7 +28,15 @@ RUN cd autoscaler/cluster-autoscaler \
 # This is adapted from CA's Dockerfile.amd64, here:
 # https://github.com/kubernetes/autoscaler/blob/cluster-autoscaler-1.24.1/cluster-autoscaler/Dockerfile.amd64
 
-FROM gcr.io/distroless/static:nonroot-amd64
+
+# NB: one final stage per architecture (select with --target); amd64 is last so it stays the default target when none is given
+FROM gcr.io/distroless/static:nonroot-arm64 AS cluster_autoscaler_arm64
+
+WORKDIR /
+COPY --from=builder /workspace/cluster-autoscaler .
+CMD ["/cluster-autoscaler"]
+
+FROM gcr.io/distroless/static:nonroot-amd64 AS cluster_autoscaler_amd64
 
 WORKDIR /
 COPY --from=builder /workspace/cluster-autoscaler .
diff --git a/tests/e2e/vm-neonvmd/00-create-vm.yaml b/tests/e2e/vm-neonvmd/00-create-vm.yaml
index 2bb3c345d..28ab41499 100644
--- a/tests/e2e/vm-neonvmd/00-create-vm.yaml
+++ b/tests/e2e/vm-neonvmd/00-create-vm.yaml
@@ -7,6 +7,7 @@ kind: VirtualMachine
 metadata:
   name: example
 spec:
+  cpuScalingMode: sysfsScaling
   schedulerName: autoscale-scheduler
   enableSSH: true
   guest: