neonvm: prevent migrating VMs to the nodes with non-matching architecture #1123

Open · wants to merge 2 commits into base: main
8 changes: 8 additions & 0 deletions neonvm-controller/role.yaml
@@ -11,6 +11,14 @@ rules:
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
72 changes: 72 additions & 0 deletions pkg/neonvm/controllers/vmmigration_controller.go
@@ -174,6 +174,26 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
if err := ctrl.SetControllerReference(vm, migration, r.Scheme); err != nil {
return ctrl.Result{}, err
}
// First check whether the VM already has a node affinity for its CPU architecture;
// if it doesn't, set one explicitly to prevent the migration
// from landing on a node with a different architecture.
sourceNode := &corev1.Node{}
//nolint:exhaustruct // corev1.Node objects are not namespaced
err := r.Get(ctx, types.NamespacedName{Name: vm.Status.Node}, sourceNode)
if err != nil {
log.Error(err, "Failed to get source node")
return ctrl.Result{}, err
}
if !hasArchitectureAffinity(vm, sourceNode) {
if err := addArchitectureAffinity(vm, sourceNode); err != nil {
log.Error(err, "Failed to add architecture affinity to VM", "sourceNodeStatus", sourceNode.Status)
return ctrl.Result{}, err
}
if err = r.Update(ctx, vm); err != nil {
log.Error(err, "Failed to update node affinity of the source VM")
return ctrl.Result{RequeueAfter: time.Second}, err
}
Comment on lines +189 to +195
Member:

question: IIUC, does this mean that the VM will permanently get affinity for the node it starts with?

Is there a reason we can't just do it on the target pod for the migration? (I guess, potential race conditions?)

Contributor (author):

I don't think there is a particular reason; I just feel it is a bit safer to put the guard at the very first step of the migration.

Plus, if we are going to have multi-arch clusters, it is a bit unsafe to even have a VM without an architecture affinity; maybe it should later be added during VM creation, in the vm-controller?

}
if err := r.Update(ctx, migration); err != nil {
log.Info("Failed to add owner to Migration", "error", err)
return ctrl.Result{}, err
@@ -752,3 +772,55 @@ func (r *VirtualMachineMigrationReconciler) targetPodForVirtualMachine(

return pod, nil
}

// hasArchitectureAffinity reports whether the VM has a required node affinity matching the CPU architecture of the source node
func hasArchitectureAffinity(vm *vmv1.VirtualMachine, sourceNode *corev1.Node) bool {
if vm.Spec.Affinity != nil && vm.Spec.Affinity.NodeAffinity != nil && vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
for _, term := range vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms {
for _, expr := range term.MatchExpressions {
if expr.Key == "kubernetes.io/arch" && expr.Operator == corev1.NodeSelectorOpIn && len(expr.Values) > 0 {
Member:
question: does len(expr.Values) > 0 mean that this function will return true for VMs that have an architecture affinity for e.g. "either amd64 or arm64" ? IIUC, would that mean that we could accidentally migrate between x86 and ARM?

Contributor (author):
Values are checked in the loop body, if I understand your question correctly:

for _, value := range expr.Values {
if value == sourceNode.Status.NodeInfo.Architecture {
return true
}
}
}
}
}
}
return false
}

func addArchitectureAffinity(vm *vmv1.VirtualMachine, sourceNode *corev1.Node) error {
if sourceNode.Status.NodeInfo.Architecture == "" {
return errors.New("source node architecture is empty")
}
if vm.Spec.Affinity == nil {
vm.Spec.Affinity = &corev1.Affinity{}
}
if vm.Spec.Affinity.NodeAffinity == nil {
vm.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{}
}
if vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{}
}

vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(
vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms,
corev1.NodeSelectorTerm{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "kubernetes.io/arch",
Operator: corev1.NodeSelectorOpIn,
Values: []string{sourceNode.Status.NodeInfo.Architecture},
},
{
Key: "kubernetes.io/os",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"linux"},
},
},
},
)
return nil
}
142 changes: 142 additions & 0 deletions pkg/neonvm/controllers/vmmigration_controller_test.go
@@ -0,0 +1,142 @@
package controllers

import (
"testing"

corev1 "k8s.io/api/core/v1"

vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

func TestHasArchitectureAffinity(t *testing.T) {
cases := []struct {
name string
result bool
vmSpec *vmv1.VirtualMachine
node *corev1.Node
}{
{
name: "No architecture affinity",
result: false,
vmSpec: &vmv1.VirtualMachine{},
node: &corev1.Node{},
},
{
name: "VM spec has affinity",
result: true,
vmSpec: &vmv1.VirtualMachine{
Spec: vmv1.VirtualMachineSpec{
Affinity: &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
NodeSelectorTerms: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "kubernetes.io/arch",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"amd64"},
},
},
},
},
},
},
},
},
},
node: &corev1.Node{
Status: corev1.NodeStatus{
NodeInfo: corev1.NodeSystemInfo{
Architecture: "amd64",
},
},
},
},
{
name: "VM affinity and node have different architectures",
result: false,
vmSpec: &vmv1.VirtualMachine{
Spec: vmv1.VirtualMachineSpec{
Affinity: &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
NodeSelectorTerms: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "kubernetes.io/arch",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"arm64"},
},
},
},
},
},
},
},
},
},
node: &corev1.Node{
Status: corev1.NodeStatus{
NodeInfo: corev1.NodeSystemInfo{
Architecture: "amd64",
},
},
},
},
}

for _, testCase := range cases {
t.Run(testCase.name, func(t *testing.T) {
result := hasArchitectureAffinity(testCase.vmSpec, testCase.node)
if result != testCase.result {
t.Errorf("Expected %v, got %v", testCase.result, result)
}
})
}
}

func TestAddArchitectureAffinity(t *testing.T) {
vmSpec := &vmv1.VirtualMachine{}
sourceNode := &corev1.Node{
Status: corev1.NodeStatus{
NodeInfo: corev1.NodeSystemInfo{
Architecture: "amd64",
},
},
}
if hasArchitectureAffinity(vmSpec, sourceNode) {
t.Errorf("Expected no architecture affinity")
}
if err := addArchitectureAffinity(vmSpec, &corev1.Node{
Status: corev1.NodeStatus{
NodeInfo: corev1.NodeSystemInfo{
Architecture: "amd64",
},
},
}); err != nil {
t.Errorf("Unexpected error: %v", err)
}

if !hasArchitectureAffinity(vmSpec, sourceNode) {
t.Errorf("Expected architecture affinity to be added")
}
}

func TestSourceNodeHasNoArchitecture(t *testing.T) {
sourceNode := &corev1.Node{
Status: corev1.NodeStatus{
NodeInfo: corev1.NodeSystemInfo{
Architecture: "",
},
},
}

vmSpec := &vmv1.VirtualMachine{}

err := addArchitectureAffinity(vmSpec, sourceNode)
if err == nil {
t.Errorf("Expected error when source node has no architecture")
}
}
9 changes: 9 additions & 0 deletions tests/e2e/vm-migration/00-assert.yaml
@@ -1,6 +1,15 @@
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
timeout: 90
commands:
# affinity is not expected to be set before the migration has started
- script: |
set -eux
affinity=$(kubectl -n "$NAMESPACE" get virtualmachines example -ojsonpath="{.spec.affinity}")
if [ -n "$affinity" ]; then
echo "Affinity is set"
exit 1
fi
---
apiVersion: vm.neon.tech/v1
kind: VirtualMachine
15 changes: 15 additions & 0 deletions tests/e2e/vm-migration/01-assert.yaml
@@ -19,6 +19,21 @@ status:
conditions:
- type: Available
status: "True"
# the original spec had no affinity, but one should be added during the migration to prevent the VM from being scheduled on a node with a different architecture
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values:
- amd64
- key: kubernetes.io/os
operator: In
values:
- linux
mikhail-sakhnov marked this conversation as resolved.
---
apiVersion: v1
kind: Pod