From a036d85b152eac13abe310f2c10f46e6dc3c746a Mon Sep 17 00:00:00 2001 From: Misha Sakhnov Date: Thu, 24 Oct 2024 10:02:00 +0200 Subject: [PATCH] neonvm: add architecture fencing for migrations Signed-off-by: Misha Sakhnov --- neonvm-controller/role.yaml | 8 ++ .../controllers/vmmigration_controller.go | 61 +++++++++++++++ .../vmmigration_controller_test.go | 75 +++++++++++++++++++ tests/e2e/vm-migration/00-assert.yaml | 9 +++ tests/e2e/vm-migration/01-assert.yaml | 15 ++++ 5 files changed, 168 insertions(+) create mode 100644 pkg/neonvm/controllers/vmmigration_controller_test.go diff --git a/neonvm-controller/role.yaml b/neonvm-controller/role.yaml index b6d6d3f3f..b30fbe477 100644 --- a/neonvm-controller/role.yaml +++ b/neonvm-controller/role.yaml @@ -11,6 +11,14 @@ rules: verbs: - create - patch +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch - apiGroups: - "" resources: diff --git a/pkg/neonvm/controllers/vmmigration_controller.go b/pkg/neonvm/controllers/vmmigration_controller.go index 2a7160170..a38f5c1cb 100644 --- a/pkg/neonvm/controllers/vmmigration_controller.go +++ b/pkg/neonvm/controllers/vmmigration_controller.go @@ -172,6 +172,23 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c if err := ctrl.SetControllerReference(vm, migration, r.Scheme); err != nil { return ctrl.Result{}, err } + // First check if VM has the affinity with CPU architecture + // if it doesn't we need to set it explicitly + // to prevent migration going to the node with different architecture + if !hasArchitectureAffinity(vm) { + sourceNode := &corev1.Node{} + //nolint:exhaustruct // corev1.Node objects is not namespaced + err := r.Get(ctx, types.NamespacedName{Name: vm.Status.Node}, sourceNode) + if err != nil { + log.Error(err, "Failed to get source node") + return ctrl.Result{}, err + } + addArchitectureAffinity(vm, sourceNode) + if err = r.Update(ctx, vm); err != nil { + log.Error(err, "Failed to update node affinity of the source vm") + return ctrl.Result{RequeueAfter: time.Second}, err + } + } if err := r.Update(ctx, migration); err != nil { log.Info("Failed to add owner to Migration", "error", err) return ctrl.Result{}, err @@ -224,6 +241,7 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c // NB: .Spec.EnableSSH guaranteed non-nil because the k8s API server sets the default for us. enableSSH := *vm.Spec.EnableSSH var sshSecret *corev1.Secret + if enableSSH { // We require the SSH secret to exist because we cannot unmount and // mount the new secret into the VM after the live migration. If a @@ -753,3 +771,46 @@ func (r *VirtualMachineMigrationReconciler) targetPodForVirtualMachine( return pod, nil } + +func hasArchitectureAffinity(vm *vmv1.VirtualMachine) bool { + if vm.Spec.Affinity != nil && vm.Spec.Affinity.NodeAffinity != nil && vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { + for _, term := range vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms { + for _, expr := range term.MatchExpressions { + if expr.Key == "kubernetes.io/arch" { + return true + } + } + } + } + return false +} + +func addArchitectureAffinity(vm *vmv1.VirtualMachine, sourceNode *corev1.Node) { + if vm.Spec.Affinity == nil { + vm.Spec.Affinity = &corev1.Affinity{} + } + if vm.Spec.Affinity.NodeAffinity == nil { + vm.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} + } + if vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} + } + + vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append( + vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, + corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: corev1.NodeSelectorOpIn, + Values: []string{sourceNode.Status.NodeInfo.Architecture}, + }, + { + Key: "kubernetes.io/os", + Operator: "In", + Values: []string{"linux"}, + }, + }, + }, + ) +} diff --git a/pkg/neonvm/controllers/vmmigration_controller_test.go b/pkg/neonvm/controllers/vmmigration_controller_test.go new file mode 100644 index 000000000..d58921a77 --- /dev/null +++ b/pkg/neonvm/controllers/vmmigration_controller_test.go @@ -0,0 +1,75 @@ +package controllers + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + + vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1" +) + +func TestHasArchitectureAffinity(t *testing.T) { + + cases := []struct { + name string + result bool + vmSpec *vmv1.VirtualMachine + }{ + { + name: "No architecture affinity", + result: false, + vmSpec: &vmv1.VirtualMachine{}, + }, + { + name: "Architecture affinity", + result: true, + vmSpec: &vmv1.VirtualMachine{ + Spec: vmv1.VirtualMachineSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{ + { + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "kubernetes.io/arch", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"amd64"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }} + + for _, testCase := range cases { + t.Run(testCase.name, func(t *testing.T) { + result := hasArchitectureAffinity(testCase.vmSpec) + if result != testCase.result { + t.Errorf("Expected %v, got %v", testCase.result, result) + } + }) + } +} + +func TestAddArchitectureAffinity(t *testing.T) { + vmSpec := &vmv1.VirtualMachine{} + if hasArchitectureAffinity(vmSpec) { + t.Errorf("Expected no architecture affinity") + } + addArchitectureAffinity(vmSpec, &corev1.Node{ + Status: corev1.NodeStatus{ + NodeInfo: corev1.NodeSystemInfo{ + Architecture: "amd64", + }, + }, + }) + + if !hasArchitectureAffinity(vmSpec) { + t.Errorf("Expected architecture affinity to be added") + } +} diff --git a/tests/e2e/vm-migration/00-assert.yaml b/tests/e2e/vm-migration/00-assert.yaml index f6cc18db2..daf7a2a32 100644 --- a/tests/e2e/vm-migration/00-assert.yaml +++ b/tests/e2e/vm-migration/00-assert.yaml @@ -1,6 +1,15 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert timeout: 90 +commands: +# it is not expected to have affinity set before the migration started +- script: | + set -eux + affinity=$(kubectl -n "$NAMESPACE" get virtualmachines example -ojsonpath="{.spec.affinity}") + if [ -n "$affinity" ]; then + echo "Affinity is set" + exit 1 + fi --- apiVersion: vm.neon.tech/v1 kind: VirtualMachine diff --git a/tests/e2e/vm-migration/01-assert.yaml b/tests/e2e/vm-migration/01-assert.yaml index 9416c0d72..6ba9b2144 100644 --- a/tests/e2e/vm-migration/01-assert.yaml +++ b/tests/e2e/vm-migration/01-assert.yaml @@ -19,6 +19,21 @@ status: conditions: - type: Available status: "True" +# original spec had no affinity but it should be added during the migration to prevent the VM from being scheduled on a different architecture +spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - key: kubernetes.io/os + operator: In + values: + - linux --- apiVersion: v1 kind: pod