Skip to content

Commit

Permalink
neonvm: add architecture fencing for migrations
Browse files Browse the repository at this point in the history
Signed-off-by: Misha Sakhnov <[email protected]>
  • Loading branch information
mikhail-sakhnov committed Oct 24, 2024
1 parent 3d54d14 commit a036d85
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 0 deletions.
8 changes: 8 additions & 0 deletions neonvm-controller/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ rules:
verbs:
- create
- patch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
Expand Down
61 changes: 61 additions & 0 deletions pkg/neonvm/controllers/vmmigration_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,23 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
if err := ctrl.SetControllerReference(vm, migration, r.Scheme); err != nil {
return ctrl.Result{}, err
}
// First check if VM has the affinity with CPU architecture
// if it doesn't we need to set it explicitly
// to prevent migration going to the node with different architecture
if !hasArchitectureAffinity(vm) {
sourceNode := &corev1.Node{}
//nolint:exhaustruct // corev1.Node objects is not namespaced
err := r.Get(ctx, types.NamespacedName{Name: vm.Status.Node}, sourceNode)
if err != nil {
log.Error(err, "Failed to get source node")
return ctrl.Result{}, err
}
addArchitectureAffinity(vm, sourceNode)
if err = r.Update(ctx, vm); err != nil {
log.Error(err, "Failed to update node affinity of the source vm")
return ctrl.Result{RequeueAfter: time.Second}, err
}
}
if err := r.Update(ctx, migration); err != nil {
log.Info("Failed to add owner to Migration", "error", err)
return ctrl.Result{}, err
Expand Down Expand Up @@ -224,6 +241,7 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req c
// NB: .Spec.EnableSSH guaranteed non-nil because the k8s API server sets the default for us.
enableSSH := *vm.Spec.EnableSSH
var sshSecret *corev1.Secret

if enableSSH {
// We require the SSH secret to exist because we cannot unmount and
// mount the new secret into the VM after the live migration. If a
Expand Down Expand Up @@ -753,3 +771,46 @@ func (r *VirtualMachineMigrationReconciler) targetPodForVirtualMachine(

return pod, nil
}

func hasArchitectureAffinity(vm *vmv1.VirtualMachine) bool {
if vm.Spec.Affinity != nil && vm.Spec.Affinity.NodeAffinity != nil && vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
for _, term := range vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms {
for _, expr := range term.MatchExpressions {
if expr.Key == "kubernetes.io/arch" {
return true
}
}
}
}
return false
}

func addArchitectureAffinity(vm *vmv1.VirtualMachine, sourceNode *corev1.Node) {
if vm.Spec.Affinity == nil {
vm.Spec.Affinity = &corev1.Affinity{}
}
if vm.Spec.Affinity.NodeAffinity == nil {
vm.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{}
}
if vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{}
}

vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append(
vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms,
corev1.NodeSelectorTerm{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "kubernetes.io/arch",
Operator: corev1.NodeSelectorOpIn,
Values: []string{sourceNode.Status.NodeInfo.Architecture},
},
{
Key: "kubernetes.io/os",
Operator: "In",
Values: []string{"linux"},
},
},
},
)
}
75 changes: 75 additions & 0 deletions pkg/neonvm/controllers/vmmigration_controller_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package controllers

import (
"testing"

corev1 "k8s.io/api/core/v1"

vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

func TestHasArchitectureAffinity(t *testing.T) {

cases := []struct {
name string
result bool
vmSpec *vmv1.VirtualMachine
}{
{
name: "No architecture affinity",
result: false,
vmSpec: &vmv1.VirtualMachine{},
},
{
name: "Architecture affinity",
result: true,
vmSpec: &vmv1.VirtualMachine{
Spec: vmv1.VirtualMachineSpec{
Affinity: &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
NodeSelectorTerms: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "kubernetes.io/arch",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"amd64"},
},
},
},
},
},
},
},
},
},
}}

for _, testCase := range cases {
t.Run(testCase.name, func(t *testing.T) {
result := hasArchitectureAffinity(testCase.vmSpec)
if result != testCase.result {
t.Errorf("Expected %v, got %v", testCase.result, result)
}
})
}
}

func TestAddArchitectureAffinity(t *testing.T) {
vmSpec := &vmv1.VirtualMachine{}
if hasArchitectureAffinity(vmSpec) {
t.Errorf("Expected no architecture affinity")
}
addArchitectureAffinity(vmSpec, &corev1.Node{
Status: corev1.NodeStatus{
NodeInfo: corev1.NodeSystemInfo{
Architecture: "amd64",
},
},
})

if !hasArchitectureAffinity(vmSpec) {
t.Errorf("Expected architecture affinity to be added")
}
}
9 changes: 9 additions & 0 deletions tests/e2e/vm-migration/00-assert.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
timeout: 90
commands:
# it is not expected to have affinity set before the migration started
- script: |
set -eux
affinity=$(kubectl -n "$NAMESPACE" get virtualmachines example -ojsonpath="{.spec.affinity}")
if [ -n "$affinity" ]; then
echo "Affinity is set"
exit 1
fi
---
apiVersion: vm.neon.tech/v1
kind: VirtualMachine
Expand Down
15 changes: 15 additions & 0 deletions tests/e2e/vm-migration/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ status:
conditions:
- type: Available
status: "True"
# original spec had no affinity but it should be added during the migration to prevent the VM from being scheduled on a different architecture
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values:
- amd64
- key: kubernetes.io/os
operator: In
values:
- linux
---
apiVersion: v1
kind: pod
Expand Down

0 comments on commit a036d85

Please sign in to comment.