neonvm: prevent migrating VMs to the nodes with non-matching architecture #1123
base: main
@@ -174,6 +174,26 @@ func (r *VirtualMachineMigrationReconciler) Reconcile(ctx context.Context, req ctrl.Request)

	if err := ctrl.SetControllerReference(vm, migration, r.Scheme); err != nil {
		return ctrl.Result{}, err
	}
	// First, check whether the VM already has node affinity for its CPU
	// architecture; if it doesn't, set it explicitly to prevent the
	// migration from landing on a node with a different architecture.
	sourceNode := &corev1.Node{}
	//nolint:exhaustruct // corev1.Node objects are not namespaced
	err := r.Get(ctx, types.NamespacedName{Name: vm.Status.Node}, sourceNode)
	if err != nil {
		log.Error(err, "Failed to get source node")
		return ctrl.Result{}, err
	}
	if !hasArchitectureAffinity(vm, sourceNode) {
		if err := addArchitectureAffinity(vm, sourceNode); err != nil {
			log.Error(err, "Failed to add architecture affinity to VM", "sourceNodeStatus", sourceNode.Status)
		}
		if err = r.Update(ctx, vm); err != nil {
			log.Error(err, "Failed to update node affinity of the source VM")
			return ctrl.Result{RequeueAfter: time.Second}, err
		}
	}
	if err := r.Update(ctx, migration); err != nil {
		log.Info("Failed to add owner to Migration", "error", err)
		return ctrl.Result{}, err
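For reviewers skimming this hunk: the sketch below shows the end state the guard leaves on the VM object, assuming an amd64 source node. It is an illustration only, not part of the PR; the shape mirrors addArchitectureAffinity in the next hunk, and "amd64" is an assumed example value (the real one comes from sourceNode.Status.NodeInfo.Architecture).

	// Illustration only: the affinity present on the VM after the guard runs.
	expected := &corev1.Affinity{
		NodeAffinity: &corev1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
				NodeSelectorTerms: []corev1.NodeSelectorTerm{{
					MatchExpressions: []corev1.NodeSelectorRequirement{
						{Key: "kubernetes.io/arch", Operator: corev1.NodeSelectorOpIn, Values: []string{"amd64"}},
						{Key: "kubernetes.io/os", Operator: corev1.NodeSelectorOpIn, Values: []string{"linux"}},
					},
				}},
			},
		},
	}
	_ = expected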
@@ -752,3 +772,55 @@ func (r *VirtualMachineMigrationReconciler) targetPodForVirtualMachine(

	return pod, nil
}

// hasArchitectureAffinity checks whether the VM already has node affinity
// matching the CPU architecture of the source node.
func hasArchitectureAffinity(vm *vmv1.VirtualMachine, sourceNode *corev1.Node) bool {
	if vm.Spec.Affinity != nil && vm.Spec.Affinity.NodeAffinity != nil && vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
		for _, term := range vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms {
			for _, expr := range term.MatchExpressions {
				if expr.Key == "kubernetes.io/arch" && expr.Operator == corev1.NodeSelectorOpIn && len(expr.Values) > 0 {
					for _, value := range expr.Values {
						if value == sourceNode.Status.NodeInfo.Architecture {
							return true
						}
					}
				}
			}
		}
	}
	return false
}

Review comment (anchored on the expr.Values check):
question: does …

Reply:
Values are checked in the loop body, if I understand correctly your question.
|
||
func addArchitectureAffinity(vm *vmv1.VirtualMachine, sourceNode *corev1.Node) error { | ||
if sourceNode.Status.NodeInfo.Architecture == "" { | ||
return errors.New("source node architecture is empty") | ||
} | ||
if vm.Spec.Affinity == nil { | ||
vm.Spec.Affinity = &corev1.Affinity{} | ||
} | ||
if vm.Spec.Affinity.NodeAffinity == nil { | ||
vm.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} | ||
} | ||
if vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { | ||
vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} | ||
} | ||
|
||
vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = append( | ||
vm.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms, | ||
corev1.NodeSelectorTerm{ | ||
MatchExpressions: []corev1.NodeSelectorRequirement{ | ||
{ | ||
Key: "kubernetes.io/arch", | ||
Operator: corev1.NodeSelectorOpIn, | ||
Values: []string{sourceNode.Status.NodeInfo.Architecture}, | ||
}, | ||
{ | ||
Key: "kubernetes.io/os", | ||
Operator: "In", | ||
Values: []string{"linux"}, | ||
}, | ||
}, | ||
}, | ||
) | ||
return nil | ||
} |
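A note on what the appended term means at scheduling time: expressions inside a single NodeSelectorTerm are ANDed, while separate terms in NodeSelectorTerms are ORed, so the arch and os requirements above stay coupled. The sketch below is not part of the PR; it mirrors how the scheduler evaluates one such term with In operators, assuming node labels reflect NodeInfo (the kubelet sets kubernetes.io/arch and kubernetes.io/os automatically), and matchesTerm is an illustrative helper name.

	// matchesTerm is a sketch, not part of the PR: every expression in the
	// term must match (AND); any one matching value satisfies an expression.
	func matchesTerm(nodeLabels map[string]string, term corev1.NodeSelectorTerm) bool {
		for _, expr := range term.MatchExpressions {
			if expr.Operator != corev1.NodeSelectorOpIn {
				continue // the sketch only handles the In operator used here
			}
			matched := false
			for _, v := range expr.Values {
				if nodeLabels[expr.Key] == v {
					matched = true
					break
				}
			}
			if !matched {
				return false
			}
		}
		return true
	}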
@@ -0,0 +1,142 @@
package controllers

import (
	"testing"

	corev1 "k8s.io/api/core/v1"

	vmv1 "github.com/neondatabase/autoscaling/neonvm/apis/neonvm/v1"
)

func TestHasArchitectureAffinity(t *testing.T) {
	cases := []struct {
		name   string
		result bool
		vmSpec *vmv1.VirtualMachine
		node   *corev1.Node
	}{
		{
			name:   "No architecture affinity",
			result: false,
			vmSpec: &vmv1.VirtualMachine{},
			node:   &corev1.Node{},
		},
		{
			name:   "VM spec has affinity",
			result: true,
			vmSpec: &vmv1.VirtualMachine{
				Spec: vmv1.VirtualMachineSpec{
					Affinity: &corev1.Affinity{
						NodeAffinity: &corev1.NodeAffinity{
							RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
								NodeSelectorTerms: []corev1.NodeSelectorTerm{
									{
										MatchExpressions: []corev1.NodeSelectorRequirement{
											{
												Key:      "kubernetes.io/arch",
												Operator: corev1.NodeSelectorOpIn,
												Values:   []string{"amd64"},
											},
										},
									},
								},
							},
						},
					},
				},
			},
			node: &corev1.Node{
				Status: corev1.NodeStatus{
					NodeInfo: corev1.NodeSystemInfo{
						Architecture: "amd64",
					},
				},
			},
		},
		{
			name:   "VM affinity and node have different architectures",
			result: false,
			vmSpec: &vmv1.VirtualMachine{
				Spec: vmv1.VirtualMachineSpec{
					Affinity: &corev1.Affinity{
						NodeAffinity: &corev1.NodeAffinity{
							RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
								NodeSelectorTerms: []corev1.NodeSelectorTerm{
									{
										MatchExpressions: []corev1.NodeSelectorRequirement{
											{
												Key:      "kubernetes.io/arch",
												Operator: corev1.NodeSelectorOpIn,
												Values:   []string{"arm64"},
											},
										},
									},
								},
							},
						},
					},
				},
			},
			node: &corev1.Node{
				Status: corev1.NodeStatus{
					NodeInfo: corev1.NodeSystemInfo{
						Architecture: "amd64",
					},
				},
			},
		},
	}

	for _, testCase := range cases {
		t.Run(testCase.name, func(t *testing.T) {
			result := hasArchitectureAffinity(testCase.vmSpec, testCase.node)
			if result != testCase.result {
				t.Errorf("Expected %v, got %v", testCase.result, result)
			}
		})
	}
}

func TestAddArchitectureAffinity(t *testing.T) {
	vmSpec := &vmv1.VirtualMachine{}
	sourceNode := &corev1.Node{
		Status: corev1.NodeStatus{
			NodeInfo: corev1.NodeSystemInfo{
				Architecture: "amd64",
			},
		},
	}
	if hasArchitectureAffinity(vmSpec, sourceNode) {
		t.Errorf("Expected no architecture affinity")
	}
	if err := addArchitectureAffinity(vmSpec, sourceNode); err != nil {
		t.Errorf("Unexpected error: %v", err)
	}

	if !hasArchitectureAffinity(vmSpec, sourceNode) {
		t.Errorf("Expected architecture affinity to be added")
	}
}

func TestSourceNodeHasNoArchitecture(t *testing.T) {
	sourceNode := &corev1.Node{
		Status: corev1.NodeStatus{
			NodeInfo: corev1.NodeSystemInfo{
				Architecture: "",
			},
		},
	}

	vmSpec := &vmv1.VirtualMachine{}

	err := addArchitectureAffinity(vmSpec, sourceNode)
	if err == nil {
		t.Errorf("Expected error when source node has no architecture")
	}
}
Review comment:
question: IIUC, does this mean that the VM will permanently get affinity for the architecture of the node it starts on? Is there a reason we can't just do it on the target pod for the migration? (I guess, potential race conditions?)

Reply:
I don't think there is a particular reason; I just feel it is a bit safer to put the guard on the very first step of the migration. Plus, if we're going to have multi-arch clusters, it is a bit unsafe to even have a VM without architecture affinity at all; maybe it should later even be added during VM creation, in the vmcontroller?
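
A rough sketch of that follow-up idea. Everything below is hypothetical and not part of this PR: the function name and its placement in the VM controller are invented for illustration. It reuses the two helpers from this diff to pin the architecture as soon as the VM lands on a node, and assumes the same imports as the controller package (context, corev1, types, vmv1) plus sigs.k8s.io/controller-runtime/pkg/client.

	// Hypothetical sketch: pin the architecture affinity once the VM is
	// scheduled, e.g. from the VM controller's reconcile loop.
	func pinArchitectureOnFirstSchedule(ctx context.Context, c client.Client, vm *vmv1.VirtualMachine) error {
		if vm.Status.Node == "" {
			// Not scheduled yet; there is no architecture to pin.
			return nil
		}
		node := &corev1.Node{}
		//nolint:exhaustruct // corev1.Node objects are not namespaced
		if err := c.Get(ctx, types.NamespacedName{Name: vm.Status.Node}, node); err != nil {
			return err
		}
		if hasArchitectureAffinity(vm, node) {
			return nil
		}
		if err := addArchitectureAffinity(vm, node); err != nil {
			return err
		}
		return c.Update(ctx, vm)
	}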