Skip to content

Commit

Permalink
Ensure correct control plane node affinity for the agent during provi…
Browse files Browse the repository at this point in the history
…sioning

Signed-off-by: Danil Grigorev <[email protected]>
  • Loading branch information
Danil-Grigorev committed Mar 14, 2024
1 parent feb9d09 commit b4cb33e
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 21 deletions.
70 changes: 63 additions & 7 deletions internal/controllers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package controllers

import (
"bufio"
"bytes"
"cmp"
"context"
"crypto/tls"
"errors"
Expand All @@ -26,9 +28,12 @@ import (
"net/http"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
yamlDecoder "k8s.io/apimachinery/pkg/util/yaml"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -50,6 +55,7 @@ const (
capiClusterOwner = "cluster-api.cattle.io/capi-cluster-owner"
capiClusterOwnerNamespace = "cluster-api.cattle.io/capi-cluster-owner-ns"

deploymentKind = "Deployment"
defaultRequeueDuration = 1 * time.Minute
)

Expand Down Expand Up @@ -178,21 +184,71 @@ func createImportManifest(ctx context.Context, remoteClient client.Client, in io
return nil
}

func createRawManifest(ctx context.Context, remoteClient client.Client, bytes []byte) error {
items, err := utilyaml.ToUnstructured(bytes)
if err != nil {
return fmt.Errorf("error unmarshalling bytes or empty object passed: %w", err)
}
func createRawManifest(ctx context.Context, remoteClient client.Client, data []byte) error {
log := log.FromContext(ctx)
decoder := utilyaml.NewYAMLDecoder(io.NopCloser(bytes.NewReader(data)))

for _, obj := range items {
if err := createObject(ctx, remoteClient, obj.DeepCopy()); err != nil {
for {
u := &unstructured.Unstructured{}

_, gvk, err := decoder.Decode(nil, u)
if errors.Is(err, io.EOF) {
break
} else if err != nil {
return err
}

if gvk.Kind == deploymentKind {
deploy := &appsv1.Deployment{}
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(u.Object, deploy); err != nil {
log.Error(err, "failed to decode agent deployment")
return err
}

setDeploymentAffinity(deploy)

if err := createObject(ctx, remoteClient, deploy); err != nil {
return err
}

continue
}

if err := createObject(ctx, remoteClient, u.DeepCopy()); err != nil {
return err
}
}

return nil
}

func setDeploymentAffinity(deploy *appsv1.Deployment) {
affinity := cmp.Or(deploy.Spec.Template.Spec.Affinity, &corev1.Affinity{})
nodeAffinity := cmp.Or(affinity.NodeAffinity, &corev1.NodeAffinity{})
preference := corev1.PreferredSchedulingTerm{
Weight: 100,
Preference: corev1.NodeSelectorTerm{
MatchExpressions: []corev1.NodeSelectorRequirement{{
Key: "node-role.kubernetes.io/control-plane",
Operator: corev1.NodeSelectorOpExists,
}},
},
}
nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, preference)
preference.Preference.MatchExpressions = []corev1.NodeSelectorRequirement{{
Key: "node-role.kubernetes.io/controlplane",
Operator: corev1.NodeSelectorOpExists,
}}
nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, preference)
preference.Preference.MatchExpressions = []corev1.NodeSelectorRequirement{{
Key: "node-role.kubernetes.io/master",
Operator: corev1.NodeSelectorOpExists,
}}
nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(nodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, preference)
affinity.NodeAffinity = nodeAffinity
deploy.Spec.Template.Spec.Affinity = affinity
}

func createObject(ctx context.Context, c client.Client, obj client.Object) error {
log := log.FromContext(ctx)
gvk := obj.GetObjectKind().GroupVersionKind()
Expand Down
52 changes: 45 additions & 7 deletions internal/controllers/import_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ import (
"context"
"fmt"
"strings"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -49,6 +51,8 @@ import (
turtlespredicates "github.com/rancher-sandbox/rancher-turtles/util/predicates"
)

const fieldOwner = "rancher-turtles"

// CAPIImportReconciler represents a reconciler for importing CAPI clusters in Rancher.
type CAPIImportReconciler struct {
Client client.Client
Expand Down Expand Up @@ -241,11 +245,50 @@ func (r *CAPIImportReconciler) reconcileNormal(ctx context.Context, capiCluster

log.Info("found cluster name", "name", rancherCluster.Status.ClusterName)

if rancherCluster.Status.AgentDeployed {
log.Info("agent already deployed, no action needed")
if rancherCluster.Status.Ready {
log.Info("cluster is ready, no action needed")
return ctrl.Result{}, nil
}

// We have to ensure the agent deployment has correct nodeAffinity settings at all times
remoteClient, err := r.remoteClientGetter(ctx, capiCluster.Name, r.Client, client.ObjectKeyFromObject(capiCluster))
if err != nil {
return ctrl.Result{}, fmt.Errorf("getting remote cluster client: %w", err)
}

if rancherCluster.Status.AgentDeployed {
log.Info("updating agent node affinity settings")

agent := &appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{
Name: "cattle-cluster-agent",
Namespace: "cattle-system",
}}

if err := remoteClient.Get(ctx, client.ObjectKeyFromObject(agent), agent); err != nil {
log.Error(err, "unable to get existing agent deployment")
return ctrl.Result{}, err
}

setDeploymentAffinity(agent)
agent.SetManagedFields(nil)
agent.TypeMeta = metav1.TypeMeta{
APIVersion: "apps/v1",
Kind: deploymentKind,
}

if err := remoteClient.Patch(ctx, agent, client.Apply, []client.PatchOption{
client.ForceOwnership,
client.FieldOwner(fieldOwner),
}...); err != nil {
log.Error(err, "unable to update existing agent deployment")
return ctrl.Result{}, err
}

// During the provisioning after registration the initial deployment gets
// updated by the rancher. We must not miss it.
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}

// get the registration manifest
manifest, err := getClusterRegistrationManifest(ctx, rancherCluster.Status.ClusterName, capiCluster.Namespace, r.RancherClient, r.InsecureSkipVerify)
if err != nil {
Expand All @@ -259,11 +302,6 @@ func (r *CAPIImportReconciler) reconcileNormal(ctx context.Context, capiCluster

log.Info("Creating import manifest")

remoteClient, err := r.remoteClientGetter(ctx, capiCluster.Name, r.Client, client.ObjectKeyFromObject(capiCluster))
if err != nil {
return ctrl.Result{}, fmt.Errorf("getting remote cluster client: %w", err)
}

if err := createImportManifest(ctx, remoteClient, strings.NewReader(manifest)); err != nil {
return ctrl.Result{}, fmt.Errorf("creating import manifest: %w", err)
}
Expand Down
45 changes: 45 additions & 0 deletions internal/controllers/import_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
provisioningv1 "github.com/rancher-sandbox/rancher-turtles/internal/rancher/provisioning/v1"
"github.com/rancher-sandbox/rancher-turtles/internal/test"
turtlesnaming "github.com/rancher-sandbox/rancher-turtles/util/naming"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand Down Expand Up @@ -225,6 +226,50 @@ var _ = Describe("reconcile CAPI Cluster", func() {
unstructuredObj.SetUnstructuredContent(u)
unstructuredObj.SetGroupVersionKind(obj.GetObjectKind().GroupVersionKind())

if unstructuredObj.GroupVersionKind().Kind == "Deployment" {
dep := &appsv1.Deployment{}
g.Eventually(testEnv.GetAs(unstructuredObj, dep)).ShouldNot(BeNil())
affinity := dep.Spec.Template.Spec.Affinity.NodeAffinity
g.Expect(affinity).ToNot(BeNil())
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/control-plane",
Operator: corev1.NodeSelectorOpExists,
})),
))
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/controlplane",
Operator: corev1.NodeSelectorOpExists,
})),
))
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/master",
Operator: corev1.NodeSelectorOpExists,
})),
))
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/master",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"true"},
})),
))
}

g.Expect(cl.Get(ctx, client.ObjectKey{
Namespace: unstructuredObj.GetNamespace(),
Name: unstructuredObj.GetName(),
Expand Down
50 changes: 43 additions & 7 deletions internal/controllers/import_controller_v3.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ import (
"context"
"fmt"
"strings"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -274,11 +276,50 @@ func (r *CAPIImportManagementV3Reconciler) reconcileNormal(ctx context.Context,
return ctrl.Result{}, err
}

if conditions.IsTrue(rancherCluster, managementv3.ClusterConditionAgentDeployed) {
log.Info("agent already deployed, no action needed")
if conditions.IsTrue(rancherCluster, managementv3.ClusterConditionReady) {
log.Info("cluster is ready, no action needed")
return ctrl.Result{}, nil
}

// We have to ensure the agent deployment has correct nodeAffinity settings at all times
remoteClient, err := r.remoteClientGetter(ctx, capiCluster.Name, r.Client, client.ObjectKeyFromObject(capiCluster))
if err != nil {
return ctrl.Result{}, fmt.Errorf("getting remote cluster client: %w", err)
}

if conditions.IsTrue(rancherCluster, managementv3.ClusterConditionAgentDeployed) {
log.Info("updating agent node affinity settings")

agent := &appsv1.Deployment{ObjectMeta: metav1.ObjectMeta{
Name: "cattle-cluster-agent",
Namespace: "cattle-system",
}}

if err := remoteClient.Get(ctx, client.ObjectKeyFromObject(agent), agent); err != nil {
log.Error(err, "unable to get existing agent deployment")
return ctrl.Result{}, err
}

setDeploymentAffinity(agent)
agent.SetManagedFields(nil)
agent.TypeMeta = metav1.TypeMeta{
APIVersion: "apps/v1",
Kind: deploymentKind,
}

if err := remoteClient.Patch(ctx, agent, client.Apply, []client.PatchOption{
client.ForceOwnership,
client.FieldOwner(fieldOwner),
}...); err != nil {
log.Error(err, "unable to update existing agent deployment")
return ctrl.Result{}, err
}

// During the provisioning after registration the initial deployment gets
// updated by the rancher. We must not miss it.
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}

// get the registration manifest
manifest, err := getClusterRegistrationManifest(ctx, rancherCluster.Name, rancherCluster.Name, r.RancherClient, r.InsecureSkipVerify)
if err != nil {
Expand All @@ -292,11 +333,6 @@ func (r *CAPIImportManagementV3Reconciler) reconcileNormal(ctx context.Context,

log.Info("Creating import manifest")

remoteClient, err := r.remoteClientGetter(ctx, capiCluster.Name, r.Client, client.ObjectKeyFromObject(capiCluster))
if err != nil {
return ctrl.Result{}, fmt.Errorf("getting remote cluster client: %w", err)
}

if err := createImportManifest(ctx, remoteClient, strings.NewReader(manifest)); err != nil {
return ctrl.Result{}, fmt.Errorf("creating import manifest: %w", err)
}
Expand Down
45 changes: 45 additions & 0 deletions internal/controllers/import_controller_v3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/rancher-sandbox/rancher-turtles/internal/controllers/testdata"
managementv3 "github.com/rancher-sandbox/rancher-turtles/internal/rancher/management/v3"
"github.com/rancher-sandbox/rancher-turtles/internal/test"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand Down Expand Up @@ -261,6 +262,50 @@ var _ = Describe("reconcile CAPI Cluster", func() {
unstructuredObj.SetUnstructuredContent(u)
unstructuredObj.SetGroupVersionKind(obj.GetObjectKind().GroupVersionKind())

if unstructuredObj.GroupVersionKind().Kind == "Deployment" {
dep := &appsv1.Deployment{}
g.Eventually(testEnv.GetAs(unstructuredObj, dep)).ShouldNot(BeNil())
affinity := dep.Spec.Template.Spec.Affinity.NodeAffinity
g.Expect(affinity).ToNot(BeNil())
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/control-plane",
Operator: corev1.NodeSelectorOpExists,
})),
))
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/controlplane",
Operator: corev1.NodeSelectorOpExists,
})),
))
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/master",
Operator: corev1.NodeSelectorOpExists,
})),
))
g.Expect(affinity.PreferredDuringSchedulingIgnoredDuringExecution).To(
ContainElement(
HaveField(
"Preference.MatchExpressions",
HaveExactElements(corev1.NodeSelectorRequirement{
Key: "node-role.kubernetes.io/master",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"true"},
})),
))
}

g.Expect(cl.Get(ctx, client.ObjectKey{
Namespace: unstructuredObj.GetNamespace(),
Name: unstructuredObj.GetName(),
Expand Down

0 comments on commit b4cb33e

Please sign in to comment.