diff --git a/charts/cass-operator-chart/templates/customresourcedefinition.yaml b/charts/cass-operator-chart/templates/customresourcedefinition.yaml index 73d52e9e0..0fcf5fc5f 100644 --- a/charts/cass-operator-chart/templates/customresourcedefinition.yaml +++ b/charts/cass-operator-chart/templates/customresourcedefinition.yaml @@ -148,6 +148,11 @@ spec: type: integer type: object type: object + nodeAffinityLabels: + additionalProperties: + type: string + description: NodeAffinityLabels to pin the Datacenter, using node affinity + type: object nodeSelector: additionalProperties: type: string @@ -5932,8 +5937,14 @@ spec: description: The rack name minLength: 2 type: string + nodeAffinityLabels: + additionalProperties: + type: string + description: NodeAffinityLabels to pin the rack, using node affinity + type: object zone: - description: Zone name to pin the rack, using node affinity + description: Deprecated. Use nodeAffinityLabels instead. Zone + name to pin the rack, using node affinity type: string required: - name diff --git a/operator/deploy/crds/cassandra.datastax.com_cassandradatacenters_crd.yaml b/operator/deploy/crds/cassandra.datastax.com_cassandradatacenters_crd.yaml index 8600c84e5..5d8a945df 100644 --- a/operator/deploy/crds/cassandra.datastax.com_cassandradatacenters_crd.yaml +++ b/operator/deploy/crds/cassandra.datastax.com_cassandradatacenters_crd.yaml @@ -148,6 +148,11 @@ spec: type: integer type: object type: object + nodeAffinityLabels: + additionalProperties: + type: string + description: NodeAffinityLabels to pin the Datacenter, using node affinity + type: object nodeSelector: additionalProperties: type: string @@ -5944,8 +5949,14 @@ spec: description: The rack name minLength: 2 type: string + nodeAffinityLabels: + additionalProperties: + type: string + description: NodeAffinityLabels to pin the rack, using node affinity + type: object zone: - description: Zone name to pin the rack, using node affinity + description: Deprecated. Use nodeAffinityLabels instead. Zone + name to pin the rack, using node affinity type: string required: - name diff --git a/operator/example-cassdc-yaml/cassandra-3.11.x/example-cassdc-full.yaml b/operator/example-cassdc-yaml/cassandra-3.11.x/example-cassdc-full.yaml index d672b5724..9de508ab5 100644 --- a/operator/example-cassdc-yaml/cassandra-3.11.x/example-cassdc-full.yaml +++ b/operator/example-cassdc-yaml/cassandra-3.11.x/example-cassdc-full.yaml @@ -24,11 +24,14 @@ spec: # Each rack is pinned to a Google Cloud zone. racks: - name: rack1 - zone: us-central1-a + nodeAffinityLabels: + failure-domain.beta.kubernetes.io/zone: us-central1-a - name: rack2 - zone: us-central1-b + nodeAffinityLabels: + failure-domain.beta.kubernetes.io/zone: us-central1-b - name: rack3 - zone: us-central1-c + nodeAffinityLabels: + failure-domain.beta.kubernetes.io/zone: us-central1-c # Limit each pod to a fixed 6 CPU cores and 24 GB of RAM. resources: diff --git a/operator/example-cassdc-yaml/dse-6.8.x/example-cassdc-full.yaml b/operator/example-cassdc-yaml/dse-6.8.x/example-cassdc-full.yaml index 0934e08de..45dbec9b8 100644 --- a/operator/example-cassdc-yaml/dse-6.8.x/example-cassdc-full.yaml +++ b/operator/example-cassdc-yaml/dse-6.8.x/example-cassdc-full.yaml @@ -24,11 +24,14 @@ spec: # Each rack is pinned to a Google Cloud zone. racks: - name: rack1 - zone: us-central1-a + nodeAffinityLabels: + failure-domain.beta.kubernetes.io/zone: us-central1-a - name: rack2 - zone: us-central1-b + nodeAffinityLabels: + failure-domain.beta.kubernetes.io/zone: us-central1-b - name: rack3 - zone: us-central1-c + nodeAffinityLabels: + failure-domain.beta.kubernetes.io/zone: us-central1-c # Limit each pod to a fixed 6 CPU cores and 24 GB of RAM. resources: diff --git a/operator/pkg/apis/cassandra/v1beta1/cassandradatacenter_types.go b/operator/pkg/apis/cassandra/v1beta1/cassandradatacenter_types.go index c66713f44..0398b79f4 100644 --- a/operator/pkg/apis/cassandra/v1beta1/cassandradatacenter_types.go +++ b/operator/pkg/apis/cassandra/v1beta1/cassandradatacenter_types.go @@ -91,6 +91,9 @@ type CassandraDatacenterSpec struct { // Config for the Management API certificates ManagementApiAuth ManagementApiAuthConfig `json:"managementApiAuth,omitempty"` + //NodeAffinityLabels to pin the Datacenter, using node affinity + NodeAffinityLabels map[string]string `json:"nodeAffinityLabels,omitempty"` + // Kubernetes resource requests and limits, per pod Resources corev1.ResourceRequirements `json:"resources,omitempty"` @@ -229,8 +232,12 @@ type Rack struct { // The rack name // +kubebuilder:validation:MinLength=2 Name string `json:"name"` - // Zone name to pin the rack, using node affinity + + // Deprecated. Use nodeAffinityLabels instead. Zone name to pin the rack, using node affinity Zone string `json:"zone,omitempty"` + + //NodeAffinityLabels to pin the rack, using node affinity + NodeAffinityLabels map[string]string `json:"nodeAffinityLabels,omitempty"` } type CassandraNodeStatus struct { diff --git a/operator/pkg/apis/cassandra/v1beta1/zz_generated.deepcopy.go b/operator/pkg/apis/cassandra/v1beta1/zz_generated.deepcopy.go index 0a2ad69c2..d95c3b8ef 100644 --- a/operator/pkg/apis/cassandra/v1beta1/zz_generated.deepcopy.go +++ b/operator/pkg/apis/cassandra/v1beta1/zz_generated.deepcopy.go @@ -86,13 +86,22 @@ func (in *CassandraDatacenterSpec) DeepCopyInto(out *CassandraDatacenterSpec) { copy(*out, *in) } in.ManagementApiAuth.DeepCopyInto(&out.ManagementApiAuth) + if in.NodeAffinityLabels != nil { + in, out := &in.NodeAffinityLabels, &out.NodeAffinityLabels + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } in.Resources.DeepCopyInto(&out.Resources) in.SystemLoggerResources.DeepCopyInto(&out.SystemLoggerResources) in.ConfigBuilderResources.DeepCopyInto(&out.ConfigBuilderResources) if in.Racks != nil { in, out := &in.Racks, &out.Racks *out = make([]Rack, len(*in)) - copy(*out, *in) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } in.StorageConfig.DeepCopyInto(&out.StorageConfig) if in.ReplaceNodes != nil { @@ -380,6 +389,13 @@ func (in *NodePortConfig) DeepCopy() *NodePortConfig { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Rack) DeepCopyInto(out *Rack) { *out = *in + if in.NodeAffinityLabels != nil { + in, out := &in.NodeAffinityLabels, &out.NodeAffinityLabels + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } return } diff --git a/operator/pkg/reconciliation/construct_podtemplatespec.go b/operator/pkg/reconciliation/construct_podtemplatespec.go index 1384086a9..cfafef3c8 100644 --- a/operator/pkg/reconciliation/construct_podtemplatespec.go +++ b/operator/pkg/reconciliation/construct_podtemplatespec.go @@ -8,6 +8,7 @@ package reconciliation import ( "fmt" "reflect" + "sort" api "github.com/datastax/cass-operator/operator/pkg/apis/cassandra/v1beta1" "github.com/datastax/cass-operator/operator/pkg/httphelper" @@ -28,22 +29,34 @@ const ( SystemLoggerContainerName = "server-system-logger" ) -// calculateNodeAffinity provides a way to pin all pods within a statefulset to the same zone -func calculateNodeAffinity(zone string) *corev1.NodeAffinity { - if zone == "" { +// calculateNodeAffinity provides a way to decide where to schedule pods within a statefulset based on labels +func calculateNodeAffinity(labels map[string]string) *corev1.NodeAffinity { + if len(labels) == 0 { return nil } + + var nodeSelectors []corev1.NodeSelectorRequirement + + //we make a new map in order to sort because a map is random by design + keys := make([]string, 0, len(labels)) + for key := range labels { + keys = append(keys, key) + } + sort.Strings(keys) // Keep labels in the same order across statefulsets + for _, key := range keys { + selector := corev1.NodeSelectorRequirement{ + Key: key, + Operator: corev1.NodeSelectorOpIn, + Values: []string{labels[key]}, + } + nodeSelectors = append(nodeSelectors, selector) + } + return &corev1.NodeAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ NodeSelectorTerms: []corev1.NodeSelectorTerm{ { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Key: "failure-domain.beta.kubernetes.io/zone", - Operator: corev1.NodeSelectorOpIn, - Values: []string{zone}, - }, - }, + MatchExpressions: nodeSelectors, }, }, }, @@ -437,7 +450,8 @@ func buildContainers(dc *api.CassandraDatacenter, baseTemplate *corev1.PodTempla return nil } -func buildPodTemplateSpec(dc *api.CassandraDatacenter, zone string, rackName string) (*corev1.PodTemplateSpec, error) { +func buildPodTemplateSpec(dc *api.CassandraDatacenter, nodeAffinityLabels map[string]string, + rackName string) (*corev1.PodTemplateSpec, error) { baseTemplate := dc.Spec.PodTemplateSpec.DeepCopy() @@ -493,7 +507,7 @@ func buildPodTemplateSpec(dc *api.CassandraDatacenter, zone string, rackName str // Affinity affinity := &corev1.Affinity{} - affinity.NodeAffinity = calculateNodeAffinity(zone) + affinity.NodeAffinity = calculateNodeAffinity(nodeAffinityLabels) affinity.PodAntiAffinity = calculatePodAntiAffinity(dc.Spec.AllowMultipleNodesPerWorker) baseTemplate.Spec.Affinity = affinity diff --git a/operator/pkg/reconciliation/construct_podtemplatespec_test.go b/operator/pkg/reconciliation/construct_podtemplatespec_test.go index 15f5d4352..d163e7428 100644 --- a/operator/pkg/reconciliation/construct_podtemplatespec_test.go +++ b/operator/pkg/reconciliation/construct_podtemplatespec_test.go @@ -34,14 +34,14 @@ func Test_calculatePodAntiAffinity(t *testing.T) { func Test_calculateNodeAffinity(t *testing.T) { t.Run("check when we dont have a zone we want to use", func(t *testing.T) { - na := calculateNodeAffinity("") + na := calculateNodeAffinity(map[string]string{}) if na != nil { t.Errorf("calculateNodeAffinity() = %v, and we want nil", na) } }) t.Run("check when we do not allow more than one dse pod per node", func(t *testing.T) { - na := calculateNodeAffinity("thezone") + na := calculateNodeAffinity(map[string]string{zoneLabel: "thezone"}) if na == nil || na.RequiredDuringSchedulingIgnoredDuringExecution == nil { t.Errorf("calculateNodeAffinity() = %v, and we want a non-nil RequiredDuringSchedulingIgnoredDuringExecution", na) @@ -372,7 +372,7 @@ func TestCassandraDatacenter_buildPodTemplateSpec_containers_merge(t *testing.T) }, }, } - got, err := buildPodTemplateSpec(dc, "testzone", "testrack") + got, err := buildPodTemplateSpec(dc, map[string]string{zoneLabel: "testzone"}, "testrack") assert.NoError(t, err, "should not have gotten error when building podTemplateSpec") assert.Equal(t, 3, len(got.Spec.Containers)) @@ -401,7 +401,7 @@ func TestCassandraDatacenter_buildPodTemplateSpec_initcontainers_merge(t *testin ConfigBuilderResources: testContainer.Resources, }, } - got, err := buildPodTemplateSpec(dc, "testzone", "testrack") + got, err := buildPodTemplateSpec(dc, map[string]string{zoneLabel: "testzone"}, "testrack") assert.NoError(t, err, "should not have gotten error when building podTemplateSpec") assert.Equal(t, 2, len(got.Spec.InitContainers)) @@ -421,7 +421,7 @@ func TestCassandraDatacenter_buildPodTemplateSpec_labels_merge(t *testing.T) { } dc.Spec.PodTemplateSpec.Labels = map[string]string{"abc": "123"} - spec, err := buildPodTemplateSpec(dc, "testzone", "testrack") + spec, err := buildPodTemplateSpec(dc, map[string]string{zoneLabel: "testzone"}, "testrack") got := spec.Labels expected := dc.GetRackLabels("testrack") @@ -445,10 +445,10 @@ func TestCassandraDatacenter_buildPodTemplateSpec_propagate_volumes(t *testing.T PodTemplateSpec: &corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ InitContainers: []corev1.Container{ - corev1.Container{ + { Name: ServerConfigContainerName, VolumeMounts: []corev1.VolumeMount{ - corev1.VolumeMount{ + { Name: "extra", MountPath: "/extra", }, @@ -460,7 +460,7 @@ func TestCassandraDatacenter_buildPodTemplateSpec_propagate_volumes(t *testing.T }, } - spec, err := buildPodTemplateSpec(dc, "testzone", "testrack") + spec, err := buildPodTemplateSpec(dc, map[string]string{zoneLabel: "testzone"}, "testrack") assert.NoError(t, err, "should not have gotten error when building podTemplateSpec") if !reflect.DeepEqual(spec.Spec.InitContainers[0].VolumeMounts, diff --git a/operator/pkg/reconciliation/construct_statefulset.go b/operator/pkg/reconciliation/construct_statefulset.go index b134562af..ed003cb11 100644 --- a/operator/pkg/reconciliation/construct_statefulset.go +++ b/operator/pkg/reconciliation/construct_statefulset.go @@ -7,13 +7,13 @@ package reconciliation import ( "fmt" - api "github.com/datastax/cass-operator/operator/pkg/apis/cassandra/v1beta1" "github.com/datastax/cass-operator/operator/pkg/httphelper" "github.com/datastax/cass-operator/operator/pkg/images" "github.com/datastax/cass-operator/operator/pkg/oplabels" - "github.com/datastax/cass-operator/operator/pkg/utils" "github.com/datastax/cass-operator/operator/pkg/psp" + "github.com/datastax/cass-operator/operator/pkg/utils" + logf "sigs.k8s.io/controller-runtime/pkg/log" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -21,6 +21,8 @@ import ( "k8s.io/apimachinery/pkg/types" ) +const zoneLabel = "failure-domain.beta.kubernetes.io/zone" + func usesDefunctPvcManagedByLabel(sts *appsv1.StatefulSet) bool { usesDefunct := false for _, pvc := range sts.Spec.VolumeClaimTemplates { @@ -80,6 +82,30 @@ func shouldDefineSecurityContext(dc *api.CassandraDatacenter) bool { return dc.Spec.ServerType == "dse" || images.CalculateDockerImageRunsAsCassandra(dc.Spec.ServerVersion) } +func rackNodeAffinitylabels(dc *api.CassandraDatacenter, rackName string) (map[string]string, error) { + var nodeAffinityLabels map[string]string + var log = logf.Log.WithName("construct_statefulset") + racks := dc.GetRacks() + for _, rack := range racks { + if rack.Name == rackName { + nodeAffinityLabels = utils.MergeMap(emptyMapIfNil(rack.NodeAffinityLabels), + emptyMapIfNil(dc.Spec.NodeAffinityLabels)) + if rack.Zone != "" { + if _, found := nodeAffinityLabels[zoneLabel]; found { + log.Error(nil, + "Deprecated parameter Zone is used and also defined in NodeAffinityLabels. " + + "You should only define it in NodeAffinityLabels") + } + nodeAffinityLabels = utils.MergeMap( + emptyMapIfNil(nodeAffinityLabels), map[string]string{zoneLabel: rack.Zone}, + ) + } + break + } + } + return nodeAffinityLabels, nil +} + // Create a statefulset object for the Datacenter. func newStatefulSetForCassandraDatacenterHelper( rackName string, @@ -105,13 +131,9 @@ func newStatefulSetForCassandraDatacenterHelper( var volumeClaimTemplates []corev1.PersistentVolumeClaim - racks := dc.GetRacks() - var zone string - for _, rack := range racks { - if rack.Name == rackName { - zone = rack.Zone - break - } + nodeAffinityLabels, nodeAffinityLabelsConfigurationError := rackNodeAffinitylabels(dc, rackName) + if nodeAffinityLabelsConfigurationError != nil { + return nil, nodeAffinityLabelsConfigurationError } // Add storage @@ -130,7 +152,7 @@ func newStatefulSetForCassandraDatacenterHelper( nsName := newNamespacedNameForStatefulSet(dc, rackName) - template, err := buildPodTemplateSpec(dc, zone, rackName) + template, err := buildPodTemplateSpec(dc, nodeAffinityLabels, rackName) if err != nil { return nil, err } diff --git a/operator/pkg/reconciliation/construct_statefulset_test.go b/operator/pkg/reconciliation/construct_statefulset_test.go index f054404a7..6cdf2a16f 100644 --- a/operator/pkg/reconciliation/construct_statefulset_test.go +++ b/operator/pkg/reconciliation/construct_statefulset_test.go @@ -48,3 +48,39 @@ func Test_newStatefulSetForCassandraDatacenter(t *testing.T) { assert.Equal(t, map[string]string{"dedicated": "cassandra"}, got.Spec.Template.Spec.NodeSelector) } } + +func Test_newStatefulSetForCassandraDatacenter_rackNodeAffinitylabels(t *testing.T) { + dc := &api.CassandraDatacenter{ + Spec: api.CassandraDatacenterSpec{ + ClusterName: "bob", + ServerType: "cassandra", + ServerVersion: "3.11.7", + PodTemplateSpec: &corev1.PodTemplateSpec{}, + NodeAffinityLabels: map[string]string{"dclabel1": "dcvalue1", "dclabel2": "dcvalue2"}, + Racks: []api.Rack{ + { + Name: "rack1", + Zone: "z1", + NodeAffinityLabels: map[string]string{"r1label1": "r1value1", "r1label2": "r1value2"}, + }, + }, + }, + } + var nodeAffinityLabels map[string]string + var nodeAffinityLabelsConfigurationError error + + nodeAffinityLabels, nodeAffinityLabelsConfigurationError = rackNodeAffinitylabels(dc, "rack1") + + assert.NoError(t, nodeAffinityLabelsConfigurationError, + "should not have gotten error when getting NodeAffinitylabels of rack rack1") + + expected := map[string]string { + "dclabel1": "dcvalue1", + "dclabel2": "dcvalue2", + "r1label1": "r1value1", + "r1label2": "r1value2", + zoneLabel: "z1", + } + + assert.Equal(t, expected, nodeAffinityLabels) +} \ No newline at end of file diff --git a/operator/pkg/reconciliation/utils.go b/operator/pkg/reconciliation/utils.go index 60dc24e69..029302380 100644 --- a/operator/pkg/reconciliation/utils.go +++ b/operator/pkg/reconciliation/utils.go @@ -36,4 +36,11 @@ func getResourcesOrDefault(res *corev1.ResourceRequirements, } return res -} \ No newline at end of file +} + +func emptyMapIfNil(m map[string]string) map[string]string { + if m == nil { + return map[string]string{} + } + return m +}