From c384319561ea2660e8eda64c2223770098e33ba5 Mon Sep 17 00:00:00 2001 From: DONG BEIQING Date: Thu, 8 Feb 2024 15:34:52 +0800 Subject: [PATCH] Configurable QoS resources for cluster-manager and klusterlet operators and their managed pods (#400) Signed-off-by: Dong Beiqing <350758787@qq.com> --- pkg/cmd/init/cmd.go | 4 + pkg/cmd/init/exec.go | 14 +- pkg/cmd/init/options.go | 13 ++ .../init/scenario/init/clustermanager.cr.yaml | 6 + pkg/cmd/init/scenario/init/operator.yaml | 15 ++ pkg/cmd/join/cmd.go | 4 +- pkg/cmd/join/exec.go | 9 +- pkg/cmd/join/options.go | 16 +- .../join/scenario/join/klusterlets.cr.yaml | 4 + pkg/cmd/join/scenario/join/operator.yaml | 11 +- .../resourcerequirement/fake_deployment.yaml | 34 ++++ .../resourcerequirement.go | 78 ++++++++++ .../resourcerequirement_test.go | 145 ++++++++++++++++++ 13 files changed, 338 insertions(+), 15 deletions(-) create mode 100644 pkg/helpers/resourcerequirement/fake_deployment.yaml create mode 100644 pkg/helpers/resourcerequirement/resourcerequirement.go create mode 100644 pkg/helpers/resourcerequirement/resourcerequirement_test.go diff --git a/pkg/cmd/init/cmd.go b/pkg/cmd/init/cmd.go index 34fb7e0bb..dc398c8cd 100644 --- a/pkg/cmd/init/cmd.go +++ b/pkg/cmd/init/cmd.go @@ -54,6 +54,10 @@ func NewCmd(clusteradmFlags *genericclioptionsclusteradm.ClusteradmFlags, stream "If set, the command will initialize the OCM control plan in foreground.") cmd.Flags().StringVarP(&o.output, "output", "o", "text", "output foramt, should be json or text") cmd.Flags().BoolVar(&o.singleton, "singleton", false, "If true, deploy singleton controlplane instead of cluster-manager. This is an alpha stage flag.") + cmd.Flags().StringVar(&o.resourceQosClass, "resource-qos-class", "Default", "the resource QoS class of all the containers managed by the cluster manager and the cluster manager operator. Can be one of Default, BestEffort or ResourceRequirement.") + cmd.Flags().StringToStringVar(&o.resourceLimits, "resource-limits", nil, "the resource limits of all the containers managed by the cluster manager and the cluster manager operator, for example: cpu=800m,memory=800Mi") + cmd.Flags().StringToStringVar(&o.resourceRequests, "resource-requests", nil, "the resource requests of all the containers managed by the cluster manager and the cluster manager operator, for example: cpu=500m,memory=500Mi") + cmd.Flags().BoolVar(&o.createNamespace, "create-namespace", true, "If true, create open-cluster-management namespace, otherwise use existing one") //clusterManagetSet contains the flags for deploy cluster-manager clusterManagerSet := pflag.NewFlagSet("clusterManagerSet", pflag.ExitOnError) diff --git a/pkg/cmd/init/exec.go b/pkg/cmd/init/exec.go index 2f180173d..6573099ad 100644 --- a/pkg/cmd/init/exec.go +++ b/pkg/cmd/init/exec.go @@ -24,6 +24,7 @@ import ( clusteradmjson "open-cluster-management.io/clusteradm/pkg/helpers/json" preflightinterface "open-cluster-management.io/clusteradm/pkg/helpers/preflight" "open-cluster-management.io/clusteradm/pkg/helpers/reader" + "open-cluster-management.io/clusteradm/pkg/helpers/resourcerequirement" "open-cluster-management.io/clusteradm/pkg/helpers/version" helperwait "open-cluster-management.io/clusteradm/pkg/helpers/wait" ) @@ -67,6 +68,11 @@ func (o *Options) complete(cmd *cobra.Command, args []string) (err error) { WorkFeatures: genericclioptionsclusteradm.ConvertToFeatureGateAPI(genericclioptionsclusteradm.HubMutableFeatureGate, ocmfeature.DefaultHubWorkFeatureGates), AddonFeatures: genericclioptionsclusteradm.ConvertToFeatureGateAPI(genericclioptionsclusteradm.HubMutableFeatureGate, ocmfeature.DefaultHubAddonManagerFeatureGates), } + resourceRequirement, err := resourcerequirement.NewResourceRequirement(o.resourceQosClass, o.resourceLimits, o.resourceRequests) + if err != nil { + return err + } + o.values.ResourceRequirement = *resourceRequirement } else { o.Helm.WithNamespace(o.SingletonName) } @@ -150,9 +156,13 @@ func (o *Options) run() error { } else { token := fmt.Sprintf("%s.%s", o.values.Hub.TokenID, o.values.Hub.TokenSecret) - files := []string{ - "init/namespace.yaml", + files := []string{} + if o.createNamespace { + files = append(files, "init/namespace.yaml") + } else { + fmt.Fprintf(o.Streams.Out, "skip creating namespace\n") } + if o.useBootstrapToken { files = append(files, "init/bootstrap-token-secret.yaml", diff --git a/pkg/cmd/init/options.go b/pkg/cmd/init/options.go index dfd1bcb9a..0d7ee0219 100644 --- a/pkg/cmd/init/options.go +++ b/pkg/cmd/init/options.go @@ -6,6 +6,7 @@ import ( operatorv1 "open-cluster-management.io/api/operator/v1" genericclioptionsclusteradm "open-cluster-management.io/clusteradm/pkg/genericclioptions" "open-cluster-management.io/clusteradm/pkg/helpers/helm" + "open-cluster-management.io/clusteradm/pkg/helpers/resourcerequirement" ) // Options is holding all the command-line options @@ -29,6 +30,14 @@ type Options struct { SingletonName string Helm *helm.Helm + // Resource requirement for the containers managed by the cluster manager and the cluster manager operator + resourceQosClass string + resourceLimits map[string]string + resourceRequests map[string]string + + // If create ns or use existing ns + createNamespace bool + //If set, will be persisting the generated join command to a local file outputJoinCommandFile string //If set, the command will hold until the OCM control plane initialized @@ -70,6 +79,10 @@ type Values struct { // Features is the slice of feature for addon manager AddonFeatures []operatorv1.FeatureGate + + // ResourceRequirement is the resource requirement setting for the containers managed by the cluster manager + // and the cluster manager operator + ResourceRequirement resourcerequirement.ResourceRequirement } // Hub: The hub values for the template diff --git a/pkg/cmd/init/scenario/init/clustermanager.cr.yaml b/pkg/cmd/init/scenario/init/clustermanager.cr.yaml index b5d37e8af..e4e77da11 100644 --- a/pkg/cmd/init/scenario/init/clustermanager.cr.yaml +++ b/pkg/cmd/init/scenario/init/clustermanager.cr.yaml @@ -13,6 +13,12 @@ spec: deployOption: mode: Default {{if .RegistrationFeatures}} + resourceRequirement: + type: {{ .ResourceRequirement.Type }} + {{- if eq .ResourceRequirement.Type "ResourceRequirement" }} + resourceRequirements: + {{ .ResourceRequirement.ResourceRequirements | indent 6 }} + {{- end }} registrationConfiguration: {{if .AutoApprove}} autoApproveUsers: diff --git a/pkg/cmd/init/scenario/init/operator.yaml b/pkg/cmd/init/scenario/init/operator.yaml index 1a55e1df7..426ae2b79 100644 --- a/pkg/cmd/init/scenario/init/operator.yaml +++ b/pkg/cmd/init/scenario/init/operator.yaml @@ -16,6 +16,9 @@ spec: labels: app: cluster-manager spec: + volumes: + - emptyDir: {} + name: tmpdir affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: @@ -56,16 +59,28 @@ spec: initialDelaySeconds: 2 periodSeconds: 10 name: registration-operator + volumeMounts: + - mountPath: /tmp + name: tmpdir readinessProbe: httpGet: path: /healthz port: 8443 scheme: HTTPS initialDelaySeconds: 2 + {{- if or (eq .ResourceRequirement.Type "Default") (eq .ResourceRequirement.Type "") }} resources: requests: cpu: 100m memory: 128Mi + {{- end }} + {{- if eq .ResourceRequirement.Type "BestEffort" }} + resources: {} + {{- end }} + {{- if eq .ResourceRequirement.Type "ResourceRequirement" }} + resources: + {{ .ResourceRequirement.ResourceRequirements | indent 10 }} + {{- end }} securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/pkg/cmd/join/cmd.go b/pkg/cmd/join/cmd.go index c32c96388..2decbb6e4 100644 --- a/pkg/cmd/join/cmd.go +++ b/pkg/cmd/join/cmd.go @@ -68,7 +68,9 @@ func NewCmd(clusteradmFlags *genericclioptionsclusteradm.ClusteradmFlags, stream cmd.Flags().BoolVar(&o.singleton, "singleton", false, "If true, deploy singleton mode of klusterlet to have registration and work agents run in a single pod. This is an alpha stage flag.") cmd.Flags().StringVar(&o.proxyURL, "proxy-url", "", "the URL of a forward proxy server that will be used by agents to connect to the hub cluster.") cmd.Flags().StringVar(&o.proxyCAFile, "proxy-ca-file", "", "the file path to proxy ca, optional") - cmd.Flags().StringVar(&o.resourceQosClass, "resource-qos-class", "Default", "the resource QoS class of the klusterlet pod. Can be one of Default or BestEffort") + cmd.Flags().StringVar(&o.resourceQosClass, "resource-qos-class", "Default", "the resource QoS class of all the containers managed by the klusterlet and the klusterlet operator. Can be one of Default, BestEffort or ResourceRequirement.") + cmd.Flags().StringToStringVar(&o.resourceLimits, "resource-limits", nil, "the resource limits of all the containers managed by the klusterlet and the klusterlet operator, for example: cpu=800m,memory=800Mi") + cmd.Flags().StringToStringVar(&o.resourceRequests, "resource-requests", nil, "the resource requests of all the containers managed by the klusterlet and the klusterlet operator, for example: cpu=500m,memory=500Mi") cmd.Flags().BoolVar(&o.createNameSpace, "create-namespace", true, "If true, create open-cluster-management namespace, otherwise use existing one") return cmd diff --git a/pkg/cmd/join/exec.go b/pkg/cmd/join/exec.go index 43aa4ef01..69ab22f1e 100644 --- a/pkg/cmd/join/exec.go +++ b/pkg/cmd/join/exec.go @@ -39,6 +39,7 @@ import ( preflightinterface "open-cluster-management.io/clusteradm/pkg/helpers/preflight" "open-cluster-management.io/clusteradm/pkg/helpers/printer" "open-cluster-management.io/clusteradm/pkg/helpers/reader" + "open-cluster-management.io/clusteradm/pkg/helpers/resourcerequirement" "open-cluster-management.io/clusteradm/pkg/helpers/version" "open-cluster-management.io/clusteradm/pkg/helpers/wait" ) @@ -119,9 +120,13 @@ func (o *Options) complete(cmd *cobra.Command, args []string) (err error) { Name: klusterletName, KlusterletNamespace: klusterletNamespace, } - o.values.ResourceRequirement = ResourceRequirement{ - Type: o.resourceQosClass, + + resourceRequirement, err := resourcerequirement.NewResourceRequirement(o.resourceQosClass, o.resourceLimits, o.resourceRequests) + if err != nil { + return err } + o.values.ResourceRequirement = *resourceRequirement + o.values.ManagedKubeconfig = o.managedKubeconfigFile o.values.RegistrationFeatures = genericclioptionsclusteradm.ConvertToFeatureGateAPI(genericclioptionsclusteradm.SpokeMutableFeatureGate, ocmfeature.DefaultSpokeRegistrationFeatureGates) o.values.WorkFeatures = genericclioptionsclusteradm.ConvertToFeatureGateAPI(genericclioptionsclusteradm.SpokeMutableFeatureGate, ocmfeature.DefaultSpokeWorkFeatureGates) diff --git a/pkg/cmd/join/options.go b/pkg/cmd/join/options.go index 62dd9c9f3..9f3dac37f 100644 --- a/pkg/cmd/join/options.go +++ b/pkg/cmd/join/options.go @@ -6,6 +6,7 @@ import ( clientcmdapiv1 "k8s.io/client-go/tools/clientcmd/api/v1" operatorv1 "open-cluster-management.io/api/operator/v1" genericclioptionsclusteradm "open-cluster-management.io/clusteradm/pkg/genericclioptions" + "open-cluster-management.io/clusteradm/pkg/helpers/resourcerequirement" ) // Options: The structure holding all the command-line options @@ -62,8 +63,10 @@ type Options struct { //The proxy server ca-file(optional) proxyCAFile string - // Resource requirement + // Resource requirement for the containers managed by klusterlet and the klusterlet operator resourceQosClass string + resourceLimits map[string]string + resourceRequests map[string]string // If create ns or use existing ns createNameSpace bool @@ -84,8 +87,6 @@ type Values struct { Hub Hub //Klusterlet is the klusterlet related configuration Klusterlet Klusterlet - //ResourceRequirement is the resource requirement - ResourceRequirement ResourceRequirement //Registry is the image registry related configuration Registry string //bundle version @@ -98,6 +99,10 @@ type Values struct { // Features is the slice of feature for work WorkFeatures []operatorv1.FeatureGate + + // ResourceRequirement is the resource requirement setting for the containers managed by the klusterlet + // and the klusterlet operator + ResourceRequirement resourcerequirement.ResourceRequirement } // Hub: The hub values for the template @@ -117,11 +122,6 @@ type Klusterlet struct { KlusterletNamespace string } -// ResourceRequirement is for templating resource requirement -type ResourceRequirement struct { - Type string -} - type BundleVersion struct { // registration image version RegistrationImageVersion string diff --git a/pkg/cmd/join/scenario/join/klusterlets.cr.yaml b/pkg/cmd/join/scenario/join/klusterlets.cr.yaml index e987e6382..0eb7eb168 100644 --- a/pkg/cmd/join/scenario/join/klusterlets.cr.yaml +++ b/pkg/cmd/join/scenario/join/klusterlets.cr.yaml @@ -17,6 +17,10 @@ spec: {{ end }} resourceRequirement: type: {{ .ResourceRequirement.Type }} + {{- if eq .ResourceRequirement.Type "ResourceRequirement" }} + resourceRequirements: + {{ .ResourceRequirement.ResourceRequirements | indent 6 }} + {{- end }} {{if .RegistrationFeatures}} registrationConfiguration: featureGates: diff --git a/pkg/cmd/join/scenario/join/operator.yaml b/pkg/cmd/join/scenario/join/operator.yaml index a992538e5..79847fab8 100644 --- a/pkg/cmd/join/scenario/join/operator.yaml +++ b/pkg/cmd/join/scenario/join/operator.yaml @@ -72,12 +72,19 @@ spec: scheme: HTTPS port: 8443 initialDelaySeconds: 2 - {{ if ne .ResourceRequirement.Type "BestEffort" }} + {{- if or (eq .ResourceRequirement.Type "Default") (eq .ResourceRequirement.Type "") }} resources: requests: cpu: 100m memory: 128Mi - {{ end }} + {{- end }} + {{- if eq .ResourceRequirement.Type "BestEffort" }} + resources: {} + {{- end }} + {{- if eq .ResourceRequirement.Type "ResourceRequirement" }} + resources: + {{ .ResourceRequirement.ResourceRequirements | indent 10 }} + {{- end }} volumeMounts: - name: tmpdir mountPath: /tmp diff --git a/pkg/helpers/resourcerequirement/fake_deployment.yaml b/pkg/helpers/resourcerequirement/fake_deployment.yaml new file mode 100644 index 000000000..6cf4020d0 --- /dev/null +++ b/pkg/helpers/resourcerequirement/fake_deployment.yaml @@ -0,0 +1,34 @@ +# Copyright Contributors to the Open Cluster Management project +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + a: b + name: fake-deployment + namespace: fake-namespace +spec: + replicas: 1 + selector: + matchLabels: + a: b + template: + metadata: + labels: + a: b + spec: + containers: + - name: my-container + image: nginx + {{- if or (eq .ResourceRequirement.Type "Default") (eq .ResourceRequirement.Type "") }} + resources: + requests: + cpu: 100m + memory: 128Mi + {{- end }} + {{- if eq .ResourceRequirement.Type "BestEffort" }} + resources: {} + {{- end }} + {{- if eq .ResourceRequirement.Type "ResourceRequirement" }} + resources: + {{ .ResourceRequirement.ResourceRequirements | indent 10 }} + {{- end }} diff --git a/pkg/helpers/resourcerequirement/resourcerequirement.go b/pkg/helpers/resourcerequirement/resourcerequirement.go new file mode 100644 index 000000000..c02b00b5f --- /dev/null +++ b/pkg/helpers/resourcerequirement/resourcerequirement.go @@ -0,0 +1,78 @@ +// Copyright Contributors to the Open Cluster Management project + +package resourcerequirement + +import ( + "fmt" + + "github.com/ghodss/yaml" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + operatorv1 "open-cluster-management.io/api/operator/v1" +) + +// ResourceRequirement is for templating resource requirement +type ResourceRequirement struct { + Type string + ResourceRequirements []byte +} + +func NewResourceRequirement(resourceType string, limits, requests map[string]string) (*ResourceRequirement, error) { + if len(limits)+len(requests) == 0 { + if resourceType == string(operatorv1.ResourceQosClassResourceRequirement) { + return nil, fmt.Errorf("resource type is %s but both limits and requests are not set", resourceType) + } + return &ResourceRequirement{ + Type: resourceType, + }, nil + } + if resourceType == "" { + resourceType = string(operatorv1.ResourceQosClassResourceRequirement) + } else if resourceType != string(operatorv1.ResourceQosClassResourceRequirement) { + return nil, fmt.Errorf("resource type must be %s when resource limits or requests are set", string(operatorv1.ResourceQosClassResourceRequirement)) + } + rr := &corev1.ResourceRequirements{ + Limits: corev1.ResourceList{}, + Requests: corev1.ResourceList{}, + } + for rsc, quantityStr := range limits { + quantity, err := resource.ParseQuantity(quantityStr) + if err != nil { + return nil, err + } + rr.Limits[corev1.ResourceName(rsc)] = quantity + } + for rsc, quantityStr := range requests { + quantity, err := resource.ParseQuantity(quantityStr) + if err != nil { + return nil, err + } + rr.Requests[corev1.ResourceName(rsc)] = quantity + } + if err := ensureQuantity(rr); err != nil { + return nil, err + } + marshal, err := yaml.Marshal(rr) + if err != nil { + return nil, err + } + return &ResourceRequirement{ + Type: resourceType, + ResourceRequirements: marshal, + }, nil +} + +func ensureQuantity(r *corev1.ResourceRequirements) error { + for rsc, limitsQuantity := range r.Limits { + requestsQuantity, ok := r.Requests[rsc] + if !ok { + continue + } + if requestsQuantity.Cmp(limitsQuantity) <= 0 { + continue + } + return fmt.Errorf("requests %s must be less than or equal to limits %s", + requestsQuantity.String(), limitsQuantity.String()) + } + return nil +} diff --git a/pkg/helpers/resourcerequirement/resourcerequirement_test.go b/pkg/helpers/resourcerequirement/resourcerequirement_test.go new file mode 100644 index 000000000..956203d3f --- /dev/null +++ b/pkg/helpers/resourcerequirement/resourcerequirement_test.go @@ -0,0 +1,145 @@ +// Copyright Contributors to the Open Cluster Management project + +package resourcerequirement + +import ( + _ "embed" + "testing" + + "github.com/ghodss/yaml" + "github.com/openshift/library-go/pkg/assets" + v1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + operatorv1 "open-cluster-management.io/api/operator/v1" +) + +//go:embed fake_deployment.yaml +var fakeDeploymentYaml string + +type values struct { + ResourceRequirement ResourceRequirement +} + +func TestResourceRequirement(t *testing.T) { + expectedLimitsCpu := "200m" + expectedRequestsCpu := "100m" + expectedLimitsMemory := "256Mi" + expectedRequestsMemory := "128Mi" + + limits := map[string]string{ + "cpu": expectedLimitsCpu, + "memory": expectedLimitsMemory, + } + requests := map[string]string{ + "cpu": expectedRequestsCpu, + "memory": expectedRequestsMemory, + } + r, err := NewResourceRequirement(string(operatorv1.ResourceQosClassResourceRequirement), limits, requests) + if err != nil { + t.Fatalf("failed to create resource requirement: %v", err) + } + val := values{ + ResourceRequirement: *r, + } + data := assets.MustCreateAssetFromTemplate("fake-deployment", []byte(fakeDeploymentYaml), val).Data + deployment := &v1.Deployment{} + if err := yaml.Unmarshal(data, deployment); err != nil { + t.Fatalf("failed to unmarshal deployment: %v", err) + } + resources := deployment.Spec.Template.Spec.Containers[0].Resources + if resources.Limits.Cpu().String() != expectedLimitsCpu { + t.Fatalf("expect limits.cpu is %s, but got %s", expectedLimitsCpu, resources.Limits.Cpu().String()) + } + if resources.Requests.Cpu().String() != expectedRequestsCpu { + t.Fatalf("expect requests.cpu is %s, but got %s", expectedRequestsCpu, resources.Requests.Cpu().String()) + } + if resources.Limits.Memory().String() != expectedLimitsMemory { + t.Fatalf("expect limits.memory to be %s, but got %s", expectedLimitsMemory, resources.Limits.Memory().String()) + } + if resources.Requests.Memory().String() != expectedRequestsMemory { + t.Fatalf("expect requests.memory to be %s, but got %s", expectedRequestsMemory, resources.Requests.Memory().String()) + } +} + +func TestEnsureQuantity(t *testing.T) { + tests := []struct { + name string + limits map[string]string + requests map[string]string + wantErr bool + }{ + { + name: "requests less than limits", + limits: map[string]string{ + "cpu": "200m", + "memory": "256Mi", + }, + requests: map[string]string{ + "cpu": "100m", + "memory": "128Mi", + }, + wantErr: false, + }, + { + name: "requests equal to limits", + limits: map[string]string{ + "cpu": "200m", + "memory": "256Mi", + }, + requests: map[string]string{ + "cpu": "200m", + "memory": "256Mi", + }, + wantErr: false, + }, + { + name: "requests greater than limits", + limits: map[string]string{ + "cpu": "100m", + "memory": "128Mi", + }, + requests: map[string]string{ + "cpu": "200m", + "memory": "256Mi", + }, + wantErr: true, + }, + { + name: "only limits but no requests", + limits: map[string]string{ + "cpu": "100m", + "memory": "128Mi", + }, + wantErr: false, + }, + { + name: "only requests but no limits", + requests: map[string]string{ + "cpu": "200m", + "memory": "256Mi", + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rr := &corev1.ResourceRequirements{ + Limits: corev1.ResourceList{}, + Requests: corev1.ResourceList{}, + } + for rsc, quantityStr := range tt.limits { + quantity, _ := resource.ParseQuantity(quantityStr) + rr.Limits[corev1.ResourceName(rsc)] = quantity + } + for rsc, quantityStr := range tt.requests { + quantity, _ := resource.ParseQuantity(quantityStr) + rr.Requests[corev1.ResourceName(rsc)] = quantity + } + if err := ensureQuantity(rr); (err != nil) != tt.wantErr { + t.Errorf("ensureQuantity() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +}