Skip to content

Commit

Permalink
Add node group specific options to NodeGroupAutoscalingOptions from machineDeployment annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronfern committed Nov 16, 2023
1 parent c7e66c9 commit 33b5b2a
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 1 deletion.
63 changes: 62 additions & 1 deletion cluster-autoscaler/cloudprovider/mcm/mcm_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ package mcm
import (
"context"
"fmt"
"strconv"
"strings"
"time"

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand All @@ -46,6 +48,21 @@ const (
// GPULabel is the label added to nodes with GPU resource.
// TODO: Align on a GPU Label for Gardener.
GPULabel = "gardener.cloud/accelerator"

// ScaleDownUtilizationThresholdAnnotation is the annotation key for the value of NodeGroupAutoscalingOptions.ScaleDownUtilizationThreshold.
// GetOptions parses the value with strconv.ParseFloat (64-bit); a value that fails to parse is ignored and the default is kept.
ScaleDownUtilizationThresholdAnnotation = "autoscaler.gardener.cloud/scale-down-utilization-threshold"

// ScaleDownGpuUtilizationThresholdAnnotation is the annotation key for the value of NodeGroupAutoscalingOptions.ScaleDownGpuUtilizationThreshold.
// GetOptions parses the value with strconv.ParseFloat (64-bit); a value that fails to parse is ignored and the default is kept.
ScaleDownGpuUtilizationThresholdAnnotation = "autoscaler.gardener.cloud/scale-down-gpu-utilization-threshold"

// ScaleDownUnneededTimeAnnotation is the annotation key for the value of NodeGroupAutoscalingOptions.ScaleDownUnneededTime.
// GetOptions parses the value with time.ParseDuration; a value that fails to parse is ignored and the default is kept.
ScaleDownUnneededTimeAnnotation = "autoscaler.gardener.cloud/scale-down-unneeded-time"

// ScaleDownUnreadyTimeAnnotation is the annotation key for the value of NodeGroupAutoscalingOptions.ScaleDownUnreadyTime.
// GetOptions parses the value with time.ParseDuration; a value that fails to parse is ignored and the default is kept.
ScaleDownUnreadyTimeAnnotation = "autoscaler.gardener.cloud/scale-down-unready-time"

// MaxNodeProvisionTimeAnnotation is the annotation key for the value of NodeGroupAutoscalingOptions.MaxNodeProvisionTime.
// GetOptions parses the value with time.ParseDuration; a value that fails to parse is ignored and the default is kept.
MaxNodeProvisionTimeAnnotation = "autoscaler.gardener.cloud/max-node-provision-time"
)

// MCMCloudProvider implements the cloud provider interface for machine-controller-manager
Expand Down Expand Up @@ -423,7 +440,51 @@ func (machinedeployment *MachineDeployment) Nodes() ([]cloudprovider.Instance, e
// Implementation optional.
//
// The returned options start as a copy of defaults. Individual settings are
// then overridden by the autoscaler.gardener.cloud/* annotations found on the
// MachineDeployment object backing this node group:
//
//   - ScaleDownUtilizationThresholdAnnotation and
//     ScaleDownGpuUtilizationThresholdAnnotation are parsed as 64-bit floats.
//   - ScaleDownUnneededTimeAnnotation, ScaleDownUnreadyTimeAnnotation and
//     MaxNodeProvisionTimeAnnotation are parsed with time.ParseDuration.
//
// An annotation that is absent, or whose value cannot be parsed, leaves the
// corresponding default untouched (best effort). An error is returned only
// when the MachineDeployment object cannot be fetched or is nil.
func (machinedeployment *MachineDeployment) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) {
	mcdSpec, err := machinedeployment.mcmManager.GetMachineDeploymentSpec(machinedeployment.Name)
	if err != nil {
		return nil, err
	}
	if mcdSpec == nil {
		return nil, fmt.Errorf("nil machinedeployment returned for %s", machinedeployment.Name)
	}

	// Work on a copy of the defaults so that every option without an
	// annotation override, including fields not handled below, keeps its
	// default value instead of being reset to the zero value.
	options := defaults
	annotations := mcdSpec.Annotations

	// overrideFloat stores the float value of annotation key in target when
	// the annotation exists and parses cleanly; otherwise target is unchanged.
	overrideFloat := func(key string, target *float64) {
		raw, ok := annotations[key]
		if !ok {
			return
		}
		if parsed, parseErr := strconv.ParseFloat(raw, 64); parseErr == nil {
			*target = parsed
		}
	}

	// overrideDuration stores the duration value of annotation key in target
	// when the annotation exists and parses cleanly; otherwise target is
	// unchanged.
	overrideDuration := func(key string, target *time.Duration) {
		raw, ok := annotations[key]
		if !ok {
			return
		}
		if parsed, parseErr := time.ParseDuration(raw); parseErr == nil {
			*target = parsed
		}
	}

	overrideFloat(ScaleDownUtilizationThresholdAnnotation, &options.ScaleDownUtilizationThreshold)
	overrideFloat(ScaleDownGpuUtilizationThresholdAnnotation, &options.ScaleDownGpuUtilizationThreshold)
	overrideDuration(ScaleDownUnneededTimeAnnotation, &options.ScaleDownUnneededTime)
	overrideDuration(ScaleDownUnreadyTimeAnnotation, &options.ScaleDownUnreadyTime)
	overrideDuration(MaxNodeProvisionTimeAnnotation, &options.MaxNodeProvisionTime)

	return &options, nil
}

// TemplateNodeInfo returns a node template for this node group.
Expand Down
10 changes: 10 additions & 0 deletions cluster-autoscaler/cloudprovider/mcm/mcm_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,16 @@ func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error {
return nil
}

// GetMachineDeploymentSpec fetches the MachineDeployment object with the
// provided name from the manager's namespace via the MachineDeployment lister.
// Despite its name, it returns the whole MachineDeployment object, not only
// its spec.
//
// When the lookup fails, the lister error is wrapped (not flattened to text)
// so callers can still inspect the cause with errors.Is / errors.As.
func (m *McmManager) GetMachineDeploymentSpec(machineDeploymentName string) (*v1alpha1.MachineDeployment, error) {
	md, err := m.machineDeploymentLister.MachineDeployments(m.namespace).Get(machineDeploymentName)
	if err != nil {
		return nil, fmt.Errorf("unable to fetch MachineDeployment object %s, Error: %w", machineDeploymentName, err)
	}

	return md, nil
}

// GetMachineDeploymentNodeTemplate returns the NodeTemplate of a node belonging to the same worker pool as the machinedeployment
// If no node present then it forms the nodeTemplate using the one present in machineClass
func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *MachineDeployment) (*nodeTemplate, error) {
Expand Down

0 comments on commit 33b5b2a

Please sign in to comment.