diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go index ec3e348b5152..0b7d59d31641 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager.go @@ -29,9 +29,11 @@ import ( "fmt" v1appslister "k8s.io/client-go/listers/apps/v1" "k8s.io/utils/pointer" + "maps" "math/rand" "net/http" "os" + "slices" "strconv" "strings" "time" @@ -111,6 +113,12 @@ var ( machineGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machines"} machineSetGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machinesets"} machineDeploymentGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machinedeployments"} + + // ErrInvalidNodeTemplate is a sentinel error that indicates that the nodeTemplate is invalid. + ErrInvalidNodeTemplate = errors.New("invalid node template") + coreResourceNames = []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, "gpu"} + extraResourceNames = []v1.ResourceName{gpu.ResourceNvidiaGPU, v1.ResourcePods, v1.ResourceEphemeralStorage} + knownResourceNames = slices.Concat(coreResourceNames, extraResourceNames) ) // McmManager manages the client communication for MachineDeployments. @@ -130,12 +138,13 @@ type McmManager struct { } type instanceType struct { - InstanceType string - VCPU resource.Quantity - Memory resource.Quantity - GPU resource.Quantity - EphemeralStorage resource.Quantity - PodCount resource.Quantity + InstanceType string + VCPU resource.Quantity + Memory resource.Quantity + GPU resource.Quantity + EphemeralStorage resource.Quantity + PodCount resource.Quantity + ExtendedResources apiv1.ResourceList } type nodeTemplate struct { @@ -689,11 +698,9 @@ func generateInstanceStatus(machine *v1alpha1.Machine) *cloudprovider.InstanceSt func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error { var allErrs []error - capacityAttributes := []v1.ResourceName{"cpu", "gpu", "memory"} - - for _, attribute := range capacityAttributes { + for _, attribute := range coreResourceNames { if _, ok := nodeTemplateAttributes.Capacity[attribute]; !ok { - errMessage := fmt.Errorf("CPU, GPU and memory fields are mandatory") + errMessage := fmt.Errorf("the core resource fields %q are mandatory: %w", coreResourceNames, ErrInvalidNodeTemplate) klog.Warning(errMessage) allErrs = append(allErrs, errMessage) break @@ -701,13 +708,13 @@ func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error { } if nodeTemplateAttributes.Region == "" || nodeTemplateAttributes.InstanceType == "" || nodeTemplateAttributes.Zone == "" { - errMessage := fmt.Errorf("InstanceType, Region and Zone attributes are mandatory") + errMessage := fmt.Errorf("InstanceType, Region and Zone attributes are mandatory: %w", ErrInvalidNodeTemplate) klog.Warning(errMessage) allErrs = append(allErrs, errMessage) } if allErrs != nil { - return fmt.Errorf("%s", allErrs) + return errors.Join(allErrs...) } return nil @@ -771,21 +778,26 @@ func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *Machine klog.V(1).Infof("Nodes already existing in the worker pool %s", workerPool) baseNode := filteredNodes[0] klog.V(1).Infof("Worker pool node used to form template is %s and its capacity is cpu: %s, memory:%s", baseNode.Name, baseNode.Status.Capacity.Cpu().String(), baseNode.Status.Capacity.Memory().String()) + extendedResources := filterExtendedResources(baseNode.Status.Capacity) instance = instanceType{ - VCPU: baseNode.Status.Capacity[apiv1.ResourceCPU], - Memory: baseNode.Status.Capacity[apiv1.ResourceMemory], - GPU: baseNode.Status.Capacity[gpu.ResourceNvidiaGPU], - EphemeralStorage: baseNode.Status.Capacity[apiv1.ResourceEphemeralStorage], - PodCount: baseNode.Status.Capacity[apiv1.ResourcePods], + VCPU: baseNode.Status.Capacity[apiv1.ResourceCPU], + Memory: baseNode.Status.Capacity[apiv1.ResourceMemory], + GPU: baseNode.Status.Capacity[gpu.ResourceNvidiaGPU], + EphemeralStorage: baseNode.Status.Capacity[apiv1.ResourceEphemeralStorage], + PodCount: baseNode.Status.Capacity[apiv1.ResourcePods], + ExtendedResources: extendedResources, } } else { klog.V(1).Infof("Generating node template only using nodeTemplate from MachineClass %s: template resources-> cpu: %s,memory: %s", machineClass.Name, nodeTemplateAttributes.Capacity.Cpu().String(), nodeTemplateAttributes.Capacity.Memory().String()) + extendedResources := filterExtendedResources(nodeTemplateAttributes.Capacity) instance = instanceType{ - VCPU: nodeTemplateAttributes.Capacity[apiv1.ResourceCPU], - Memory: nodeTemplateAttributes.Capacity[apiv1.ResourceMemory], - GPU: nodeTemplateAttributes.Capacity["gpu"], + VCPU: nodeTemplateAttributes.Capacity[apiv1.ResourceCPU], + Memory: nodeTemplateAttributes.Capacity[apiv1.ResourceMemory], + GPU: nodeTemplateAttributes.Capacity["gpu"], + EphemeralStorage: nodeTemplateAttributes.Capacity[apiv1.ResourceEphemeralStorage], // Numbers pods per node will depends on the CNI used and the maxPods kubelet config, default is often 110 - PodCount: resource.MustParse("110"), + PodCount: resource.MustParse("110"), + ExtendedResources: extendedResources, } } instance.InstanceType = nodeTemplateAttributes.InstanceType @@ -964,6 +976,12 @@ func (m *McmManager) buildNodeFromTemplate(name string, template *nodeTemplate) node.Status.Capacity["hugepages-1Gi"] = *resource.NewQuantity(0, resource.DecimalSI) node.Status.Capacity["hugepages-2Mi"] = *resource.NewQuantity(0, resource.DecimalSI) + // populate extended resources from nodeTemplate + if len(template.InstanceType.ExtendedResources) > 0 { + klog.V(2).Infof("Copying extended resources %v to template node.Status.Capacity", template.InstanceType.ExtendedResources) + maps.Copy(node.Status.Capacity, template.InstanceType.ExtendedResources) + } + node.Status.Allocatable = node.Status.Capacity // NodeLabels @@ -1011,3 +1029,12 @@ func isMachineFailedOrTerminating(machine *v1alpha1.Machine) bool { } return false } + +// filterExtendedResources removes knownResourceNames from allResources and retains only the extendedResources. +func filterExtendedResources(allResources v1.ResourceList) (extendedResources v1.ResourceList) { + extendedResources = allResources.DeepCopy() + maps.DeleteFunc(extendedResources, func(name v1.ResourceName, _ resource.Quantity) bool { + return slices.Contains(knownResourceNames, name) + }) + return +} diff --git a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go index 90baac278205..eff9df3d7ce7 100644 --- a/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go +++ b/cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go @@ -17,6 +17,12 @@ limitations under the License. package mcm import ( + "errors" + "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/utils/ptr" + "maps" + "math/rand/v2" "testing" "github.com/stretchr/testify/assert" @@ -134,3 +140,107 @@ func TestFilterNodes(t *testing.T) { assert.EqualValues(t, len(filteredNodes), 1) assert.Equal(t, filteredNodes, []*apiv1.Node{node2}) } + +func TestValidateNodeTemplate(t *testing.T) { + m5LargeType := createSampleInstanceType("m5.large", "sap.com/mana", resource.MustParse("300")) + nt := v1alpha1.NodeTemplate{ + InstanceType: m5LargeType.InstanceType, + Capacity: make(apiv1.ResourceList), + } + err := validateNodeTemplate(&nt) + assert.NotNil(t, err) + assert.True(t, errors.Is(err, ErrInvalidNodeTemplate)) + + nt.Region = "europe-west1" + nt.Zone = nt.Region + "-b" + + err = validateNodeTemplate(&nt) + assert.NotNil(t, err) + assert.True(t, errors.Is(err, ErrInvalidNodeTemplate)) + + if err != nil { + t.Logf("error %s", err) + } +} + +func TestBuildNodeFromTemplate(t *testing.T) { + m := &McmManager{} + namePrefix := "bingo" + m5LargeType := createSampleInstanceType("m5.large", "sap.com/mana", resource.MustParse("300")) + labels := map[string]string{ + "weapon": "light-saber", + } + nt := nodeTemplate{ + InstanceType: m5LargeType, + Architecture: ptr.To("amd64"), + Labels: labels, + } + nt.Region = "europe-west1" + nt.Zone = nt.Region + "-b" + node, err := m.buildNodeFromTemplate(namePrefix, &nt) + assert.Nil(t, err) + if err != nil { + t.Logf("error %s", err) + } + assert.True(t, isSubset(labels, node.Labels), "labels should be a subset of node.Labels") + for _, k := range []apiv1.ResourceName{apiv1.ResourceMemory, apiv1.ResourceCPU} { + assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k) + } + + // test with gpu resource + gpuQuantity := resource.MustParse("4") + nt.InstanceType.GPU = gpuQuantity + node, err = m.buildNodeFromTemplate(namePrefix, &nt) + assert.Nil(t, err) + if err != nil { + t.Logf("error %s", err) + } + for _, k := range []apiv1.ResourceName{apiv1.ResourceMemory, apiv1.ResourceCPU, gpu.ResourceNvidiaGPU} { + assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k) + } + actualGpuQuantity, hasGpuResource := node.Status.Capacity[gpu.ResourceNvidiaGPU] + assert.True(t, hasGpuResource, "node.Status.Capacity should have a gpu resource named %q", gpu.ResourceNvidiaGPU) + if hasGpuResource { + assert.Equal(t, gpuQuantity, actualGpuQuantity, "node.Status.Capacity should have gpu resource named %q with value %s instead of %s", gpu.ResourceDirectX, gpuQuantity, actualGpuQuantity) + } +} + +func TestFilterExtendedResources(t *testing.T) { + resources := make(apiv1.ResourceList) + for _, n := range knownResourceNames { + resources[n] = *resource.NewQuantity(rand.Int64(), resource.DecimalSI) + } + customResources := make(apiv1.ResourceList) + customResources["resource.com/dongle"] = resource.MustParse("50") + customResources["quantum.com/memory"] = resource.MustParse("100Gi") + + allResources := resources.DeepCopy() + maps.Copy(allResources, customResources) + + extendedResources := filterExtendedResources(allResources) + t.Logf("TestFilterExtendedResources obtained: %+v", extendedResources) + assert.Equal(t, customResources, extendedResources) +} + +func createSampleInstanceType(instanceTypeName string, customResourceName apiv1.ResourceName, customResourceQuantity resource.Quantity) *instanceType { + awsM5Large := AWSInstanceTypes[instanceTypeName] + extendedResources := make(apiv1.ResourceList) + extendedResources[customResourceName] = customResourceQuantity + iType := &instanceType{ + InstanceType: awsM5Large.InstanceType, + VCPU: awsM5Large.VCPU, + Memory: awsM5Large.Memory, + GPU: awsM5Large.GPU, + ExtendedResources: extendedResources, + } + return iType +} + +func isSubset[K comparable, V comparable](map1, map2 map[K]V) bool { + for k, v := range map1 { + if val, ok := map2[k]; !ok || val != v { + return false + } + } + return true +}