diff --git a/config/internal/base/deployment.yaml.tmpl b/config/internal/base/deployment.yaml.tmpl index ee64e89e..84000316 100644 --- a/config/internal/base/deployment.yaml.tmpl +++ b/config/internal/base/deployment.yaml.tmpl @@ -22,7 +22,9 @@ metadata: app.kubernetes.io/name: modelmesh-controller name: {{.ServiceName}}-{{.Name}} spec: + {{if ge .Replicas 0}} replicas: {{.Replicas}} + {{end}} selector: matchLabels: modelmesh-service: {{.ServiceName}} diff --git a/controllers/autoscaler/autoscaler_reconciler.go b/controllers/autoscaler/autoscaler_reconciler.go index 91a87dba..f3aba606 100644 --- a/controllers/autoscaler/autoscaler_reconciler.go +++ b/controllers/autoscaler/autoscaler_reconciler.go @@ -104,6 +104,10 @@ func createAutoscaler(client client.Client, // Set HPA reconciler even though AutoscalerClass is None to delete existing hpa as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) return as, nil + case constants.AutoscalerClassExternal: + // Set HPA reconciler even though AutoscalerClass is External to delete existing hpa + as.HPA = hpa.NewHPAReconciler(client, scheme, runtimeMeta, mmDeploymentName, mmNamespace) + return as, nil default: return nil, errors.New("unknown autoscaler class type.") } diff --git a/controllers/modelmesh/modelmesh.go b/controllers/modelmesh/modelmesh.go index cf136a7d..2f4019b8 100644 --- a/controllers/modelmesh/modelmesh.go +++ b/controllers/modelmesh/modelmesh.go @@ -66,7 +66,7 @@ type Deployment struct { PullerImage string PullerImageCommand []string PullerResources *corev1.ResourceRequirements - Replicas uint16 + Replicas int32 Port uint16 TLSSecretName string TLSClientAuth string diff --git a/controllers/servingruntime_controller.go b/controllers/servingruntime_controller.go index 28144afd..9dff2aa8 100644 --- a/controllers/servingruntime_controller.go +++ b/controllers/servingruntime_controller.go @@ -28,6 +28,7 @@ import ( kserveapi "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" api "github.com/kserve/modelmesh-serving/apis/serving/v1alpha1" "github.com/kserve/modelmesh-serving/controllers/autoscaler" "github.com/kserve/modelmesh-serving/controllers/modelmesh" @@ -291,7 +292,12 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque //ScaleToZero or None autoscaler case if replicas == uint16(0) || as.Autoscaler.AutoscalerClass == autoscaler.AutoscalerClassNone { - mmDeployment.Replicas = replicas + mmDeployment.Replicas = int32(replicas) + if _, err = as.Reconcile(true); err != nil { + return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) + } + } else if as.Autoscaler.AutoscalerClass == constants.AutoscalerClassExternal { + mmDeployment.Replicas = -1 if _, err = as.Reconcile(true); err != nil { return ctrl.Result{}, fmt.Errorf("HPA reconcile error: %w", err) } @@ -309,9 +315,9 @@ func (r *ServingRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, fmt.Errorf("Could not get the deployment for the servingruntime : %w", err) } if *existingDeployment.Spec.Replicas == int32(0) { - mmDeployment.Replicas = uint16(*(as.Autoscaler.HPA.HPA).Spec.MinReplicas) + mmDeployment.Replicas = *(as.Autoscaler.HPA.HPA).Spec.MinReplicas } else { - mmDeployment.Replicas = uint16(*(existingDeployment.Spec.Replicas)) + mmDeployment.Replicas = *(existingDeployment.Spec.Replicas) } }