diff --git a/docs/modelserving/autoscaling/autoscale-gpu-new.yaml b/docs/modelserving/autoscaling/autoscale-gpu-new.yaml index 780862629..6048ab7dd 100644 --- a/docs/modelserving/autoscaling/autoscale-gpu-new.yaml +++ b/docs/modelserving/autoscaling/autoscale-gpu-new.yaml @@ -4,6 +4,8 @@ metadata: name: "flowers-sample-gpu" spec: predictor: + scaleTarget: 1 + scaleMetric: concurrency model: modelFormat: name: tensorflow diff --git a/docs/modelserving/autoscaling/autoscale-new.yaml b/docs/modelserving/autoscaling/autoscale-new.yaml index 700c49f16..3c43e99fc 100644 --- a/docs/modelserving/autoscaling/autoscale-new.yaml +++ b/docs/modelserving/autoscaling/autoscale-new.yaml @@ -2,10 +2,10 @@ apiVersion: "serving.kserve.io/v1beta1" kind: "InferenceService" metadata: name: "flowers-sample" - annotations: - autoscaling.knative.dev/target: "1" spec: predictor: + scaleTarget: 1 + scaleMetric: concurrency model: modelFormat: name: tensorflow diff --git a/docs/modelserving/autoscaling/autoscaling.md b/docs/modelserving/autoscaling/autoscaling.md index 914c7656f..7071518bf 100644 --- a/docs/modelserving/autoscaling/autoscaling.md +++ b/docs/modelserving/autoscaling/autoscaling.md @@ -248,6 +248,8 @@ Apply the tensorflow gpu example CR name: "flowers-sample-gpu" spec: predictor: + scaleTarget: 1 + scaleMetric: concurrency model: modelFormat: name: tensorflow @@ -265,6 +267,8 @@ Apply the tensorflow gpu example CR kind: "InferenceService" metadata: name: "flowers-sample-gpu" + annotations: + autoscaling.knative.dev/target: "1" spec: predictor: tensorflow: