From 2ba4317430a5a9878de324afc2f9b172ec223130 Mon Sep 17 00:00:00 2001 From: Andrews Arokiam <87992092+andyi2it@users.noreply.github.com> Date: Sun, 24 Mar 2024 22:47:15 +0530 Subject: [PATCH] Updated docs for autoscaling on gpu. (#328) Signed-off-by: Andrews Arokiam --- docs/modelserving/autoscaling/autoscale-gpu-new.yaml | 2 ++ docs/modelserving/autoscaling/autoscale-new.yaml | 4 ++-- docs/modelserving/autoscaling/autoscaling.md | 4 ++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/modelserving/autoscaling/autoscale-gpu-new.yaml b/docs/modelserving/autoscaling/autoscale-gpu-new.yaml index 780862629..6048ab7dd 100644 --- a/docs/modelserving/autoscaling/autoscale-gpu-new.yaml +++ b/docs/modelserving/autoscaling/autoscale-gpu-new.yaml @@ -4,6 +4,8 @@ metadata: name: "flowers-sample-gpu" spec: predictor: + scaleTarget: 1 + scaleMetric: concurrency model: modelFormat: name: tensorflow diff --git a/docs/modelserving/autoscaling/autoscale-new.yaml b/docs/modelserving/autoscaling/autoscale-new.yaml index 700c49f16..3c43e99fc 100644 --- a/docs/modelserving/autoscaling/autoscale-new.yaml +++ b/docs/modelserving/autoscaling/autoscale-new.yaml @@ -2,10 +2,10 @@ apiVersion: "serving.kserve.io/v1beta1" kind: "InferenceService" metadata: name: "flowers-sample" - annotations: - autoscaling.knative.dev/target: "1" spec: predictor: + scaleTarget: 1 + scaleMetric: concurrency model: modelFormat: name: tensorflow diff --git a/docs/modelserving/autoscaling/autoscaling.md b/docs/modelserving/autoscaling/autoscaling.md index 914c7656f..7071518bf 100644 --- a/docs/modelserving/autoscaling/autoscaling.md +++ b/docs/modelserving/autoscaling/autoscaling.md @@ -248,6 +248,8 @@ Apply the tensorflow gpu example CR name: "flowers-sample-gpu" spec: predictor: + scaleTarget: 1 + scaleMetric: concurrency model: modelFormat: name: tensorflow @@ -265,6 +267,8 @@ Apply the tensorflow gpu example CR kind: "InferenceService" metadata: name: "flowers-sample-gpu" + annotations: + autoscaling.knative.dev/target: "1" spec: predictor: tensorflow: