diff --git a/docs/modelserving/autoscaling/autoscale-gpu-new.yaml b/docs/modelserving/autoscaling/autoscale-gpu-new.yaml
index 780862629..6048ab7dd 100644
--- a/docs/modelserving/autoscaling/autoscale-gpu-new.yaml
+++ b/docs/modelserving/autoscaling/autoscale-gpu-new.yaml
@@ -4,6 +4,8 @@ metadata:
   name: "flowers-sample-gpu"
 spec:
   predictor:
+    scaleTarget: 1
+    scaleMetric: concurrency
     model:
       modelFormat:
         name: tensorflow
diff --git a/docs/modelserving/autoscaling/autoscale-new.yaml b/docs/modelserving/autoscaling/autoscale-new.yaml
index 700c49f16..3c43e99fc 100644
--- a/docs/modelserving/autoscaling/autoscale-new.yaml
+++ b/docs/modelserving/autoscaling/autoscale-new.yaml
@@ -2,10 +2,10 @@ apiVersion: "serving.kserve.io/v1beta1"
 kind: "InferenceService"
 metadata:
   name: "flowers-sample"
-  annotations:
-    autoscaling.knative.dev/target: "1"
 spec:
   predictor:
+    scaleTarget: 1
+    scaleMetric: concurrency
     model:
       modelFormat:
         name: tensorflow
diff --git a/docs/modelserving/autoscaling/autoscaling.md b/docs/modelserving/autoscaling/autoscaling.md
index 914c7656f..7071518bf 100644
--- a/docs/modelserving/autoscaling/autoscaling.md
+++ b/docs/modelserving/autoscaling/autoscaling.md
@@ -248,6 +248,8 @@ Apply the tensorflow gpu example CR
       name: "flowers-sample-gpu"
     spec:
       predictor:
+        scaleTarget: 1
+        scaleMetric: concurrency
         model:
           modelFormat:
             name: tensorflow
@@ -265,6 +267,8 @@ Apply the tensorflow gpu example CR
     kind: "InferenceService"
     metadata:
       name: "flowers-sample-gpu"
+      annotations:
+        autoscaling.knative.dev/target: "1"
     spec:
       predictor:
         tensorflow: