Commit
Added Tensorflow model REST protocol test on triton for Kserve (red-h…
Raghul-M authored Dec 4, 2024
1 parent d273f6f commit ed5b9ef
Showing 7 changed files with 148 additions and 48 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -0,0 +1,54 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: triton-kserve-rest
spec:
  annotations:
    prometheus.kserve.io/path: /metrics
    prometheus.kserve.io/port: "8002"
  containers:
    - args:
        - tritonserver
        - --model-store=/mnt/models
        - --grpc-port=9000
        - --http-port=8080
        - --allow-grpc=true
        - --allow-http=true
      image: nvcr.io/nvidia/tritonserver:23.05-py3
      name: kserve-container
      resources:
        limits:
          cpu: "1"
          memory: 2Gi
        requests:
          cpu: "1"
          memory: 2Gi
      ports:
        - containerPort: 8080
          protocol: TCP
  protocolVersions:
    - v2
    - grpc-v2
  supportedModelFormats:
    - autoSelect: true
      name: tensorrt
      priority: 1
      version: "8"
    - autoSelect: true
      name: tensorflow
      priority: 1
      version: "1"
    - autoSelect: true
      name: tensorflow
      priority: 1
      version: "2"
    - autoSelect: true
      name: onnx
      priority: 1
      version: "1"
    - name: pytorch
      version: "1"
    - autoSelect: true
      name: triton
      priority: 1
      version: "2"
@@ -226,6 +226,7 @@ Get Model Serving Access Token via UI
ELSE
SeleniumLibrary.Wait Until Page Contains Element xpath://td[@data-label="Tokens"]/button
SeleniumLibrary.Click Element xpath://td[@data-label="Tokens"]/button
Log ${service_account_name}
${token}= SeleniumLibrary.Get Element Attribute
... xpath://div[.="${service_account_name}"]/../../td[@data-label="Token Secret"]//span/input value
END
@@ -292,14 +292,18 @@ Get Model Inference
... set to ${TRUE}.
[Arguments] ${model_name} ${inference_input} ${token_auth}=${FALSE} ${project_title}=${NONE}
... ${kserve_mode}=Serverless ${deployment_mode}=UI ${service_port}=8888 ${end_point}=${NONE}
... ${service_account_name}=default-name ${token}=${NONE}
... ${service_account_name}=default-name ${token}=${NONE} ${set_json_content_type}=${FALSE}
${curl_cmd}= Set Variable ${NONE}
${self_managed}= Is RHODS Self-Managed
IF $deployment_mode == 'UI'
${url}= Get Model Route Via UI ${model_name}
${kserve}= Run Keyword And Return Status SeleniumLibrary.Page Should Contain
... Single-model serving enabled
${curl_cmd}= Set Variable curl -s ${url} -d ${inference_input}
IF ${set_json_content_type}
${curl_cmd}= Catenate ${curl_cmd} -H 'Content-Type: application/json'
END
Log ${curl_cmd}
IF ${token_auth}
IF "${project_title}" == "${NONE}"
${project_title}= Get Model Project ${model_name}
@@ -310,12 +314,15 @@ Get Model Inference
END
${curl_cmd}= Catenate ${curl_cmd} -H "Authorization: Bearer ${token}"
END
Log ${curl_cmd}
IF ${kserve}
Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt
${curl_cmd}= Catenate ${curl_cmd} --cacert openshift_ca_istio_knative.crt
Log ${curl_cmd}
ELSE IF ${self_managed}
Fetch Openshift CA Bundle
${curl_cmd}= Catenate ${curl_cmd} --cacert openshift_ca.crt
Log ${curl_cmd}
END
ELSE IF $deployment_mode == 'Cli'
${rc} ${cmd_op}= Run And Return Rc And Output
@@ -355,15 +362,15 @@ Verify Model Inference
[Documentation] Verifies that the inference result of a model is equal to an expected output
[Arguments] ${model_name} ${inference_input} ${expected_inference_output} ${token_auth}=${FALSE}
... ${project_title}=${NONE} ${deployment_mode}=UI ${kserve_mode}=Serverless
... ${service_port}=${NONE} ${end_point}=${NONE} ${token}=${NONE}
... ${service_port}=${NONE} ${end_point}=${NONE} ${token}=${NONE} ${set_json_content_type}=${FALSE}
IF $deployment_mode == 'UI'
Open Model Serving Home Page
Switch Model Serving Project ${project_title}
END
${inference_output}= Get Model Inference model_name=${model_name} inference_input=${inference_input}
... token_auth=${token_auth} kserve_mode=${kserve_mode} project_title=${project_title}
... deployment_mode=${deployment_mode} service_port=${service_port} end_point=${end_point}
... token=${token} # robocop: disable
... token=${token} set_json_content_type=${set_json_content_type} # robocop: disable
Log ${inference_output}
${result} ${list}= Inference Comparison ${expected_inference_output} ${inference_output}
Log ${result}
@@ -385,9 +392,10 @@ Verify Model Inference With Retries
... This is a temporary mitigation until we find a better way to check the model
[Arguments] ${model_name} ${inference_input} ${expected_inference_output} ${token_auth}=${FALSE}
... ${project_title}=${NONE} ${retries}=${5} ${deployment_mode}=UI ${kserve_mode}=Serverless
... ${service_port}=${NONE} ${end_point}=${NONE}
... ${service_port}=${NONE} ${end_point}=${NONE} ${set_json_content_type}=${FALSE}
${status}= Run Keyword And Return Status Verify Model Inference ${model_name} ${inference_input}
... ${expected_inference_output} ${token_auth} ${project_title} ${deployment_mode}
... set_json_content_type=${set_json_content_type}
IF not ${status}
${retry}= Set Variable ${0}
WHILE ${retry} < ${retries}
@@ -398,7 +406,7 @@
${status}= Run Keyword And Return Status Verify Model Inference
... ${model_name} ${inference_input} ${expected_inference_output} ${token_auth}
... project_title=${project_title} deployment_mode=${deployment_mode} kserve_mode=${kserve_mode}
... service_port=${service_port} end_point=${end_point}
... service_port=${service_port} end_point=${end_point} set_json_content_type=${set_json_content_type}
IF ${status}
BREAK
END
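The changes above make Get Model Inference append a JSON Content-Type header only when set_json_content_type is true, before the Bearer token and --cacert options are appended, and Verify Model Inference plus Verify Model Inference With Retries simply forward the new argument. A condensed Python sketch of that conditional curl assembly follows; the argument names mirror the keyword's, while the values in the usage line are illustrative:

```python
def build_curl_cmd(url, inference_input, *, set_json_content_type=False,
                   token_auth=False, token=None, kserve=True, self_managed=False):
    """Mirror of the keyword's conditional curl assembly (illustrative only)."""
    cmd = f"curl -s {url} -d {inference_input}"
    if set_json_content_type:
        cmd += " -H 'Content-Type: application/json'"
    if token_auth:
        cmd += f' -H "Authorization: Bearer {token}"'
    if kserve:                                    # Serverless: Knative/Istio CA
        cmd += " --cacert openshift_ca_istio_knative.crt"
    elif self_managed:                            # self-managed cluster CA bundle
        cmd += " --cacert openshift_ca.crt"
    return cmd


# Roughly what the UI path of the keyword logs for an authenticated REST call:
print(build_curl_cmd("https://<route>/v2/models/<model>/infer", "@input.json",
                     set_json_content_type=True, token_auth=True, token="<sa-token>"))
```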
@@ -36,9 +36,14 @@ ${ONNX_RUNTIME_FILEPATH}= ${RESOURCES_DIRPATH}/triton_onnx_rest_servingruntim
${EXPECTED_INFERENCE_REST_OUTPUT_FILE}= tests/Resources/Files/triton/kserve-triton-onnx-rest-output.json
${INFERENCE_REST_INPUT_PYTORCH}= @tests/Resources/Files/triton/kserve-triton-resnet-rest-input.json
${PYTORCH_MODEL_NAME}= resnet50
${TENSORFLOW_MODEL_NAME}= inception_graphdef
${TENSORFLOW_MODEL_LABEL}= inceptiongraphdef
${PYTORCH_RUNTIME_NAME}= triton-kserve-rest
${PYTORCH_RUNTIME_FILEPATH}= ${RESOURCES_DIRPATH}/triton_onnx_rest_servingruntime.yaml
${EXPECTED_INFERENCE_REST_OUTPUT_FILE_PYTORCH}= tests/Resources/Files/triton/kserve-triton-resnet-rest-output.json
${INFERENCE_REST_INPUT_TENSORFLOW}= @tests/Resources/Files/triton/kserve-triton-tensorflow-rest-input.json
${TENSORFLOW_RUNTIME_FILEPATH}= ${RESOURCES_DIRPATH}/triton_tensorflow_rest_servingruntime.yaml
${EXPECTED_INFERENCE_REST_OUTPUT_FILE_TENSORFLOW}= tests/Resources/Files/triton/kserve-triton-tensorflow-rest-output.json
${INFERENCE_GRPC_INPUT_TENSORFLOW}= tests/Resources/Files/triton/kserve-triton-inception_graphdef-gRPC-input.json
${TENSORFLOW_MODEL_NAME}= inception_graphdef
${TENSORFLOW_MODEL_LABEL}= inceptiongraphdef
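Note that ${INFERENCE_REST_INPUT_TENSORFLOW} begins with @, so the assembled curl command posts the contents of that file as the request body (curl's -d @file form). A minimal Python equivalent of that behaviour, assuming a placeholder route, would be:

```python
import json
import requests

input_file = "tests/Resources/Files/triton/kserve-triton-tensorflow-rest-input.json"
url = "https://<model-route>/v2/models/inception_graphdef/infer"  # placeholder route

# curl's `-d @file` sends the file contents as the request body;
# here the fixture is loaded explicitly and posted as JSON.
with open(input_file) as f:
    body = json.load(f)

resp = requests.post(url, json=body, timeout=30)
print(resp.status_code, resp.text[:200])
```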
@@ -99,10 +104,9 @@ Test Onnx Model Rest Inference Via UI (Triton on Kserve) # robocop: off=too-l
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-rest


Test PYTORCH Model Inference Via UI(Triton on Kserve)
Test PYTORCH Model Rest Inference Via UI(Triton on Kserve)
[Documentation] Test the deployment of a PyTorch model in Kserve using Triton
[Tags] Sanity RHOAIENG-11561
[Tags] Tier2 RHOAIENG-11561
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
@@ -134,7 +138,7 @@ Test PYTORCH Model Inference Via UI(Triton on Kserve)

Test Onnx Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of an ONNX model in Kserve using Triton
[Tags] Sanity RHOAIENG-9053
[Tags] Tier2 RHOAIENG-9053
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
@@ -180,7 +184,7 @@ Test Onnx Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-l

Test Tensorflow Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a TensorFlow model in Kserve using Triton
[Tags] Sanity RHOAIENG-9052
[Tags] Tier2 RHOAIENG-9052
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
@@ -223,9 +227,40 @@ Test Tensorflow Model Grpc Inference Via UI (Triton on Kserve) # robocop: off
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-tensorflow-grpc

Test KERAS Model Rest Inference Via UI(Triton on Kserve)
[Documentation] Test the deployment of a Keras model in Kserve using Triton
[Tags] Tier2 RHOAIENG-10328
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
Open Dashboard Settings settings_page=Serving runtimes
Upload Serving Runtime Template runtime_filepath=${KERAS_RUNTIME_FILEPATH}
... serving_platform=single runtime_protocol=REST
Serving Runtime Template Should Be Listed displayed_name=${KERAS_RUNTIME_NAME}
... serving_platform=single
Recreate S3 Data Connection project_title=${PRJ_TITLE} dc_name=model-serving-connection
... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
... aws_bucket_name=ods-ci-s3
Deploy Kserve Model Via UI model_name=${KERAS_MODEL_NAME} serving_runtime=triton-keras-rest
... data_connection=model-serving-connection path=tritonkeras/model_repository/ model_framework=tensorflow - 2
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${KERAS_MODEL_NAME}
... namespace=${PRJ_TITLE} timeout=180s
${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}= Load Json File
... file_path=${EXPECTED_INFERENCE_REST_OUTPUT_FILE_KERAS} as_string=${TRUE}
Log ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
Run Keyword And Continue On Failure Verify Model Inference With Retries
... ${KERAS_MODEL_NAME} ${INFERENCE_REST_INPUT_KERAS} ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
... token_auth=${FALSE} project_title=${PRJ_TITLE} set_json_content_type=${TRUE}
[Teardown] Run Keywords Get Kserve Events And Logs model_name=${KERAS_MODEL_NAME}
... project_title=${PRJ_TITLE}
... AND
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-keras-rest

Test KERAS Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a Keras model in Kserve using Triton
[Tags] Sanity RHOAIENG-10327
[Tags] Tier2 RHOAIENG-10327
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
@@ -267,38 +302,6 @@ Test KERAS Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-keras-grpc

Test KERAS Model Inference Via UI(Triton on Kserve)
[Documentation] Test the deployment of an keras model in Kserve using Triton
[Tags] Sanity RHOAIENG-10328
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
Open Dashboard Settings settings_page=Serving runtimes
Upload Serving Runtime Template runtime_filepath=${KERAS_RUNTIME_FILEPATH}
... serving_platform=single runtime_protocol=REST
Serving Runtime Template Should Be Listed displayed_name=${KERAS_RUNTIME_NAME}
... serving_platform=single
Recreate S3 Data Connection project_title=${PRJ_TITLE} dc_name=model-serving-connection
... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
... aws_bucket_name=ods-ci-s3
Deploy Kserve Model Via UI model_name=${PYTORCH_MODEL_NAME} serving_runtime=triton-keras-rest
... data_connection=model-serving-connection path=tritonkeras/model_repository/ model_framework=tensorflow - 2
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${PYTORCH_MODEL_NAME}
... namespace=${PRJ_TITLE} timeout=180s
${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}= Load Json File
... file_path=${EXPECTED_INFERENCE_REST_OUTPUT_FILE_KERAS} as_string=${TRUE}
Log ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
Run Keyword And Continue On Failure Verify Model Inference With Retries
... ${PYTORCH_MODEL_NAME} ${INFERENCE_REST_INPUT_KERAS} ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
... token_auth=${FALSE} project_title=${PRJ_TITLE}
[Teardown] Run Keywords Get Kserve Events And Logs model_name=${PYTORCH_MODEL_NAME}
... project_title=${PRJ_TITLE}
... AND
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-keras-rest

Test Python Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a Python model in Kserve using Triton
[Tags] Tier2 RHOAIENG-15374
@@ -344,7 +347,7 @@ Test Python Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-grpc

Test Python Model Rest Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a Python model in Kserve using Triton
[Tags] Tier2 RHOAIENG-15374
@@ -377,7 +380,7 @@ Test Python Model Rest Inference Via UI (Triton on Kserve) # robocop: off=too

Test FIL Model Rest Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a FIL model in Kserve using Triton
[Tags] Sanity RHOAIENG-15649
[Tags] Tier2 RHOAIENG-15649
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
@@ -449,6 +452,38 @@ Test FIL Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-lo
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-grpc

Test Tensorflow Model Rest Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a TensorFlow model in Kserve using Triton
[Tags] Tier2 RHOAIENG-11568
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
Open Dashboard Settings settings_page=Serving runtimes
Upload Serving Runtime Template runtime_filepath=${TENSORFLOW_RUNTIME_FILEPATH}
... serving_platform=single runtime_protocol=REST
Serving Runtime Template Should Be Listed displayed_name=${PYTORCH_RUNTIME_NAME}
... serving_platform=single
Recreate S3 Data Connection project_title=${PRJ_TITLE} dc_name=model-serving-connection
... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
... aws_bucket_name=ods-ci-s3
Deploy Kserve Model Via UI model_name=${TENSORFLOW_MODEL_NAME} serving_runtime=triton-kserve-rest
... data_connection=model-serving-connection path=triton/model_repository/
... model_framework=tensorflow - 2 token=${TRUE}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${TENSORFLOW_MODEL_LABEL}
... namespace=${PRJ_TITLE}
${EXPECTED_INFERENCE_REST_OUTPUT_TENSORFLOW}= Load Json File file_path=${EXPECTED_INFERENCE_REST_OUTPUT_FILE_TENSORFLOW}
... as_string=${TRUE}
Run Keyword And Continue On Failure Verify Model Inference With Retries
... ${TENSORFLOW_MODEL_NAME} ${INFERENCE_REST_INPUT_TENSORFLOW} ${EXPECTED_INFERENCE_REST_OUTPUT_TENSORFLOW}
... token_auth=${TRUE} project_title=${PRJ_TITLE} set_json_content_type=${TRUE}
[Teardown] Run Keywords Get Kserve Events And Logs model_name=${TENSORFLOW_MODEL_NAME}
... project_title=${PRJ_TITLE}
... AND
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-rest
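Both new REST test cases (Keras and TensorFlow) load the expected response from a JSON fixture and call Verify Model Inference With Retries, which repeats the request-and-compare cycle to ride out slow model start-up. A bare-bones Python sketch of that retry pattern is below; the URL and headers are placeholders, and the exact-equality check is a simplification of the suite's more tolerant Inference Comparison keyword:

```python
import json
import time

import requests


def inference_matches(url, input_file, expected_file, headers=None,
                      retries=5, wait=10):
    """Retry the inference call until the response matches the expected fixture."""
    with open(input_file) as f:
        body = json.load(f)
    with open(expected_file) as f:
        expected = json.load(f)
    for _ in range(retries + 1):
        resp = requests.post(url, json=body, headers=headers or {}, timeout=30)
        if resp.ok and resp.json() == expected:   # exact match is a simplification
            return True
        time.sleep(wait)                          # give the InferenceService time to become ready
    return False


# Illustrative call with a bearer token, as the TensorFlow REST test uses token auth:
# inference_matches("https://<route>/v2/models/inception_graphdef/infer",
#                   "kserve-triton-tensorflow-rest-input.json",
#                   "kserve-triton-tensorflow-rest-output.json",
#                   headers={"Authorization": "Bearer <sa-token>"})
```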


*** Keywords ***
Triton On Kserve Suite Setup
[Documentation] Suite setup steps for testing Triton. It creates some test variables
@@ -81,7 +81,7 @@ Test Onnx Model Rest Inference Via UI (Triton on Modelmesh)

Test Pytorch Model Rest Inference Via UI (Triton on Modelmesh)
[Documentation] Test the deployment of a PyTorch model in Modelmesh using Triton
[Tags] Sanity RHOAIENG-11561
[Tags] Tier2 RHOAIENG-11561
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
@@ -120,7 +120,7 @@ Test Pytorch Model Rest Inference Via UI (Triton on Modelmesh)

Test Tensorflow Model Rest Inference Via UI (Triton on Modelmesh)
[Documentation] Test the deployment of a TensorFlow model in Modelmesh using Triton
[Tags] Sanity RHOAIENG-9069
[Tags] Tier2 RHOAIENG-9069
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
@@ -156,7 +156,7 @@ Test Tensorflow Model Rest Inference Via UI (Triton on Modelmesh)

Test Python Model Rest Inference Via UI (Triton on Modelmesh)
[Documentation] Test the deployment of a Python model in Modelmesh using Triton
[Tags] Sanity RHOAIENG-11564
[Tags] Tier2 RHOAIENG-11564
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
