Backport of Added Tensorflow model REST protocol test on triton for Kserve (#1846) #2113

Merged
@@ -0,0 +1,54 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: triton-kserve-rest
spec:
  annotations:
    prometheus.kserve.io/path: /metrics
    prometheus.kserve.io/port: "8002"
  containers:
    - args:
        - tritonserver
        - --model-store=/mnt/models
        - --grpc-port=9000
        - --http-port=8080
        - --allow-grpc=true
        - --allow-http=true
      image: nvcr.io/nvidia/tritonserver:23.05-py3
      name: kserve-container
      resources:
        limits:
          cpu: "1"
          memory: 2Gi
        requests:
          cpu: "1"
          memory: 2Gi
      ports:
        - containerPort: 8080
          protocol: TCP
  protocolVersions:
    - v2
    - grpc-v2
  supportedModelFormats:
    - autoSelect: true
      name: tensorrt
      priority: 1
      version: "8"
    - autoSelect: true
      name: tensorflow
      priority: 1
      version: "1"
    - autoSelect: true
      name: tensorflow
      priority: 1
      version: "2"
    - autoSelect: true
      name: onnx
      priority: 1
      version: "1"
    - name: pytorch
      version: "1"
    - autoSelect: true
      name: triton
      priority: 1
      version: "2"
@@ -226,6 +226,7 @@ Get Model Serving Access Token via UI
ELSE
SeleniumLibrary.Wait Until Page Contains Element xpath://td[@data-label="Tokens"]/button
SeleniumLibrary.Click Element xpath://td[@data-label="Tokens"]/button
Log ${service_account_name}
${token}= SeleniumLibrary.Get Element Attribute
... xpath://div[.="${service_account_name}"]/../../td[@data-label="Token Secret"]//span/input value
END
@@ -292,14 +292,18 @@
... set to ${TRUE}.
[Arguments] ${model_name} ${inference_input} ${token_auth}=${FALSE} ${project_title}=${NONE}
... ${kserve_mode}=Serverless ${deployment_mode}=UI ${service_port}=8888 ${end_point}=${NONE}
- ... ${service_account_name}=default-name ${token}=${NONE}
+ ... ${service_account_name}=default-name ${token}=${NONE} ${set_json_content_type}=${FALSE}

Check notice (Code scanning / Robocop): There is too many arguments per continuation line (3 / 1)
${curl_cmd}= Set Variable ${NONE}
${self_managed}= Is RHODS Self-Managed
IF $deployment_mode == 'UI'
${url}= Get Model Route Via UI ${model_name}
${kserve}= Run Keyword And Return Status SeleniumLibrary.Page Should Contain
... Single-model serving enabled
${curl_cmd}= Set Variable curl -s ${url} -d ${inference_input}
IF ${set_json_content_type}
${curl_cmd}= Catenate ${curl_cmd} -H 'Content-Type: application/json'
END
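# Note: when ${set_json_content_type} is ${TRUE}, the final command carries an explicit
# JSON content type, for example (route and payload file are hypothetical):
# curl -s https://my-model.example.com/v2/models/my-model/infer -d @input.json -H 'Content-Type: application/json'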
Log ${curl_cmd}
IF ${token_auth}
IF "${project_title}" == "${NONE}"
${project_title}= Get Model Project ${model_name}
@@ -310,12 +314,15 @@
END
${curl_cmd}= Catenate ${curl_cmd} -H "Authorization: Bearer ${token}"
END
Log ${curl_cmd}
IF ${kserve}
Fetch Knative CA Certificate filename=openshift_ca_istio_knative.crt
${curl_cmd}= Catenate ${curl_cmd} --cacert openshift_ca_istio_knative.crt
Log ${curl_cmd}
ELSE IF ${self_managed}
Fetch Openshift CA Bundle
${curl_cmd}= Catenate ${curl_cmd} --cacert openshift_ca.crt
Log ${curl_cmd}
END
ELSE IF $deployment_mode == 'Cli'
${rc} ${cmd_op}= Run And Return Rc And Output
@@ -355,15 +362,15 @@
[Documentation] Verifies that the inference result of a model is equal to an expected output
[Arguments] ${model_name} ${inference_input} ${expected_inference_output} ${token_auth}=${FALSE}
... ${project_title}=${NONE} ${deployment_mode}=UI ${kserve_mode}=Serverless
- ... ${service_port}=${NONE} ${end_point}=${NONE} ${token}=${NONE}
+ ... ${service_port}=${NONE} ${end_point}=${NONE} ${token}=${NONE} ${set_json_content_type}=${FALSE}

Check notice (Code scanning / Robocop): There is too many arguments per continuation line (4 / 1)
IF $deployment_mode == 'UI'
Open Model Serving Home Page
Switch Model Serving Project ${project_title}
END
${inference_output}= Get Model Inference model_name=${model_name} inference_input=${inference_input}
... token_auth=${token_auth} kserve_mode=${kserve_mode} project_title=${project_title}
... deployment_mode=${deployment_mode} service_port=${service_port} end_point=${end_point}
- ... token=${token} # robocop: disable
+ ... token=${token} set_json_content_type=${set_json_content_type} # robocop: disable
Log ${inference_output}
${result} ${list}= Inference Comparison ${expected_inference_output} ${inference_output}
Log ${result}
Expand All @@ -385,9 +392,10 @@
... This is a temporary mitigation meanwhile we find a better way to check the model
[Arguments] ${model_name} ${inference_input} ${expected_inference_output} ${token_auth}=${FALSE}
... ${project_title}=${NONE} ${retries}=${5} ${deployment_mode}=UI ${kserve_mode}=Serverless
- ... ${service_port}=${NONE} ${end_point}=${NONE}
+ ... ${service_port}=${NONE} ${end_point}=${NONE} ${set_json_content_type}=${FALSE}

Check notice (Code scanning / Robocop): There is too many arguments per continuation line (3 / 1)
${status}= Run Keyword And Return Status Verify Model Inference ${model_name} ${inference_input}
... ${expected_inference_output} ${token_auth} ${project_title} ${deployment_mode}
... set_json_content_type=${set_json_content_type}
IF not ${status}
${retry}= Set Variable ${0}
WHILE ${retry} < ${retries}
Expand All @@ -398,7 +406,7 @@
${status}= Run Keyword And Return Status Verify Model Inference
... ${model_name} ${inference_input} ${expected_inference_output} ${token_auth}
... project_title=${project_title} deployment_mode=${deployment_mode} kserve_mode=${kserve_mode}
- ... service_port=${service_port} end_point=${end_point}
+ ... service_port=${service_port} end_point=${end_point} set_json_content_type=${set_json_content_type}

Check warning (Code scanning / Robocop): Line is too long (122/120)
IF ${status}
BREAK
END
@@ -36,9 +36,14 @@
${EXPECTED_INFERENCE_REST_OUTPUT_FILE}= tests/Resources/Files/triton/kserve-triton-onnx-rest-output.json
${INFERENCE_REST_INPUT_PYTORCH}= @tests/Resources/Files/triton/kserve-triton-resnet-rest-input.json
${PYTORCH_MODEL_NAME}= resnet50
${TENSORFLOW_MODEL_NAME}= inception_graphdef
${TENSORFLOW_MODEL_LABEL}= inceptiongraphdef
${PYTORCH_RUNTIME_NAME}= triton-kserve-rest
${PYTORCH_RUNTIME_FILEPATH}= ${RESOURCES_DIRPATH}/triton_onnx_rest_servingruntime.yaml
${EXPECTED_INFERENCE_REST_OUTPUT_FILE_PYTORCH}= tests/Resources/Files/triton/kserve-triton-resnet-rest-output.json
${INFERENCE_REST_INPUT_TENSORFLOW}= @tests/Resources/Files/triton/kserve-triton-tensorflow-rest-input.json
${TENSORFLOW_RUNTIME_FILEPATH}= ${RESOURCES_DIRPATH}/triton_tensorflow_rest_servingruntime.yaml
${EXPECTED_INFERENCE_REST_OUTPUT_FILE_TENSORFLOW}= tests/Resources/Files/triton/kserve-triton-tensorflow-rest-output.json

Check warning (Code scanning / Robocop): Line is too long (127/120)
${INFERENCE_GRPC_INPUT_TENSORFLOW}= tests/Resources/Files/triton/kserve-triton-inception_graphdef-gRPC-input.json
${TENSORFLOW_MODEL_NAME}= inception_graphdef
${TENSORFLOW_MODEL_LABEL}= inceptiongraphdef
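
The REST input and output files referenced above are not part of this diff. For orientation, a KServe v2 REST request body generally has the following shape; the tensor name, shape, and data here are illustrative, not the actual file contents:

{
  "inputs": [
    {"name": "input_tensor", "shape": [1, 3], "datatype": "FP32", "data": [0.1, 0.2, 0.3]}
  ]
}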
@@ -99,11 +104,10 @@
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-rest

Check warning (Code scanning / Robocop): Test case 'Test PYTORCH Model Rest Inference Via UI(Triton on Kserve)' is too long (30/20)

- Test PYTORCH Model Inference Via UI(Triton on Kserve)
+ Test PYTORCH Model Rest Inference Via UI(Triton on Kserve)
[Documentation] Test the deployment of a pytorch model in Kserve using Triton
- [Tags] Sanity RHOAIENG-11561
+ [Tags] Tier2 RHOAIENG-11561

Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
Expand Down Expand Up @@ -135,7 +139,7 @@

Test Onnx Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of an onnx model in Kserve using Triton
- [Tags] Sanity RHOAIENG-9053
+ [Tags] Tier2 RHOAIENG-9053
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
Expand Down Expand Up @@ -181,7 +185,7 @@

Test Tensorflow Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a tensorflow model in Kserve using Triton
- [Tags] Sanity RHOAIENG-9052
+ [Tags] Tier2 RHOAIENG-9052
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
Expand Down Expand Up @@ -213,8 +217,8 @@
... insecure=${True} protobuf_file=${PROTOBUFF_FILE} json_header="Authorization: Bearer ${token}"
Log ${inference_output}
${inference_output}= Evaluate json.dumps(${inference_output})
Log ${inference_output}
${result} ${list}= Inference Comparison ${EXPECTED_INFERENCE_GRPC_OUTPUT_TENSORFLOW} ${inference_output}
Log ${result}
Log ${list}
[Teardown] Run Keywords Get Kserve Events And Logs model_name=${TENSORFLOW_MODEL_NAME}
@@ -223,10 +227,11 @@
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-tensorflow-grpc

Check warning (Code scanning / Robocop): Test case 'Test KERAS Model Rest Inference Via UI(Triton on Kserve)' is too long (29/20)
Check warning (Code scanning / Robocop): Test case 'Test KERAS Model Rest Inference Via UI(Triton on Kserve)' has too many keywords inside (11/10)
- Test KERAS Model Inference Via UI(Triton on Kserve)
+ Test KERAS Model Rest Inference Via UI(Triton on Kserve)
Check warning (Code scanning / Robocop): Invalid number of empty lines between test cases (2/1)
[Documentation] Test the deployment of a keras model in Kserve using Triton
- [Tags] Sanity RHOAIENG-10328
+ [Tags] Tier2 RHOAIENG-10328

Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
@@ -248,14 +253,58 @@
Log ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
Run Keyword And Continue On Failure Verify Model Inference With Retries
... ${KERAS_MODEL_NAME} ${INFERENCE_REST_INPUT_KERAS} ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
- ... token_auth=${FALSE} project_title=${PRJ_TITLE}
+ ... token_auth=${FALSE} project_title=${PRJ_TITLE} set_json_content_type=${TRUE}
[Teardown] Run Keywords Get Kserve Events And Logs model_name=${KERAS_MODEL_NAME}
... project_title=${PRJ_TITLE}
... AND
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-keras-rest

Test KERAS Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a keras model in Kserve using Triton
[Tags] Tier2 RHOAIENG-10327
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
Open Dashboard Settings settings_page=Serving runtimes
Upload Serving Runtime Template runtime_filepath=${KERAS_GRPC_RUNTIME_FILEPATH}
... serving_platform=single runtime_protocol=gRPC

Check warning (Code scanning / Robocop): Test case 'Test KERAS Model Grpc Inference Via UI (Triton on Kserve)' has too many keywords inside (22/10)
Serving Runtime Template Should Be Listed displayed_name=${KERAS_RUNTIME_NAME_GRPC}
... serving_platform=single
Recreate S3 Data Connection project_title=${PRJ_TITLE} dc_name=model-serving-connection
... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
... aws_bucket_name=ods-ci-s3
Deploy Kserve Model Via UI model_name=${PYTORCH_MODEL_NAME} serving_runtime=triton-keras-grpc
... data_connection=model-serving-connection path=tritonkeras/model_repository/ model_framework=tensorflow - 2
... token=${TRUE}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${PYTORCH_MODEL_NAME}
... namespace=${PRJ_TITLE}
${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}= Load Json File file_path=${EXPECTED_INFERENCE_GRPC_OUTPUT_FILE_KERAS}
... as_string=${TRUE}
${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}= Load Json String ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}
${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}= Evaluate json.dumps(${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS})
Log ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}

Check warning (Code scanning / Robocop): Line is too long (123/120)
Open Model Serving Home Page
${host_url}= Get Model Route Via UI model_name=${PYTORCH_MODEL_NAME}
${host}= Evaluate re.search(r"${PATTERN}", r"${host_url}").group(1) re
Log ${host}

Check warning (Code scanning / Robocop): Line is too long (122/120)
${token}= Get Access Token Via UI single_model=${TRUE} model_name=resnet50 project_name=${PRJ_TITLE}
${inference_output}= Query Model With GRPCURL host=${host} port=443
... endpoint=inference.GRPCInferenceService/ModelInfer
... json_body=@ input_filepath=${INFERENCE_GRPC_INPUT_KERAS}
... insecure=${True} protobuf_file=${PROTOBUFF_FILE} json_header="Authorization: Bearer ${token}"
Log ${inference_output}

Check notice (Code scanning / Robocop): Variable '${result}' is assigned but not used
Check notice (Code scanning / Robocop): Variable '${list}' is assigned but not used
${inference_output}= Evaluate json.dumps(${inference_output})
Log ${inference_output}
${result} ${list}= Inference Comparison ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS} ${inference_output}
[Teardown] Run Keywords Get Kserve Events And Logs model_name=${PYTORCH_MODEL_NAME}
... project_title=${PRJ_TITLE}
... AND
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-keras-grpc

Test Python Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a python model in Kserve using Triton
[Tags] Tier2 RHOAIENG-15374
@@ -334,7 +383,7 @@

Test FIL Model Rest Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case
[Documentation] Test the deployment of a fil model in Kserve using Triton
- [Tags] Sanity RHOAIENG-15649
+ [Tags] Tier2 RHOAIENG-15649
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
@@ -361,8 +410,9 @@
... Clean All Models Of Current User
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-rest



Test FIL Model Grpc Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case

Check warning (Code scanning / Robocop): Invalid number of empty lines between test cases (2/1)
[Documentation] Test the deployment of a fil model in Kserve using Triton
[Tags] Sanity RHOAIENG-15823
Open Data Science Projects Home Page
@@ -406,6 +456,37 @@
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-grpc

Test Tensorflow Model Rest Inference Via UI (Triton on Kserve) # robocop: off=too-long-test-case

Check notice (Code scanning / Robocop): Variable '${result}' is assigned but not used
Check notice (Code scanning / Robocop): Variable '${list}' is assigned but not used
[Documentation] Test the deployment of a tensorflow model in Kserve using Triton
[Tags] Tier2 RHOAIENG-11568
Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
... existing_project=${FALSE}
Open Dashboard Settings settings_page=Serving runtimes
Upload Serving Runtime Template runtime_filepath=${TENSORFLOW_RUNTIME_FILEPATH}
... serving_platform=single runtime_protocol=REST
Serving Runtime Template Should Be Listed displayed_name=${PYTORCH_RUNTIME_NAME}
... serving_platform=single
Recreate S3 Data Connection project_title=${PRJ_TITLE} dc_name=model-serving-connection
... aws_access_key=${S3.AWS_ACCESS_KEY_ID} aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
... aws_bucket_name=ods-ci-s3
Deploy Kserve Model Via UI model_name=${TENSORFLOW_MODEL_NAME} serving_runtime=triton-kserve-rest
... data_connection=model-serving-connection path=triton/model_repository/
... model_framework=tensorflow - 2 token=${TRUE}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${TENSORFLOW_MODEL_LABEL}
... namespace=${PRJ_TITLE}
${EXPECTED_INFERENCE_REST_OUTPUT_TENSORFLOW}= Load Json File file_path=${EXPECTED_INFERENCE_REST_OUTPUT_FILE_TENSORFLOW}
... as_string=${TRUE}
Run Keyword And Continue On Failure Verify Model Inference With Retries
... ${TENSORFLOW_MODEL_NAME} ${INFERENCE_REST_INPUT_TENSORFLOW} ${EXPECTED_INFERENCE_REST_OUTPUT_TENSORFLOW}
... token_auth=${TRUE} project_title=${PRJ_TITLE} set_json_content_type=${TRUE}
[Teardown] Run Keywords Get Kserve Events And Logs model_name=${TENSORFLOW_MODEL_NAME}
... project_title=${PRJ_TITLE}
... AND
... Clean All Models Of Current User

Check warning (Code scanning / Robocop): Line is too long (132/120)
... AND
... Delete Serving Runtime Template From CLI displayed_name=triton-kserve-rest

Check warning (Code scanning / Robocop): Line is too long (121/120)

*** Keywords ***
Triton On Kserve Suite Setup
@@ -81,7 +81,7 @@ Test Onnx Model Rest Inference Via UI (Triton on Modelmesh)

Test Pytorch Model Rest Inference Via UI (Triton on Modelmesh)
[Documentation] Test the deployment of a pytorch model in Kserve using Triton
- [Tags] Sanity RHOAIENG-11561
+ [Tags] Tier2 RHOAIENG-11561

Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
@@ -120,7 +120,7 @@ Test Pytorch Model Rest Inference Via UI (Triton on Modelmesh)

Test Tensorflow Model Rest Inference Via UI (Triton on Modelmesh)
[Documentation] Test the deployment of a tensorflow model in Kserve using Triton
- [Tags] Sanity RHOAIENG-9069
+ [Tags] Tier2 RHOAIENG-9069

Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
@@ -156,7 +156,7 @@ Test Tensorflow Model Rest Inference Via UI (Triton on Modelmesh)

Test Python Model Rest Inference Via UI (Triton on Modelmesh)
[Documentation] Test the deployment of a python model in Kserve using Triton
- [Tags] Sanity RHOAIENG-11564
+ [Tags] Tier2 RHOAIENG-11564

Open Data Science Projects Home Page
Create Data Science Project title=${PRJ_TITLE} description=${PRJ_DESCRIPTION}
Expand Down
Loading