Update custom model and ray docs (#421)
Signed-off-by: Sivanantham Chinnaiyan <[email protected]>
sivanantha321 authored Nov 19, 2024
1 parent 928499f commit 5bead2f
Showing 19 changed files with 540 additions and 228 deletions.
1 change: 0 additions & 1 deletion docs/modelserving/v1beta1/custom/custom_model/Procfile

This file was deleted.

451 changes: 282 additions & 169 deletions docs/modelserving/v1beta1/custom/custom_model/README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/modelserving/v1beta1/custom/custom_model/grpc/.python-version
@@ -0,0 +1 @@
3.11
1 change: 1 addition & 0 deletions docs/modelserving/v1beta1/custom/custom_model/grpc/Procfile
@@ -0,0 +1 @@
web: python -m model_grpc --model_name=custom-model
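
The Procfile above declares the process that Procfile-aware Cloud Native Buildpacks bake into the image entrypoint. A typical build invocation might look like the following sketch; the image tag and builder name are placeholders, not part of this commit:

pack build ${DOCKER_USER}/custom-model-grpc:v1 --builder=heroku/builder:24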
23 changes: 23 additions & 0 deletions docs/modelserving/v1beta1/custom/custom_model/grpc/grpc_client.py
@@ -0,0 +1,23 @@
import asyncio
import json
import base64
import os

from kserve import InferRequest, InferInput
from kserve.inference_client import InferenceGRPCClient


async def main():
    # SERVICE_HOSTNAME is used for the TLS/SNI name override when routing
    # through an ingress gateway.
    client = InferenceGRPCClient(
        url=os.environ.get("INGRESS_HOST", "localhost") + ":" + os.environ.get("INGRESS_PORT", "8081"),
        channel_args=[("grpc.ssl_target_name_override", os.environ.get("SERVICE_HOSTNAME", ""))],
    )
    # Build a v2 inference request from the base64-encoded image in input.json.
    with open("../input.json") as json_file:
        data = json.load(json_file)
    infer_input = InferInput(
        name="input-0",
        shape=[1],
        datatype="BYTES",
        data=[base64.b64decode(data["instances"][0]["image"]["b64"])],
    )
    request = InferRequest(infer_inputs=[infer_input], model_name=os.environ.get("MODEL_NAME", "custom-model"))
    res = await client.infer(infer_request=request)
    print(res)


asyncio.run(main())
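
For reference, the client above reads ../input.json, which follows the TensorFlow V1 HTTP API encoding for binary values; a minimal sketch of that file, with a placeholder base64 string:

{
  "instances": [
    {
      "image": {
        "b64": "<base64-encoded image bytes>"
      }
    }
  ]
}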
69 changes: 69 additions & 0 deletions docs/modelserving/v1beta1/custom/custom_model/grpc/model_grpc.py
@@ -0,0 +1,69 @@
import argparse
import io
from typing import Dict

import torch
from PIL import Image
from torchvision import models, transforms

from kserve import InferRequest, InferResponse, Model, ModelServer, logging, model_server
from kserve.utils.utils import get_predict_response

# This custom predictor example implements a custom model following the KServe
# v2 inference gRPC protocol. The input can be raw image bytes or an image
# tensor that a transformer has already pre-processed; the output is the
# prediction response.
class AlexNetModel(Model):
    def __init__(self, name: str):
        super().__init__(name)
        # Mark the model not ready until load() succeeds.
        self.ready = False
        self.load()

    def load(self):
        self.model = models.alexnet(pretrained=True)
        self.model.eval()
        # The ready flag is used by the model ready endpoint for readiness probes;
        # it is set to True when the model loads successfully without exceptions.
        self.ready = True

    async def predict(
        self,
        payload: InferRequest,
        headers: Dict[str, str] = None,
        response_headers: Dict[str, str] = None,
    ) -> InferResponse:
        req = payload.inputs[0]
        if req.datatype == "BYTES":
            # Raw image bytes: decode and apply the standard AlexNet preprocessing.
            input_image = Image.open(io.BytesIO(req.data[0]))
            preprocess = transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    transforms.Normalize(
                        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                    ),
                ]
            )
            input_tensor = preprocess(input_image)
            input_tensor = input_tensor.unsqueeze(0)
        elif req.datatype == "FP32":
            # Already pre-processed tensor input, e.g. produced by a transformer.
            np_array = payload.inputs[0].as_numpy()
            input_tensor = torch.Tensor(np_array)
        else:
            # Reject datatypes this example does not handle.
            raise ValueError(f"Unsupported input datatype: {req.datatype}")

        output = self.model(input_tensor)
        output = torch.nn.functional.softmax(output, dim=1)
        values, top_5 = torch.topk(output, 5)
        result = values.detach().numpy()
        return get_predict_response(payload, result, self.name)


parser = argparse.ArgumentParser(parents=[model_server.parser])
args, _ = parser.parse_known_args()

if __name__ == "__main__":
    # Configure the kserve and uvicorn loggers
    if args.configure_logging:
        logging.configure_logging(args.log_config_file)
    model = AlexNetModel(args.model_name)
    model.load()
    ModelServer().start([model])
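
To smoke-test the server locally, it can be started with the same command the Procfile uses; grpc_client.py above then connects via the default gRPC port 8081 assumed in its INGRESS_PORT fallback:

python -m model_grpc --model_name=custom-model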
3 changes: 3 additions & 0 deletions docs/modelserving/v1beta1/custom/custom_model/grpc/requirements.txt
@@ -0,0 +1,3 @@
kserve
torchvision==0.18.0
pillow >=10.3.0,<11.0.0
13 changes: 0 additions & 13 deletions docs/modelserving/v1beta1/custom/custom_model/grpc_test_client.py

This file was deleted.

42 changes: 0 additions & 42 deletions docs/modelserving/v1beta1/custom/custom_model/model.py

This file was deleted.

1 change: 1 addition & 0 deletions docs/modelserving/v1beta1/custom/custom_model/ray/.python-version
@@ -0,0 +1 @@
3.11
1 change: 1 addition & 0 deletions docs/modelserving/v1beta1/custom/custom_model/ray/Procfile
@@ -0,0 +1 @@
web: python -m model_remote --model_name=custom-model
67 changes: 67 additions & 0 deletions docs/modelserving/v1beta1/custom/custom_model/ray/model_remote.py
@@ -0,0 +1,67 @@
import argparse
import base64
import io
from typing import Dict

from torchvision import models, transforms
import torch
from PIL import Image
from ray import serve
from kserve import Model, ModelServer, logging, model_server
from kserve.ray import RayModel


# The model handle name should match the model endpoint name.
@serve.deployment(name="custom-model", num_replicas=1)
class AlexNetModel(Model):
    def __init__(self, name: str):
        super().__init__(name)
        self.ready = False
        self.load()

    def load(self):
        self.model = models.alexnet(pretrained=True, progress=False)
        self.model.eval()
        # The ready flag is used by the model ready endpoint for readiness probes;
        # it is set to True when the model loads successfully without exceptions.
        self.ready = True

    async def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
        inputs = payload["instances"]

        # Input follows the Tensorflow V1 HTTP API for binary values
        # https://www.tensorflow.org/tfx/serving/api_rest#encoding_binary_values
        data = inputs[0]["image"]["b64"]
        raw_img_data = base64.b64decode(data)
        input_image = Image.open(io.BytesIO(raw_img_data))
        preprocess = transforms.Compose(
            [
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

        input_tensor = preprocess(input_image)
        input_batch = input_tensor.unsqueeze(0)
        output = self.model(input_batch)
        output = torch.nn.functional.softmax(output, dim=1)
        values, top_5 = torch.topk(output, 5)
        return {"predictions": values.tolist()}


parser = argparse.ArgumentParser(parents=[model_server.parser])
args, _ = parser.parse_known_args()

if __name__ == "__main__":
    # Configure the kserve and uvicorn loggers
    if args.configure_logging:
        logging.configure_logging(args.log_config_file)
    app = AlexNetModel.bind(name=args.model_name)
    handle = serve.run(app)
    model = RayModel(name=args.model_name, handle=handle)
    model.load()
    ModelServer().start([model])
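
Because the predictor runs as a Ray Serve deployment, replica count and per-replica resources are set on the decorator rather than on the model server. A minimal sketch, with illustrative values only:

@serve.deployment(name="custom-model", num_replicas=2, ray_actor_options={"num_cpus": 1})
class AlexNetModel(Model):
    ...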
3 changes: 3 additions & 0 deletions docs/modelserving/v1beta1/custom/custom_model/ray/requirements.txt
@@ -0,0 +1,3 @@
kserve[ray]
torchvision==0.18.0
pillow >=10.3.0,<11.0.0

2 changes: 0 additions & 2 deletions docs/modelserving/v1beta1/custom/custom_model/requirements.txt

This file was deleted.

1 change: 1 addition & 0 deletions docs/modelserving/v1beta1/custom/custom_model/rest/.python-version
@@ -0,0 +1 @@
3.11
1 change: 1 addition & 0 deletions docs/modelserving/v1beta1/custom/custom_model/rest/Procfile
@@ -0,0 +1 @@
web: python -m model --model_name=custom-model
84 changes: 84 additions & 0 deletions docs/modelserving/v1beta1/custom/custom_model/rest/model.py
@@ -0,0 +1,84 @@
import argparse
import base64
import io
import time

from fastapi.middleware.cors import CORSMiddleware
from torchvision import models, transforms
from typing import Dict
import torch
from PIL import Image

import kserve
from kserve import Model, ModelServer, logging
from kserve.model_server import app
from kserve.utils.utils import generate_uuid


class AlexNetModel(Model):
    def __init__(self, name: str):
        super().__init__(name, return_response_headers=True)
        # Mark the model not ready until load() succeeds.
        self.ready = False
        self.load()

    def load(self):
        self.model = models.alexnet(pretrained=True)
        self.model.eval()
        # The ready flag is used by the model ready endpoint for readiness probes;
        # it is set to True when the model loads successfully without exceptions.
        self.ready = True

    async def predict(
        self,
        payload: Dict,
        headers: Dict[str, str] = None,
        response_headers: Dict[str, str] = None,
    ) -> Dict:
        start = time.time()
        # Input follows the Tensorflow V1 HTTP API for binary values
        # https://www.tensorflow.org/tfx/serving/api_rest#encoding_binary_values
        img_data = payload["instances"][0]["image"]["b64"]
        raw_img_data = base64.b64decode(img_data)
        input_image = Image.open(io.BytesIO(raw_img_data))
        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(input_image).unsqueeze(0)
        output = self.model(input_tensor)
        output = torch.nn.functional.softmax(output, dim=1)
        values, top_5 = torch.topk(output, 5)
        result = values.flatten().tolist()
        end = time.time()
        # Generate a unique id for this prediction; it is not returned in the
        # response below.
        response_id = generate_uuid()

        # Custom response headers can be added to the inference response
        if response_headers is not None:
            response_headers.update(
                {"prediction-time-latency": f"{round((end - start) * 1000, 9)}"}
            )

        return {"predictions": result}


parser = argparse.ArgumentParser(parents=[kserve.model_server.parser])
args, _ = parser.parse_known_args()

if __name__ == "__main__":
    # Configure the kserve and uvicorn loggers
    if args.configure_logging:
        logging.configure_logging(args.log_config_file)
    model = AlexNetModel(args.model_name)
    model.load()
    # Custom middlewares can be added to the model server app
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    ModelServer().start([model])
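
Assuming the server above is running locally on KServe's default HTTP port 8080, a minimal sketch of a test client (URL, port, and model name are assumptions):

import json

import requests

with open("../input.json") as f:
    payload = json.load(f)

# Predict via the v1 protocol endpoint and read back the custom latency header.
resp = requests.post(
    "http://localhost:8080/v1/models/custom-model:predict",
    json=payload,
)
print(resp.headers.get("prediction-time-latency"))
print(resp.json())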
3 changes: 3 additions & 0 deletions docs/modelserving/v1beta1/custom/custom_model/rest/requirements.txt
@@ -0,0 +1,3 @@
kserve
torchvision==0.18.0
pillow >=10.3.0,<11.0.0
1 change: 0 additions & 1 deletion docs/modelserving/v1beta1/custom/custom_model/runtime.txt

This file was deleted.
