156
-157
-158
+158
159
160
161
@@ -1705,7 +1708,8 @@
378
379
380
-381 | class ModelServer:
+381
+382
| class ModelServer:
def __init__(
self,
http_port: int = args.http_port,
@@ -1717,8 +1721,6 @@
enable_grpc: bool = args.enable_grpc,
enable_docs_url: bool = args.enable_docs_url,
enable_latency_logging: bool = args.enable_latency_logging,
- configure_logging: bool = args.configure_logging,
- log_config: Optional[Union[Dict, str]] = args.log_config_file,
access_log_format: str = args.access_log_format,
):
"""KServe ModelServer Constructor
@@ -1733,9 +1735,12 @@
enable_grpc: Whether to turn on grpc server. Default: ``True``
enable_docs_url: Whether to turn on ``/docs`` Swagger UI. Default: ``False``.
enable_latency_logging: Whether to log latency metric. Default: ``True``.
- configure_logging: Whether to configure KServe and Uvicorn logging. Default: ``True``.
- log_config: File path or dict containing log config. Default: ``None``.
- access_log_format: Format to set for the access log (provided by asgi-logger). Default: ``None``
+ access_log_format: Format to set for the access log (provided by asgi-logger). Default: ``None``.
+ it allows to override only the `uvicorn.access`'s format configuration with a richer
+ set of fields (output hardcoded to `stdout`). This limitation is currently due to the
+ ASGI specs that don't describe how access logging should be implemented in detail
+ (please refer to this Uvicorn
+ [github issue](https://github.com/encode/uvicorn/issues/527) for more info).
"""
self.registered_models = registered_models
self.http_port = http_port
@@ -1756,17 +1761,11 @@
self._grpc_server = GRPCServer(
grpc_port, self.dataplane, self.model_repository_extension
)
-
- # Logs can be passed as a path to a file or a dictConfig.
- # We rely on Uvicorn to configure the loggers for us.
- if configure_logging:
- self.log_config = (
- log_config if log_config is not None else KSERVE_LOG_CONFIG
- )
- else:
- # By setting log_config to None we tell Uvicorn not to configure logging
- self.log_config = None
-
+ if args.configure_logging:
+ # If the logger does not have any handlers, then the logger is not configured.
+ # For backward compatibility, we configure the logger here.
+ if len(logger.handlers) == 0:
+ logging.configure_logging(args.log_config_file)
self.access_log_format = access_log_format
self._custom_exception_handler = None
@@ -1832,7 +1831,9 @@
self.dataplane,
self.model_repository_extension,
self.enable_docs_url,
- log_config=self.log_config,
+ # By setting log_config to None we tell Uvicorn not to configure logging as it is already
+ # configured by kserve.
+ log_config=None,
access_log_format=self.access_log_format,
)
await self._rest_server.run()
@@ -1852,7 +1853,9 @@
self.dataplane,
self.model_repository_extension,
self.enable_docs_url,
- log_config=self.log_config,
+ # By setting log_config to None we tell Uvicorn not to configure logging as it is already
+ # configured by kserve.
+ log_config=None,
access_log_format=self.access_log_format,
)
for _ in range(self.workers):
@@ -1936,7 +1939,7 @@
-__init__(http_port=args.http_port, grpc_port=args.grpc_port, workers=args.workers, max_threads=args.max_threads, max_asyncio_workers=args.max_asyncio_workers, registered_models=ModelRepository(), enable_grpc=args.enable_grpc, enable_docs_url=args.enable_docs_url, enable_latency_logging=args.enable_latency_logging, configure_logging=args.configure_logging, log_config=args.log_config_file, access_log_format=args.access_log_format)
+__init__(http_port=args.http_port, grpc_port=args.grpc_port, workers=args.workers, max_threads=args.max_threads, max_asyncio_workers=args.max_asyncio_workers, registered_models=ModelRepository(), enable_grpc=args.enable_grpc, enable_docs_url=args.enable_docs_url, enable_latency_logging=args.enable_latency_logging, access_log_format=args.access_log_format)
KServe ModelServer Constructor
@@ -2078,41 +2081,18 @@
|
-configure_logging |
-
-bool
- |
-
-
- Whether to configure KServe and Uvicorn logging. Default: True .
-
- |
-
-configure_logging
- |
-
-
-log_config |
-
-Optional[Union[Dict, str]]
- |
-
-
- File path or dict containing log config. Default: None .
-
- |
-
-log_config_file
- |
-
-
access_log_format |
str
|
- Format to set for the access log (provided by asgi-logger). Default: None
+ Format to set for the access log (provided by asgi-logger). Default: None .
+ it allows to override only the uvicorn.access 's format configuration with a richer
+ set of fields (output hardcoded to stdout ). This limitation is currently due to the
+ ASGI specs that don't describe how access logging should be implemented in detail
+ (please refer to this Uvicorn
+ github issue for more info).
|
@@ -2123,9 +2103,7 @@
|
Source code in kserve/model_server.py
-157
-158
-159
+159
160
161
162
@@ -2182,10 +2160,7 @@
213
214
215
-216
-217
-218
-219 | | def __init__(
self,
http_port: int = args.http_port,
grpc_port: int = args.grpc_port,
@@ -2196,8 +2171,6 @@
enable_grpc: bool = args.enable_grpc,
enable_docs_url: bool = args.enable_docs_url,
enable_latency_logging: bool = args.enable_latency_logging,
- configure_logging: bool = args.configure_logging,
- log_config: Optional[Union[Dict, str]] = args.log_config_file,
access_log_format: str = args.access_log_format,
):
"""KServe ModelServer Constructor
@@ -2212,9 +2185,12 @@
enable_grpc: Whether to turn on grpc server. Default: ``True``
enable_docs_url: Whether to turn on ``/docs`` Swagger UI. Default: ``False``.
enable_latency_logging: Whether to log latency metric. Default: ``True``.
- configure_logging: Whether to configure KServe and Uvicorn logging. Default: ``True``.
- log_config: File path or dict containing log config. Default: ``None``.
- access_log_format: Format to set for the access log (provided by asgi-logger). Default: ``None``
+ access_log_format: Format to set for the access log (provided by asgi-logger). Default: ``None``.
+ it allows to override only the `uvicorn.access`'s format configuration with a richer
+ set of fields (output hardcoded to `stdout`). This limitation is currently due to the
+ ASGI specs that don't describe how access logging should be implemented in detail
+ (please refer to this Uvicorn
+ [github issue](https://github.com/encode/uvicorn/issues/527) for more info).
"""
self.registered_models = registered_models
self.http_port = http_port
@@ -2235,17 +2211,11 @@
self._grpc_server = GRPCServer(
grpc_port, self.dataplane, self.model_repository_extension
)
-
- # Logs can be passed as a path to a file or a dictConfig.
- # We rely on Uvicorn to configure the loggers for us.
- if configure_logging:
- self.log_config = (
- log_config if log_config is not None else KSERVE_LOG_CONFIG
- )
- else:
- # By setting log_config to None we tell Uvicorn not to configure logging
- self.log_config = None
-
+ if args.configure_logging:
+ # If the logger does not have any handlers, then the logger is not configured.
+ # For backward compatibility, we configure the logger here.
+ if len(logger.handlers) == 0:
+ logging.configure_logging(args.log_config_file)
self.access_log_format = access_log_format
self._custom_exception_handler = None
|
@@ -2263,8 +2233,7 @@
Source code in kserve/model_server.py
-348
-349
+349
350
351
352
@@ -2275,7 +2244,8 @@ 357
358
359
-360 | def default_exception_handler(
+360
+361
| def default_exception_handler(
self, loop: asyncio.events.AbstractEventLoop, context: Dict[str, Any]
):
"""Default exception handler for event loop.
@@ -2304,8 +2274,7 @@
Source code in kserve/model_server.py
-334
-335
+335
336
337
338
@@ -2316,7 +2285,8 @@ 343
344
345
-346 | def register_exception_handler(
+346
+347
| def register_exception_handler(
self,
handler: Callable[[asyncio.events.AbstractEventLoop, Dict[str, Any]], None],
):
@@ -2368,8 +2338,7 @@
|
Source code in kserve/model_server.py
-372
-373
+373
374
375
376
@@ -2377,7 +2346,8 @@
378
379
380
-381 | def register_model(self, model: BaseKServeModel):
+381
+382
| def register_model(self, model: BaseKServeModel):
"""Register a model to the model server.
Args:
@@ -2440,15 +2410,15 @@
Source code in kserve/model_server.py
-362
-363
+363
364
365
366
367
368
369
-370 | def register_model_handle(self, name: str, model_handle: DeploymentHandle):
+370
+371
| def register_model_handle(self, name: str, model_handle: DeploymentHandle):
"""Register a model handle to the model server.
Args:
@@ -2496,7 +2466,10 @@
|
Source code in kserve/model_server.py
-221
+218
+219
+220
+221
222
223
224
@@ -2591,7 +2564,8 @@
313
314
315
-316 | | def start(
self, models: Union[List[BaseKServeModel], Dict[str, Deployment]]
) -> None:
"""Start the model server with a set of registered models.
@@ -2653,7 +2627,9 @@
self.dataplane,
self.model_repository_extension,
self.enable_docs_url,
- log_config=self.log_config,
+ # By setting log_config to None we tell Uvicorn not to configure logging as it is already
+ # configured by kserve.
+ log_config=None,
access_log_format=self.access_log_format,
)
await self._rest_server.run()
@@ -2673,7 +2649,9 @@
self.dataplane,
self.model_repository_extension,
self.enable_docs_url,
- log_config=self.log_config,
+ # By setting log_config to None we tell Uvicorn not to configure logging as it is already
+ # configured by kserve.
+ log_config=None,
access_log_format=self.access_log_format,
)
for _ in range(self.workers):
@@ -2729,8 +2707,7 @@
|
Source code in kserve/model_server.py
-318
-319
+319
320
321
322
@@ -2743,7 +2720,8 @@
329
330
331
-332 | async def stop(self, sig: Optional[int] = None):
+332
+333
| async def stop(self, sig: Optional[int] = None):
"""Stop the instances of REST and gRPC model servers.
Args:
@@ -2802,7 +2780,16 @@
62
63
64
-65
| class BaseKServeModel(ABC):
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
| class BaseKServeModel(ABC):
"""
A base class to inherit all of the kserve models from.
@@ -2819,6 +2806,15 @@
self.name = name
self.ready = False
+ def healthy(self) -> bool:
+ """
+ Check the health of this model. By default returns `self.ready`.
+
+ Returns:
+ True if healthy, false otherwise
+ """
+ return self.ready
+
def stop(self):
"""Stop handler can be overridden to perform model teardown"""
pass
@@ -2882,6 +2878,54 @@
+
+healthy()
+
+
+ Check the health of this model. By default returns self.ready .
+ Returns:
+
+
+
+Type |
+Description |
+
+
+
+
+
+bool
+ |
+
+
+ True if healthy, false otherwise
+
+ |
+
+
+
+
+Source code in kserve/model.py
+63
+64
+65
+66
+67
+68
+69
+70 | def healthy(self) -> bool:
+ """
+ Check the health of this model. By default returns `self.ready`.
+
+ Returns:
+ True if healthy, false otherwise
+ """
+ return self.ready
+
|
+
+
+
+
stop()
@@ -2889,9 +2933,9 @@
Stop handler can be overridden to perform model teardown
Source code in kserve/model.py
- | def stop(self):
+ | def stop(self):
"""Stop handler can be overridden to perform model teardown"""
pass
|
@@ -2910,16 +2954,7 @@
Bases: BaseKServeModel
Source code in kserve/model.py
-110
-111
-112
-113
-114
-115
-116
-117
-118
-119
+119
120
121
122
@@ -3248,7 +3283,16 @@
445
446
447
-448 | class Model(BaseKServeModel):
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
| class Model(BaseKServeModel):
def __init__(self, name: str, predictor_config: Optional[PredictorConfig] = None):
"""KServe Model Public Interface
@@ -3677,16 +3721,7 @@
|
Source code in kserve/model.py
-145
-146
-147
-148
-149
-150
-151
-152
-153
-154
+154
155
156
157
@@ -3748,7 +3783,16 @@
213
214
215
-216 | async def __call__(
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
| async def __call__(
self,
body: Union[Dict, CloudEvent, InferRequest],
verb: InferenceVerb = InferenceVerb.PREDICT,
@@ -3874,16 +3918,7 @@
|
Source code in kserve/model.py
-111
-112
-113
-114
-115
-116
-117
-118
-119
-120
+120
121
122
123
@@ -3906,7 +3941,16 @@
140
141
142
-143 | def __init__(self, name: str, predictor_config: Optional[PredictorConfig] = None):
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
| def __init__(self, name: str, predictor_config: Optional[PredictorConfig] = None):
"""KServe Model Public Interface
Model is intended to be subclassed to implement the model handlers.
@@ -4017,16 +4061,7 @@
|
Source code in kserve/model.py
-421
-422
-423
-424
-425
-426
-427
-428
-429
-430
+430
431
432
433
@@ -4044,7 +4079,16 @@
445
446
447
-448 | async def explain(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
| async def explain(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
"""`explain` handler can be overridden to implement the model explanation.
The default implementation makes call to the explainer if ``explainer_host`` is specified.
@@ -4106,15 +4150,15 @@
|
Source code in kserve/model.py
-260
-261
-262
-263
-264
-265
-266
-267
-268 | def load(self) -> bool:
+269
+270
+271
+272
+273
+274
+275
+276
+277 | def load(self) -> bool:
"""Load handler can be overridden to load the model from storage.
The `self.ready` should be set to True after the model is loaded. The flag is used for model health check.
@@ -4201,20 +4245,20 @@
|
Source code in kserve/model.py
-303
-304
-305
-306
-307
-308
-309
-310
-311
-312
+ | async def postprocess(
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
| async def postprocess(
self, result: Union[Dict, InferResponse], headers: Dict[str, str] = None
) -> Union[Dict, InferResponse]:
"""The `postprocess` handler can be overridden for inference result or response transformation.
@@ -4306,16 +4350,7 @@
|
Source code in kserve/model.py
-389
-390
-391
-392
-393
-394
-395
-396
-397
-398
+398
399
400
401
@@ -4336,7 +4371,16 @@
416
417
418
-419 | async def predict(
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
| async def predict(
self,
payload: Union[Dict, InferRequest, ModelInferRequest],
headers: Dict[str, str] = None,
@@ -4455,22 +4499,22 @@
|
Source code in kserve/model.py
-286
-287
-288
-289
-290
-291
-292
-293
-294
-295
+295
296
297
298
299
300
-301 | async def preprocess(
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
| async def preprocess(
self, payload: Union[Dict, InferRequest], headers: Dict[str, str] = None
) -> Union[Dict, InferRequest]:
"""`preprocess` handler can be overridden for data or feature transformation.
@@ -4500,16 +4544,7 @@
Source code in kserve/model.py
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
+ 97
98
99
100
@@ -4519,7 +4554,16 @@
104
105
106
-107 | class PredictorConfig:
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
| class PredictorConfig:
def __init__(
self,
predictor_host: str,
@@ -4619,16 +4663,7 @@
|
Source code in kserve/model.py
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
+ 98
99
100
101
@@ -4637,7 +4672,16 @@
104
105
106
-107 | def __init__(
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
| def __init__(
self,
predictor_host: str,
predictor_protocol: str = PredictorProtocol.REST_V1.value,
diff --git a/master/python_runtime_api/docs/index.html b/master/python_runtime_api/docs/index.html
index e82ecf377..a5fac2d79 100644
--- a/master/python_runtime_api/docs/index.html
+++ b/master/python_runtime_api/docs/index.html
@@ -32,7 +32,7 @@
diff --git a/master/reference/api/index.html b/master/reference/api/index.html
index 99754d008..3e84e5644 100644
--- a/master/reference/api/index.html
+++ b/master/reference/api/index.html
@@ -29,7 +29,7 @@
diff --git a/master/reference/swagger-ui/index.html b/master/reference/swagger-ui/index.html
index 9603e72d2..bc7bd71af 100644
--- a/master/reference/swagger-ui/index.html
+++ b/master/reference/swagger-ui/index.html
@@ -32,7 +32,7 @@
diff --git a/master/reference/v2_inference/index.html b/master/reference/v2_inference/index.html
index 4d8adafd3..315af96cd 100644
--- a/master/reference/v2_inference/index.html
+++ b/master/reference/v2_inference/index.html
@@ -32,7 +32,7 @@
diff --git a/master/reference/v2_inference/template/index.html b/master/reference/v2_inference/template/index.html
index f89d7568c..2ddaa8b06 100644
--- a/master/reference/v2_inference/template/index.html
+++ b/master/reference/v2_inference/template/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/KServeClient/index.html b/master/sdk_docs/docs/KServeClient/index.html
index 908e5e766..6ff8a9d29 100644
--- a/master/sdk_docs/docs/KServeClient/index.html
+++ b/master/sdk_docs/docs/KServeClient/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/KnativeAddressable/index.html b/master/sdk_docs/docs/KnativeAddressable/index.html
index 62cf218b9..3f3d4606a 100644
--- a/master/sdk_docs/docs/KnativeAddressable/index.html
+++ b/master/sdk_docs/docs/KnativeAddressable/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/KnativeCondition/index.html b/master/sdk_docs/docs/KnativeCondition/index.html
index 8e353d8c9..df59a7e43 100644
--- a/master/sdk_docs/docs/KnativeCondition/index.html
+++ b/master/sdk_docs/docs/KnativeCondition/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/KnativeStatus/index.html b/master/sdk_docs/docs/KnativeStatus/index.html
index e4d6be7ba..e1edca607 100644
--- a/master/sdk_docs/docs/KnativeStatus/index.html
+++ b/master/sdk_docs/docs/KnativeStatus/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/KnativeURL/index.html b/master/sdk_docs/docs/KnativeURL/index.html
index 59d774d44..a8bc89fb7 100644
--- a/master/sdk_docs/docs/KnativeURL/index.html
+++ b/master/sdk_docs/docs/KnativeURL/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/KnativeVolatileTime/index.html b/master/sdk_docs/docs/KnativeVolatileTime/index.html
index 9b05c49b6..87183579a 100644
--- a/master/sdk_docs/docs/KnativeVolatileTime/index.html
+++ b/master/sdk_docs/docs/KnativeVolatileTime/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/NetUrlUserinfo/index.html b/master/sdk_docs/docs/NetUrlUserinfo/index.html
index fd9341dc1..0f376b24c 100644
--- a/master/sdk_docs/docs/NetUrlUserinfo/index.html
+++ b/master/sdk_docs/docs/NetUrlUserinfo/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1Time/index.html b/master/sdk_docs/docs/V1Time/index.html
index 66f17ee8f..e610f7bf7 100644
--- a/master/sdk_docs/docs/V1Time/index.html
+++ b/master/sdk_docs/docs/V1Time/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1BuiltInAdapter/index.html b/master/sdk_docs/docs/V1alpha1BuiltInAdapter/index.html
index a43c093ea..0a789447d 100644
--- a/master/sdk_docs/docs/V1alpha1BuiltInAdapter/index.html
+++ b/master/sdk_docs/docs/V1alpha1BuiltInAdapter/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1ClusterServingRuntime/index.html b/master/sdk_docs/docs/V1alpha1ClusterServingRuntime/index.html
index c2b87437e..b257354c7 100644
--- a/master/sdk_docs/docs/V1alpha1ClusterServingRuntime/index.html
+++ b/master/sdk_docs/docs/V1alpha1ClusterServingRuntime/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1ClusterServingRuntimeList/index.html b/master/sdk_docs/docs/V1alpha1ClusterServingRuntimeList/index.html
index f037ec0e9..62fa36a68 100644
--- a/master/sdk_docs/docs/V1alpha1ClusterServingRuntimeList/index.html
+++ b/master/sdk_docs/docs/V1alpha1ClusterServingRuntimeList/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1Container/index.html b/master/sdk_docs/docs/V1alpha1Container/index.html
index 9e161d526..a992809d6 100644
--- a/master/sdk_docs/docs/V1alpha1Container/index.html
+++ b/master/sdk_docs/docs/V1alpha1Container/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1InferenceGraph/index.html b/master/sdk_docs/docs/V1alpha1InferenceGraph/index.html
index debada9c1..2cde39aa0 100644
--- a/master/sdk_docs/docs/V1alpha1InferenceGraph/index.html
+++ b/master/sdk_docs/docs/V1alpha1InferenceGraph/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1InferenceGraphList/index.html b/master/sdk_docs/docs/V1alpha1InferenceGraphList/index.html
index 7d2e201a4..da3f9f238 100644
--- a/master/sdk_docs/docs/V1alpha1InferenceGraphList/index.html
+++ b/master/sdk_docs/docs/V1alpha1InferenceGraphList/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1InferenceGraphSpec/index.html b/master/sdk_docs/docs/V1alpha1InferenceGraphSpec/index.html
index c32be2385..bc5ae9c22 100644
--- a/master/sdk_docs/docs/V1alpha1InferenceGraphSpec/index.html
+++ b/master/sdk_docs/docs/V1alpha1InferenceGraphSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1InferenceGraphStatus/index.html b/master/sdk_docs/docs/V1alpha1InferenceGraphStatus/index.html
index ad96672af..7e80d2d51 100644
--- a/master/sdk_docs/docs/V1alpha1InferenceGraphStatus/index.html
+++ b/master/sdk_docs/docs/V1alpha1InferenceGraphStatus/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1InferenceRouter/index.html b/master/sdk_docs/docs/V1alpha1InferenceRouter/index.html
index 54c51ece9..9471202c4 100644
--- a/master/sdk_docs/docs/V1alpha1InferenceRouter/index.html
+++ b/master/sdk_docs/docs/V1alpha1InferenceRouter/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1InferenceStep/index.html b/master/sdk_docs/docs/V1alpha1InferenceStep/index.html
index fdaf7b617..2258dd2db 100644
--- a/master/sdk_docs/docs/V1alpha1InferenceStep/index.html
+++ b/master/sdk_docs/docs/V1alpha1InferenceStep/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1InferenceTarget/index.html b/master/sdk_docs/docs/V1alpha1InferenceTarget/index.html
index f864a4efa..6aee47bd3 100644
--- a/master/sdk_docs/docs/V1alpha1InferenceTarget/index.html
+++ b/master/sdk_docs/docs/V1alpha1InferenceTarget/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1ServingRuntime/index.html b/master/sdk_docs/docs/V1alpha1ServingRuntime/index.html
index 6296e55b4..0846798df 100644
--- a/master/sdk_docs/docs/V1alpha1ServingRuntime/index.html
+++ b/master/sdk_docs/docs/V1alpha1ServingRuntime/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1ServingRuntimeList/index.html b/master/sdk_docs/docs/V1alpha1ServingRuntimeList/index.html
index 29263b6f7..183f2f08f 100644
--- a/master/sdk_docs/docs/V1alpha1ServingRuntimeList/index.html
+++ b/master/sdk_docs/docs/V1alpha1ServingRuntimeList/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1ServingRuntimePodSpec/index.html b/master/sdk_docs/docs/V1alpha1ServingRuntimePodSpec/index.html
index bfb6c5ccb..cab565d30 100644
--- a/master/sdk_docs/docs/V1alpha1ServingRuntimePodSpec/index.html
+++ b/master/sdk_docs/docs/V1alpha1ServingRuntimePodSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1ServingRuntimeSpec/index.html b/master/sdk_docs/docs/V1alpha1ServingRuntimeSpec/index.html
index e34233493..bde683469 100644
--- a/master/sdk_docs/docs/V1alpha1ServingRuntimeSpec/index.html
+++ b/master/sdk_docs/docs/V1alpha1ServingRuntimeSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1StorageHelper/index.html b/master/sdk_docs/docs/V1alpha1StorageHelper/index.html
index 9fbb899b3..56e5c3ce8 100644
--- a/master/sdk_docs/docs/V1alpha1StorageHelper/index.html
+++ b/master/sdk_docs/docs/V1alpha1StorageHelper/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1alpha1SupportedModelFormat/index.html b/master/sdk_docs/docs/V1alpha1SupportedModelFormat/index.html
index a86b3ef67..a3fce179b 100644
--- a/master/sdk_docs/docs/V1alpha1SupportedModelFormat/index.html
+++ b/master/sdk_docs/docs/V1alpha1SupportedModelFormat/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1AIXExplainerSpec/index.html b/master/sdk_docs/docs/V1beta1AIXExplainerSpec/index.html
index 70a4b9fd8..b4d0b16bf 100644
--- a/master/sdk_docs/docs/V1beta1AIXExplainerSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1AIXExplainerSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ARTExplainerSpec/index.html b/master/sdk_docs/docs/V1beta1ARTExplainerSpec/index.html
index e5330dedb..8c87ea15a 100644
--- a/master/sdk_docs/docs/V1beta1ARTExplainerSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1ARTExplainerSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1AlibiExplainerSpec/index.html b/master/sdk_docs/docs/V1beta1AlibiExplainerSpec/index.html
index 45d1de2e9..d2382dde1 100644
--- a/master/sdk_docs/docs/V1beta1AlibiExplainerSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1AlibiExplainerSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1Batcher/index.html b/master/sdk_docs/docs/V1beta1Batcher/index.html
index 737d02885..3944f810b 100644
--- a/master/sdk_docs/docs/V1beta1Batcher/index.html
+++ b/master/sdk_docs/docs/V1beta1Batcher/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ComponentExtensionSpec/index.html b/master/sdk_docs/docs/V1beta1ComponentExtensionSpec/index.html
index 15fc37a9b..d1bbc4812 100644
--- a/master/sdk_docs/docs/V1beta1ComponentExtensionSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1ComponentExtensionSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ComponentStatusSpec/index.html b/master/sdk_docs/docs/V1beta1ComponentStatusSpec/index.html
index c43041278..f264ee86c 100644
--- a/master/sdk_docs/docs/V1beta1ComponentStatusSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1ComponentStatusSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1CustomExplainer/index.html b/master/sdk_docs/docs/V1beta1CustomExplainer/index.html
index 87820e295..d3169065b 100644
--- a/master/sdk_docs/docs/V1beta1CustomExplainer/index.html
+++ b/master/sdk_docs/docs/V1beta1CustomExplainer/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1CustomPredictor/index.html b/master/sdk_docs/docs/V1beta1CustomPredictor/index.html
index 30ed32cbb..6a1ffdfd9 100644
--- a/master/sdk_docs/docs/V1beta1CustomPredictor/index.html
+++ b/master/sdk_docs/docs/V1beta1CustomPredictor/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1CustomTransformer/index.html b/master/sdk_docs/docs/V1beta1CustomTransformer/index.html
index a14a65b25..2608a6bd5 100644
--- a/master/sdk_docs/docs/V1beta1CustomTransformer/index.html
+++ b/master/sdk_docs/docs/V1beta1CustomTransformer/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ExplainerConfig/index.html b/master/sdk_docs/docs/V1beta1ExplainerConfig/index.html
index 8d742acff..c4b4b1de8 100644
--- a/master/sdk_docs/docs/V1beta1ExplainerConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1ExplainerConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ExplainerExtensionSpec/index.html b/master/sdk_docs/docs/V1beta1ExplainerExtensionSpec/index.html
index aa51e333b..bf268b18a 100644
--- a/master/sdk_docs/docs/V1beta1ExplainerExtensionSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1ExplainerExtensionSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ExplainerSpec/index.html b/master/sdk_docs/docs/V1beta1ExplainerSpec/index.html
index e416f3f7a..f7f0a9e6c 100644
--- a/master/sdk_docs/docs/V1beta1ExplainerSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1ExplainerSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ExplainersConfig/index.html b/master/sdk_docs/docs/V1beta1ExplainersConfig/index.html
index fd542c5e2..9eef5a960 100644
--- a/master/sdk_docs/docs/V1beta1ExplainersConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1ExplainersConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1FailureInfo/index.html b/master/sdk_docs/docs/V1beta1FailureInfo/index.html
index 799cab18a..abb364430 100644
--- a/master/sdk_docs/docs/V1beta1FailureInfo/index.html
+++ b/master/sdk_docs/docs/V1beta1FailureInfo/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1InferenceService/index.html b/master/sdk_docs/docs/V1beta1InferenceService/index.html
index da2fb3da0..1f1bd24c6 100644
--- a/master/sdk_docs/docs/V1beta1InferenceService/index.html
+++ b/master/sdk_docs/docs/V1beta1InferenceService/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1InferenceServiceList/index.html b/master/sdk_docs/docs/V1beta1InferenceServiceList/index.html
index 430c499b3..b68bf85b8 100644
--- a/master/sdk_docs/docs/V1beta1InferenceServiceList/index.html
+++ b/master/sdk_docs/docs/V1beta1InferenceServiceList/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1InferenceServiceSpec/index.html b/master/sdk_docs/docs/V1beta1InferenceServiceSpec/index.html
index e4ac2262e..29cfb7e06 100644
--- a/master/sdk_docs/docs/V1beta1InferenceServiceSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1InferenceServiceSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1InferenceServiceStatus/index.html b/master/sdk_docs/docs/V1beta1InferenceServiceStatus/index.html
index 1d4682818..7dfb1eea8 100644
--- a/master/sdk_docs/docs/V1beta1InferenceServiceStatus/index.html
+++ b/master/sdk_docs/docs/V1beta1InferenceServiceStatus/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1InferenceServicesConfig/index.html b/master/sdk_docs/docs/V1beta1InferenceServicesConfig/index.html
index 98a4c5e71..11dfe202a 100644
--- a/master/sdk_docs/docs/V1beta1InferenceServicesConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1InferenceServicesConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1IngressConfig/index.html b/master/sdk_docs/docs/V1beta1IngressConfig/index.html
index ccad24831..00773cd4d 100644
--- a/master/sdk_docs/docs/V1beta1IngressConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1IngressConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1LightGBMSpec/index.html b/master/sdk_docs/docs/V1beta1LightGBMSpec/index.html
index b31b28e2a..595255c31 100644
--- a/master/sdk_docs/docs/V1beta1LightGBMSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1LightGBMSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1LoggerSpec/index.html b/master/sdk_docs/docs/V1beta1LoggerSpec/index.html
index bff2ed547..e3ae19462 100644
--- a/master/sdk_docs/docs/V1beta1LoggerSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1LoggerSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ModelCopies/index.html b/master/sdk_docs/docs/V1beta1ModelCopies/index.html
index 01f9a3d6c..2700a2716 100644
--- a/master/sdk_docs/docs/V1beta1ModelCopies/index.html
+++ b/master/sdk_docs/docs/V1beta1ModelCopies/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ModelFormat/index.html b/master/sdk_docs/docs/V1beta1ModelFormat/index.html
index 9aef3bdef..632141ce6 100644
--- a/master/sdk_docs/docs/V1beta1ModelFormat/index.html
+++ b/master/sdk_docs/docs/V1beta1ModelFormat/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ModelRevisionStates/index.html b/master/sdk_docs/docs/V1beta1ModelRevisionStates/index.html
index 23175d9e6..4fe92a501 100644
--- a/master/sdk_docs/docs/V1beta1ModelRevisionStates/index.html
+++ b/master/sdk_docs/docs/V1beta1ModelRevisionStates/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ModelSpec/index.html b/master/sdk_docs/docs/V1beta1ModelSpec/index.html
index dcffda356..9c7bcc12f 100644
--- a/master/sdk_docs/docs/V1beta1ModelSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1ModelSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ModelStatus/index.html b/master/sdk_docs/docs/V1beta1ModelStatus/index.html
index 6460f5f12..6d9392f0f 100644
--- a/master/sdk_docs/docs/V1beta1ModelStatus/index.html
+++ b/master/sdk_docs/docs/V1beta1ModelStatus/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1ONNXRuntimeSpec/index.html b/master/sdk_docs/docs/V1beta1ONNXRuntimeSpec/index.html
index de6de25c4..a9e9ee4d9 100644
--- a/master/sdk_docs/docs/V1beta1ONNXRuntimeSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1ONNXRuntimeSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PMMLSpec/index.html b/master/sdk_docs/docs/V1beta1PMMLSpec/index.html
index 50835e0a7..db47310e5 100644
--- a/master/sdk_docs/docs/V1beta1PMMLSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1PMMLSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PaddleServerSpec/index.html b/master/sdk_docs/docs/V1beta1PaddleServerSpec/index.html
index 6afa526f4..7bfd5a512 100644
--- a/master/sdk_docs/docs/V1beta1PaddleServerSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1PaddleServerSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PodSpec/index.html b/master/sdk_docs/docs/V1beta1PodSpec/index.html
index 40773dbfd..ce1090b5b 100644
--- a/master/sdk_docs/docs/V1beta1PodSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1PodSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PredictorConfig/index.html b/master/sdk_docs/docs/V1beta1PredictorConfig/index.html
index 263a16796..171ede295 100644
--- a/master/sdk_docs/docs/V1beta1PredictorConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1PredictorConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PredictorExtensionSpec/index.html b/master/sdk_docs/docs/V1beta1PredictorExtensionSpec/index.html
index e57bec78e..a24c6dcba 100644
--- a/master/sdk_docs/docs/V1beta1PredictorExtensionSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1PredictorExtensionSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PredictorProtocols/index.html b/master/sdk_docs/docs/V1beta1PredictorProtocols/index.html
index 80a20ed67..88e4e0861 100644
--- a/master/sdk_docs/docs/V1beta1PredictorProtocols/index.html
+++ b/master/sdk_docs/docs/V1beta1PredictorProtocols/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PredictorSpec/index.html b/master/sdk_docs/docs/V1beta1PredictorSpec/index.html
index c3de1de40..83cf9a93a 100644
--- a/master/sdk_docs/docs/V1beta1PredictorSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1PredictorSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1PredictorsConfig/index.html b/master/sdk_docs/docs/V1beta1PredictorsConfig/index.html
index 43d1c9776..fec05ae1c 100644
--- a/master/sdk_docs/docs/V1beta1PredictorsConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1PredictorsConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1SKLearnSpec/index.html b/master/sdk_docs/docs/V1beta1SKLearnSpec/index.html
index 67db3b08d..cd17056d6 100644
--- a/master/sdk_docs/docs/V1beta1SKLearnSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1SKLearnSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1StorageSpec/index.html b/master/sdk_docs/docs/V1beta1StorageSpec/index.html
index 0e70ab6e6..93c11f155 100644
--- a/master/sdk_docs/docs/V1beta1StorageSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1StorageSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1TFServingSpec/index.html b/master/sdk_docs/docs/V1beta1TFServingSpec/index.html
index 0d6bde0f4..372fd7a6e 100644
--- a/master/sdk_docs/docs/V1beta1TFServingSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1TFServingSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1TorchServeSpec/index.html b/master/sdk_docs/docs/V1beta1TorchServeSpec/index.html
index e06337bed..04414b117 100644
--- a/master/sdk_docs/docs/V1beta1TorchServeSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1TorchServeSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1TransformerConfig/index.html b/master/sdk_docs/docs/V1beta1TransformerConfig/index.html
index 3a0d853fd..e26efcdbd 100644
--- a/master/sdk_docs/docs/V1beta1TransformerConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1TransformerConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1TransformerSpec/index.html b/master/sdk_docs/docs/V1beta1TransformerSpec/index.html
index 293f9ef0f..6edc69704 100644
--- a/master/sdk_docs/docs/V1beta1TransformerSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1TransformerSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1TransformersConfig/index.html b/master/sdk_docs/docs/V1beta1TransformersConfig/index.html
index 0d119353b..b58e52fd3 100644
--- a/master/sdk_docs/docs/V1beta1TransformersConfig/index.html
+++ b/master/sdk_docs/docs/V1beta1TransformersConfig/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1TritonSpec/index.html b/master/sdk_docs/docs/V1beta1TritonSpec/index.html
index d04251f1f..2a8d0136d 100644
--- a/master/sdk_docs/docs/V1beta1TritonSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1TritonSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/docs/V1beta1XGBoostSpec/index.html b/master/sdk_docs/docs/V1beta1XGBoostSpec/index.html
index eda761b4c..b1d44b1af 100644
--- a/master/sdk_docs/docs/V1beta1XGBoostSpec/index.html
+++ b/master/sdk_docs/docs/V1beta1XGBoostSpec/index.html
@@ -32,7 +32,7 @@
diff --git a/master/sdk_docs/sdk_doc/index.html b/master/sdk_docs/sdk_doc/index.html
index 12132be97..e963c7024 100644
--- a/master/sdk_docs/sdk_doc/index.html
+++ b/master/sdk_docs/sdk_doc/index.html
@@ -32,7 +32,7 @@
diff --git a/master/search/search_index.json b/master/search/search_index.json
index f466fd1e7..cde47c90f 100644
--- a/master/search/search_index.json
+++ b/master/search/search_index.json
@@ -1 +1 @@
-{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"","title":"Home"},{"location":"admin/kubernetes_deployment/","text":"Kubernetes Deployment Installation Guide \u00b6 KServe supports RawDeployment mode to enable InferenceService deployment with Kubernetes resources Deployment , Service , Ingress and Horizontal Pod Autoscaler . Comparing to serverless deployment it unlocks Knative limitations such as mounting multiple volumes, on the other hand Scale down and from Zero is not supported in RawDeployment mode. Kubernetes 1.22 is the minimally required version and please check the following recommended Istio versions for the corresponding Kubernetes version. Recommended Version Matrix \u00b6 Kubernetes Version Recommended Istio Version 1.27 1.18, 1.19 1.28 1.19, 1.20 1.29 1.20, 1.21 1. Install Istio \u00b6 The minimally required Istio version is 1.13 and you can refer to the Istio install guide . Once Istio is installed, create IngressClass resource for istio. apiVersion : networking.k8s.io/v1 kind : IngressClass metadata : name : istio spec : controller : istio.io/ingress-controller Note Istio ingress is recommended, but you can choose to install with other Ingress controllers and create IngressClass resource for your Ingress option. 2. Install Cert Manager \u00b6 The minimally required Cert Manager version is 1.9.0 and you can refer to Cert Manager installation guide . Note Cert manager is required to provision webhook certs for production grade installation, alternatively you can run self signed certs generation script. 3. Install KServe \u00b6 Note The default KServe deployment mode is Serverless which depends on Knative. The following step changes the default deployment mode to RawDeployment before installing KServe. i. Install KServe kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve.yaml Install KServe default serving runtimes: kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve-cluster-resources.yaml ii. Change default deployment mode and ingress option First in ConfigMap inferenceservice-config modify the defaultDeploymentMode in the deploy section, kubectl kubectl patch configmap/inferenceservice-config -n kserve --type = strategic -p '{\"data\": {\"deploy\": \"{\\\"defaultDeploymentMode\\\": \\\"RawDeployment\\\"}\"}}' then modify the ingressClassName in ingress section to point to IngressClass name created in step 1 . ingress : |- { \"ingressClassName\" : \"your-ingress-class\" , }","title":"Kubernetes deployment installation"},{"location":"admin/kubernetes_deployment/#kubernetes-deployment-installation-guide","text":"KServe supports RawDeployment mode to enable InferenceService deployment with Kubernetes resources Deployment , Service , Ingress and Horizontal Pod Autoscaler . Comparing to serverless deployment it unlocks Knative limitations such as mounting multiple volumes, on the other hand Scale down and from Zero is not supported in RawDeployment mode. Kubernetes 1.22 is the minimally required version and please check the following recommended Istio versions for the corresponding Kubernetes version.","title":"Kubernetes Deployment Installation Guide"},{"location":"admin/kubernetes_deployment/#recommended-version-matrix","text":"Kubernetes Version Recommended Istio Version 1.27 1.18, 1.19 1.28 1.19, 1.20 1.29 1.20, 1.21","title":"Recommended Version Matrix"},{"location":"admin/kubernetes_deployment/#1-install-istio","text":"The minimally required Istio version is 1.13 and you can refer to the Istio install guide . Once Istio is installed, create IngressClass resource for istio. apiVersion : networking.k8s.io/v1 kind : IngressClass metadata : name : istio spec : controller : istio.io/ingress-controller Note Istio ingress is recommended, but you can choose to install with other Ingress controllers and create IngressClass resource for your Ingress option.","title":"1. Install Istio"},{"location":"admin/kubernetes_deployment/#2-install-cert-manager","text":"The minimally required Cert Manager version is 1.9.0 and you can refer to Cert Manager installation guide . Note Cert manager is required to provision webhook certs for production grade installation, alternatively you can run self signed certs generation script.","title":"2. Install Cert Manager"},{"location":"admin/kubernetes_deployment/#3-install-kserve","text":"Note The default KServe deployment mode is Serverless which depends on Knative. The following step changes the default deployment mode to RawDeployment before installing KServe. i. Install KServe kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve.yaml Install KServe default serving runtimes: kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve-cluster-resources.yaml ii. Change default deployment mode and ingress option First in ConfigMap inferenceservice-config modify the defaultDeploymentMode in the deploy section, kubectl kubectl patch configmap/inferenceservice-config -n kserve --type = strategic -p '{\"data\": {\"deploy\": \"{\\\"defaultDeploymentMode\\\": \\\"RawDeployment\\\"}\"}}' then modify the ingressClassName in ingress section to point to IngressClass name created in step 1 . ingress : |- { \"ingressClassName\" : \"your-ingress-class\" , }","title":"3. Install KServe"},{"location":"admin/migration/","text":"Migrating from KFServing \u00b6 This doc explains how to migrate existing inference services from KFServing to KServe without downtime. Note The migration job will by default delete the leftover KFServing installation after migrating the inference services from serving.kubeflow.org to serving.kserve.io . Migrating from standalone KFServing \u00b6 Install KServe v0.7 using the install YAML This will not affect existing services yet. kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/install/v0.7.0/kserve.yaml Run the KServe Migration YAML This will begin the migration. Any errors here may affect your existing services. If you do not want to delete the KFServing resources after migrating, download and edit the env REMOVE_KFSERVING in the YAML before applying it If your KFServing is installed in a namespace other than kfserving-system , then download and set the env KFSERVING_NAMESPACE in the YAML before applying it kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/hack/kserve_migration/kserve_migration_job.yaml Clean up the migration resources kubectl delete ClusterRoleBinding cluster-migration-rolebinding kubectl delete ClusterRole cluster-migration-role kubectl delete ServiceAccount cluster-migration-svcaccount -n kserve Migrating from Kubeflow-based KFServing \u00b6 Install Kubeflow-based KServe 0.7 using the install YAML This will not affect existing services yet. kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/install/v0.7.0/kserve_kubeflow.yaml Run the KServe Migration YAML for Kubeflow-based installations This will begin the migration. Any errors here may affect your existing services. If you do not want to delete the KFServing resources after migrating, download and edit the env REMOVE_KFSERVING in the YAML before applying it kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/hack/kserve_migration/kserve_migration_job_kubeflow.yaml Clean up the migration resources kubectl delete ClusterRoleBinding cluster-migration-rolebinding kubectl delete ClusterRole cluster-migration-role kubectl delete ServiceAccount cluster-migration-svcaccount -n kubeflow Update the models web app to use the new InferenceService API group serving.kserve.io Change the deployment image to kserve/models-web-app:v0.7.0-rc0 This is a temporary fix until the next Kubeflow release includes these changes kubectl edit deployment kfserving-models-web-app -n kubeflow Update the cluster role to be able to access the new InferenceService API group serving.kserve.io Edit the apiGroups from serving.kubeflow.org to serving.kserve.io This is a temporary fix until the next Kubeflow release includes these changes kubectl edit clusterrole kfserving-models-web-app-cluster-role","title":"Migrating from KFServing"},{"location":"admin/migration/#migrating-from-kfserving","text":"This doc explains how to migrate existing inference services from KFServing to KServe without downtime. Note The migration job will by default delete the leftover KFServing installation after migrating the inference services from serving.kubeflow.org to serving.kserve.io .","title":"Migrating from KFServing"},{"location":"admin/migration/#migrating-from-standalone-kfserving","text":"Install KServe v0.7 using the install YAML This will not affect existing services yet. kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/install/v0.7.0/kserve.yaml Run the KServe Migration YAML This will begin the migration. Any errors here may affect your existing services. If you do not want to delete the KFServing resources after migrating, download and edit the env REMOVE_KFSERVING in the YAML before applying it If your KFServing is installed in a namespace other than kfserving-system , then download and set the env KFSERVING_NAMESPACE in the YAML before applying it kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/hack/kserve_migration/kserve_migration_job.yaml Clean up the migration resources kubectl delete ClusterRoleBinding cluster-migration-rolebinding kubectl delete ClusterRole cluster-migration-role kubectl delete ServiceAccount cluster-migration-svcaccount -n kserve","title":"Migrating from standalone KFServing"},{"location":"admin/migration/#migrating-from-kubeflow-based-kfserving","text":"Install Kubeflow-based KServe 0.7 using the install YAML This will not affect existing services yet. kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/install/v0.7.0/kserve_kubeflow.yaml Run the KServe Migration YAML for Kubeflow-based installations This will begin the migration. Any errors here may affect your existing services. If you do not want to delete the KFServing resources after migrating, download and edit the env REMOVE_KFSERVING in the YAML before applying it kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/hack/kserve_migration/kserve_migration_job_kubeflow.yaml Clean up the migration resources kubectl delete ClusterRoleBinding cluster-migration-rolebinding kubectl delete ClusterRole cluster-migration-role kubectl delete ServiceAccount cluster-migration-svcaccount -n kubeflow Update the models web app to use the new InferenceService API group serving.kserve.io Change the deployment image to kserve/models-web-app:v0.7.0-rc0 This is a temporary fix until the next Kubeflow release includes these changes kubectl edit deployment kfserving-models-web-app -n kubeflow Update the cluster role to be able to access the new InferenceService API group serving.kserve.io Edit the apiGroups from serving.kubeflow.org to serving.kserve.io This is a temporary fix until the next Kubeflow release includes these changes kubectl edit clusterrole kfserving-models-web-app-cluster-role","title":"Migrating from Kubeflow-based KFServing"},{"location":"admin/modelmesh/","text":"ModelMesh Installation Guide \u00b6 KServe ModelMesh installation enables high-scale, high-density and frequently-changing model serving use cases. A Kubernetes cluster is required. You will need cluster-admin authority. Additionally, kustomize and an etcd server on the Kubernetes cluster are required. 1. Standard Installation \u00b6 You can find the standard installation instructions in the ModelMesh Serving installation guide . This approach assumes you have installed the prerequisites such as etcd and S3-compatible object storage. 2. Quick Installation \u00b6 A quick installation allows you to quickly get ModelMesh Serving up and running without having to manually install the prerequisites. The steps are described in the ModelMesh Serving quick start guide . Note ModelMesh Serving is namespace scoped, meaning all of its components must exist within a single namespace and only one instance of ModelMesh Serving can be installed per namespace. For more details, you can check out the ModelMesh Serving getting started guide .","title":"ModelMesh installation"},{"location":"admin/modelmesh/#modelmesh-installation-guide","text":"KServe ModelMesh installation enables high-scale, high-density and frequently-changing model serving use cases. A Kubernetes cluster is required. You will need cluster-admin authority. Additionally, kustomize and an etcd server on the Kubernetes cluster are required.","title":"ModelMesh Installation Guide"},{"location":"admin/modelmesh/#1-standard-installation","text":"You can find the standard installation instructions in the ModelMesh Serving installation guide . This approach assumes you have installed the prerequisites such as etcd and S3-compatible object storage.","title":"1. Standard Installation"},{"location":"admin/modelmesh/#2-quick-installation","text":"A quick installation allows you to quickly get ModelMesh Serving up and running without having to manually install the prerequisites. The steps are described in the ModelMesh Serving quick start guide . Note ModelMesh Serving is namespace scoped, meaning all of its components must exist within a single namespace and only one instance of ModelMesh Serving can be installed per namespace. For more details, you can check out the ModelMesh Serving getting started guide .","title":"2. Quick Installation"},{"location":"admin/serverless/serverless/","text":"Serverless Installation Guide \u00b6 KServe Serverless installation enables autoscaling based on request volume and supports scale down to and from zero. It also supports revision management and canary rollout based on revisions. Kubernetes 1.22 is the minimally required version and please check the following recommended Knative, Istio versions for the corresponding Kubernetes version. Recommended Version Matrix \u00b6 Kubernetes Version Recommended Istio Version Recommended Knative Version 1.27 1.18,1.19 1.10,1.11 1.28 1.19,1.20 1.11,1.12.4 1.29 1.20,1.21 1.12.4,1.13.1 1. Install Knative Serving \u00b6 Please refer to Knative Serving install guide . Note If you are looking to use PodSpec fields such as nodeSelector, affinity or tolerations which are now supported in the v1beta1 API spec, you need to turn on the corresponding feature flags in your Knative configuration. Warning Knative 1.13.1 requires Istio 1.20+, gRPC routing does not work with previous Istio releases, see release notes . 2. Install Networking Layer \u00b6 The recommended networking layer for KServe is Istio as currently it works best with KServe, please refer to the Istio install guide . Alternatively you can also choose other networking layers like Kourier or Contour , see how to install Kourier with KServe guide . 3. Install Cert Manager \u00b6 The minimally required Cert Manager version is 1.9.0 and you can refer to Cert Manager . Note Cert manager is required to provision webhook certs for production grade installation, alternatively you can run self signed certs generation script. 4. Install KServe \u00b6 kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve.yaml 5. Install KServe Built-in ClusterServingRuntimes \u00b6 kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve-cluster-resources.yaml Note ClusterServingRuntimes are required to create InferenceService for built-in model serving runtimes with KServe v0.8.0 or higher.","title":"Serverless installation"},{"location":"admin/serverless/serverless/#serverless-installation-guide","text":"KServe Serverless installation enables autoscaling based on request volume and supports scale down to and from zero. It also supports revision management and canary rollout based on revisions. Kubernetes 1.22 is the minimally required version and please check the following recommended Knative, Istio versions for the corresponding Kubernetes version.","title":"Serverless Installation Guide"},{"location":"admin/serverless/serverless/#recommended-version-matrix","text":"Kubernetes Version Recommended Istio Version Recommended Knative Version 1.27 1.18,1.19 1.10,1.11 1.28 1.19,1.20 1.11,1.12.4 1.29 1.20,1.21 1.12.4,1.13.1","title":"Recommended Version Matrix"},{"location":"admin/serverless/serverless/#1-install-knative-serving","text":"Please refer to Knative Serving install guide . Note If you are looking to use PodSpec fields such as nodeSelector, affinity or tolerations which are now supported in the v1beta1 API spec, you need to turn on the corresponding feature flags in your Knative configuration. Warning Knative 1.13.1 requires Istio 1.20+, gRPC routing does not work with previous Istio releases, see release notes .","title":"1. Install Knative Serving"},{"location":"admin/serverless/serverless/#2-install-networking-layer","text":"The recommended networking layer for KServe is Istio as currently it works best with KServe, please refer to the Istio install guide . Alternatively you can also choose other networking layers like Kourier or Contour , see how to install Kourier with KServe guide .","title":"2. Install Networking Layer"},{"location":"admin/serverless/serverless/#3-install-cert-manager","text":"The minimally required Cert Manager version is 1.9.0 and you can refer to Cert Manager . Note Cert manager is required to provision webhook certs for production grade installation, alternatively you can run self signed certs generation script.","title":"3. Install Cert Manager"},{"location":"admin/serverless/serverless/#4-install-kserve","text":"kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve.yaml","title":"4. Install KServe"},{"location":"admin/serverless/serverless/#5-install-kserve-built-in-clusterservingruntimes","text":"kubectl kubectl apply -f https://github.com/kserve/kserve/releases/download/v0.12.0/kserve-cluster-resources.yaml Note ClusterServingRuntimes are required to create InferenceService for built-in model serving runtimes with KServe v0.8.0 or higher.","title":"5. Install KServe Built-in ClusterServingRuntimes"},{"location":"admin/serverless/kourier_networking/","text":"Deploy InferenceService with Alternative Networking Layer \u00b6 KServe creates the top level Istio Virtual Service for routing to InferenceService components based on the virtual host or path based routing. Now KServe provides an option for disabling the top level virtual service to allow configuring other networking layers Knative supports. For example, Kourier is an alternative networking layer and the following steps show how you can deploy KServe with Kourier . Install Kourier Networking Layer \u00b6 Please refer to the Serverless Installation Guide and change the second step to install Kourier instead of Istio . Install the Kourier networking layer: kubectl apply -f https://github.com/knative/net-kourier/releases/download/ ${ KNATIVE_VERSION } /kourier.yaml Configure Knative Serving to use Kourier: kubectl patch configmap/config-network \\ --namespace knative-serving \\ --type merge \\ --patch '{\"data\":{\"ingress-class\":\"kourier.ingress.networking.knative.dev\"}}' Verify Kourier installation: kubectl get pods -n knative-serving && kubectl get pods -n kourier-system Expected Output NAME READY STATUS RESTARTS AGE activator-77db7d9dd7-kbrgr 1 /1 Running 0 10m autoscaler-67dbf79b95-htnp9 1 /1 Running 0 10m controller-684b6bc97f-ffm58 1 /1 Running 0 10m domain-mapping-6d99d99978-ktmrf 1 /1 Running 0 10m domainmapping-webhook-5f998498b6-sddnm 1 /1 Running 0 10m net-kourier-controller-68967d76dc-ncj2n 1 /1 Running 0 10m webhook-97bdc7b4d-nr7qf 1 /1 Running 0 10m NAME READY STATUS RESTARTS AGE 3scale-kourier-gateway-54c49c8ff5-x8tgn 1 /1 Running 0 10m Edit inferenceservice-config configmap to disable Istio top level virtual host: kubectl edit configmap/inferenceservice-config --namespace kserve # Add the flag `\"disableIstioVirtualHost\": true` under the ingress section ingress : | - { \"disableIstioVirtualHost\" : true } Restart the KServe Controller kubectl rollout restart deployment kserve-controller-manager -n kserve Deploy InferenceService for Testing Kourier Gateway \u00b6 Create the InferenceService \u00b6 New Schema Old Schema apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"pmml-demo\" spec : predictor : model : modelFormat : name : pmml storageUri : \"gs://kfserving-examples/models/pmml\" apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"pmml-demo\" spec : predictor : pmml : storageUri : gs://kfserving-examples/models/pmml kubectl apply -f pmml.yaml Expected Output $ inferenceservice.serving.kserve.io/pmml-demo created Run a Prediction \u00b6 Note that when setting INGRESS_HOST and INGRESS_PORT following the determining the ingress IP and ports guide you need to replace istio-ingressgateway with kourier-gateway . For example if you choose to do Port Forward for testing you need to select the kourier-gateway pod as following. kubectl port-forward --namespace kourier-system \\ $( kubectl get pod -n kourier-system -l \"app=3scale-kourier-gateway\" --output = jsonpath = \"{.items[0].metadata.name}\" ) 8080 :8080 export INGRESS_HOST = localhost export INGRESS_PORT = 8080 Make sure that you create a file named pmml-input.json with the following content, under your current terminal path. { \"instances\" : [ [ 5.1 , 3.5 , 1.4 , 0.2 ] ] } Send a prediction request to the InferenceService and check the output. MODEL_NAME = pmml-demo INPUT_PATH = @./pmml-input.json SERVICE_HOSTNAME = $( kubectl get inferenceservice pmml-demo -o jsonpath = '{.status.url}' | cut -d \"/\" -f 3 ) curl -v -H \"Host: ${ SERVICE_HOSTNAME } \" -H \"Content-Type: application/json\" http:// ${ INGRESS_HOST } : ${ INGRESS_PORT } /v1/models/ $MODEL_NAME :predict -d $INPUT_PATH Expected Output * Trying 127 .0.0.1... * TCP_NODELAY set * Connected to localhost ( 127 .0.0.1 ) port 8080 ( #0) > POST /v1/models/pmml-demo:predict HTTP/1.1 > Host: pmml-demo-predictor-default.default.example.com > User-Agent: curl/7.58.0 > Accept: */* > Content-Length: 45 > Content-Type: application/x-www-form-urlencoded > * upload completely sent off: 45 out of 45 bytes < HTTP/1.1 200 OK < content-length: 144 < content-type: application/json ; charset = UTF-8 < date: Wed, 14 Sep 2022 13 :30:09 GMT < server: envoy < x-envoy-upstream-service-time: 58 < * Connection #0 to host localhost left intact { \"predictions\" : [{ \"Species\" : \"setosa\" , \"Probability_setosa\" : 1 .0, \"Probability_versicolor\" : 0 .0, \"Probability_virginica\" : 0 .0, \"Node_Id\" : \"2\" }]}","title":"Kourier Networking Layer"},{"location":"admin/serverless/kourier_networking/#deploy-inferenceservice-with-alternative-networking-layer","text":"KServe creates the top level Istio Virtual Service for routing to InferenceService components based on the virtual host or path based routing. Now KServe provides an option for disabling the top level virtual service to allow configuring other networking layers Knative supports. For example, Kourier is an alternative networking layer and the following steps show how you can deploy KServe with Kourier .","title":"Deploy InferenceService with Alternative Networking Layer"},{"location":"admin/serverless/kourier_networking/#install-kourier-networking-layer","text":"Please refer to the Serverless Installation Guide and change the second step to install Kourier instead of Istio . Install the Kourier networking layer: kubectl apply -f https://github.com/knative/net-kourier/releases/download/ ${ KNATIVE_VERSION } /kourier.yaml Configure Knative Serving to use Kourier: kubectl patch configmap/config-network \\ --namespace knative-serving \\ --type merge \\ --patch '{\"data\":{\"ingress-class\":\"kourier.ingress.networking.knative.dev\"}}' Verify Kourier installation: kubectl get pods -n knative-serving && kubectl get pods -n kourier-system Expected Output NAME READY STATUS RESTARTS AGE activator-77db7d9dd7-kbrgr 1 /1 Running 0 10m autoscaler-67dbf79b95-htnp9 1 /1 Running 0 10m controller-684b6bc97f-ffm58 1 /1 Running 0 10m domain-mapping-6d99d99978-ktmrf 1 /1 Running 0 10m domainmapping-webhook-5f998498b6-sddnm 1 /1 Running 0 10m net-kourier-controller-68967d76dc-ncj2n 1 /1 Running 0 10m webhook-97bdc7b4d-nr7qf 1 /1 Running 0 10m NAME READY STATUS RESTARTS AGE 3scale-kourier-gateway-54c49c8ff5-x8tgn 1 /1 Running 0 10m Edit inferenceservice-config configmap to disable Istio top level virtual host: kubectl edit configmap/inferenceservice-config --namespace kserve # Add the flag `\"disableIstioVirtualHost\": true` under the ingress section ingress : | - { \"disableIstioVirtualHost\" : true } Restart the KServe Controller kubectl rollout restart deployment kserve-controller-manager -n kserve","title":"Install Kourier Networking Layer"},{"location":"admin/serverless/kourier_networking/#deploy-inferenceservice-for-testing-kourier-gateway","text":"","title":"Deploy InferenceService for Testing Kourier Gateway"},{"location":"admin/serverless/kourier_networking/#create-the-inferenceservice","text":"New Schema Old Schema apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"pmml-demo\" spec : predictor : model : modelFormat : name : pmml storageUri : \"gs://kfserving-examples/models/pmml\" apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"pmml-demo\" spec : predictor : pmml : storageUri : gs://kfserving-examples/models/pmml kubectl apply -f pmml.yaml Expected Output $ inferenceservice.serving.kserve.io/pmml-demo created","title":"Create the InferenceService"},{"location":"admin/serverless/kourier_networking/#run-a-prediction","text":"Note that when setting INGRESS_HOST and INGRESS_PORT following the determining the ingress IP and ports guide you need to replace istio-ingressgateway with kourier-gateway . For example if you choose to do Port Forward for testing you need to select the kourier-gateway pod as following. kubectl port-forward --namespace kourier-system \\ $( kubectl get pod -n kourier-system -l \"app=3scale-kourier-gateway\" --output = jsonpath = \"{.items[0].metadata.name}\" ) 8080 :8080 export INGRESS_HOST = localhost export INGRESS_PORT = 8080 Make sure that you create a file named pmml-input.json with the following content, under your current terminal path. { \"instances\" : [ [ 5.1 , 3.5 , 1.4 , 0.2 ] ] } Send a prediction request to the InferenceService and check the output. MODEL_NAME = pmml-demo INPUT_PATH = @./pmml-input.json SERVICE_HOSTNAME = $( kubectl get inferenceservice pmml-demo -o jsonpath = '{.status.url}' | cut -d \"/\" -f 3 ) curl -v -H \"Host: ${ SERVICE_HOSTNAME } \" -H \"Content-Type: application/json\" http:// ${ INGRESS_HOST } : ${ INGRESS_PORT } /v1/models/ $MODEL_NAME :predict -d $INPUT_PATH Expected Output * Trying 127 .0.0.1... * TCP_NODELAY set * Connected to localhost ( 127 .0.0.1 ) port 8080 ( #0) > POST /v1/models/pmml-demo:predict HTTP/1.1 > Host: pmml-demo-predictor-default.default.example.com > User-Agent: curl/7.58.0 > Accept: */* > Content-Length: 45 > Content-Type: application/x-www-form-urlencoded > * upload completely sent off: 45 out of 45 bytes < HTTP/1.1 200 OK < content-length: 144 < content-type: application/json ; charset = UTF-8 < date: Wed, 14 Sep 2022 13 :30:09 GMT < server: envoy < x-envoy-upstream-service-time: 58 < * Connection #0 to host localhost left intact { \"predictions\" : [{ \"Species\" : \"setosa\" , \"Probability_setosa\" : 1 .0, \"Probability_versicolor\" : 0 .0, \"Probability_virginica\" : 0 .0, \"Node_Id\" : \"2\" }]}","title":"Run a Prediction"},{"location":"admin/serverless/servicemesh/","text":"Secure InferenceService with ServiceMesh \u00b6 A service mesh is a dedicated infrastructure layer that you can add to your InferenceService to allow you to transparently add capabilities like observability, traffic management and security. In this example we show how you can turn on the Istio service mesh mode to provide a uniform and efficient way to secure service-to-service communication in a cluster with TLS encryption, strong identity-based authentication and authorization. Turn on strict mTLS and Authorization Policy \u00b6 For namespace traffic isolation, we lock down the in cluster traffic to only allow requests from the same namespace and enable mTLS for TLS encryption and strong identity-based authentication. Because Knative requests are frequently routed through activator, when turning on mTLS additional traffic rules are required and activator/autoscaler in knative-serving namespace must have sidecar injected as well. For more details please see mTLS in Knative , to understand when requests are forwarded through the activator, see target burst capacity docs. Create the namespace user1 which is used for this example. kubectl create namespace user1 When activator is not on the request path, the rule checks if the source namespace of the request is the same as the destination namespace of InferenceService . When activator is on the request path, the rule checks the source namespace knative-serving namespace as the request is proxied through activator. Warning Currently when activator is on the request path, it is not able to check the originated namespace or original identity due to the net-istio issue . apiVersion : security.istio.io/v1beta1 kind : PeerAuthentication metadata : name : default namespace : user1 spec : mtls : mode : STRICT --- apiVersion : security.istio.io/v1beta1 kind : AuthorizationPolicy metadata : name : allow-serving-tests namespace : user1 spec : action : ALLOW rules : # 1. mTLS for service from source \"user1\" namespace to destination service when TargetBurstCapacity=0 without local gateway and activator on the path # Source Service from \"user1\" namespace -> Destination Service in \"user1\" namespace - from : - source : namespaces : [ \"user1\" ] # 2. mTLS for service from source \"user1\" namespace to destination service with activator on the path # Source Service from \"user1\" namespace -> Activator(Knative Serving namespace) -> Destination service in \"user1\" namespace # unfortunately currently we could not lock down the source namespace as Activator does not capture the source namespace when proxying the request, see https://github.com/knative-sandbox/net-istio/issues/554. - from : - source : namespaces : [ \"knative-serving\" ] # 3. allow metrics and probes from knative serving namespaces - from : - source : namespaces : [ \"knative-serving\" ] to : - operation : paths : [ \"/metrics\" , \"/healthz\" , \"/ready\" , \"/wait-for-drain\" ] Apply the PeerAuthentication and AuthorizationPolicy rules with auth.yaml : kubectl apply -f auth.yaml Disable Top Level Virtual Service \u00b6 KServe currently creates an Istio top level virtual service to support routing between InferenceService components like predictor, transformer and explainer, as well as support path based routing as an alternative routing with service hosts. In serverless service mesh mode this creates a problem that in order to route through the underlying virtual service created by Knative Service, the top level virtual service is required to route to the Istio Gateway instead of to the InferenceService component on the service mesh directly. By disabling the top level virtual service, it eliminates the extra route to Istio local gateway and the authorization policy can check the source namespace when mTLS is established directly between service to service and activator is not on the request path. To disable the top level virtual service, add the flag \"disableIstioVirtualHost\": true under the ingress config in inferenceservice configmap. kubectl edit configmap/inferenceservice-config --namespace kserve ingress : | - { \"disableIstioVirtualHost\" : true } Deploy InferenceService with Istio sidecar injection \u00b6 First label the namespace with istio-injection=enabled to turn on the sidecar injection for the namespace. kubectl label namespace user1 istio-injection = enabled --overwrite Create the InferenceService with and without Knative activator on the path: When autoscaling.knative.dev/targetBurstCapacity is set to 0, Knative removes the activator from the request path so the test service can directly establish the mTLS connection to the InferenceService and the authorization policy can check the original namespace of the request to lock down the traffic for namespace isolation. InferenceService with activator on path InferenceService without activator on path apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"sklearn-iris-burst\" namespace : user1 annotations : \"sidecar.istio.io/inject\" : \"true\" spec : predictor : model : modelFormat : name : sklearn storageUri : \"gs://kfserving-examples/models/sklearn/1.0/model\" apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"sklearn-iris\" namespace : user1 annotations : \"autoscaling.knative.dev/targetBurstCapacity\" : \"0\" \"sidecar.istio.io/inject\" : \"true\" spec : predictor : model : modelFormat : name : sklearn storageUri : \"gs://kfserving-examples/models/sklearn/1.0/model\" kubectl apply -f sklearn_iris.yaml Expected Output $ inferenceservice.serving.kserve.io/sklearn-iris created $ inferenceservice.serving.kserve.io/sklearn-iris-burst created kubectl get pods -n user1 NAME READY STATUS RESTARTS AGE httpbin-6484879498-qxqj8 2 /2 Running 0 19h sklearn-iris-burst-predictor-default-00001-deployment-5685n46f6 3 /3 Running 0 12h sklearn-iris-predictor-default-00001-deployment-985d5cd46-zzw4x 3 /3 Running 0 12h Run a prediction from the same namespace \u00b6 Deploy a test service in user1 namespace with httpbin.yaml . kubectl apply -f httpbin.yaml Run a prediction request to the sklearn-iris InferenceService without activator on the path, you are expected to get HTTP 200 as the authorization rule allows traffic from the same namespace. kubectl exec -it httpbin-6484879498-qxqj8 -c istio-proxy -n user1 -- curl -v sklearn-iris-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris Expected Output * Connected to sklearn-iris-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris HTTP/1.1 > Host: sklearn-iris-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.81.0 > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 200 OK < content-length: 36 < content-type: application/json < date: Sat, 26 Nov 2022 01 :45:10 GMT < server: istio-envoy < x-envoy-upstream-service-time: 42 < * Connection #0 to host sklearn-iris-predictor-default.user1.svc.cluster.local left intact { \"name\" : \"sklearn-iris\" , \"ready\" :true } Run a prediction request to the sklearn-iris-burst InferenceService with activator on the path, you are expected to get HTTP 200 as the authorization rule allows traffic from knative-serving namespace. kubectl exec -it httpbin-6484879498-qxqj8 -c istio-proxy -n user1 -- curl -v sklearn-iris-burst-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris-burst Expected Output * Connected to sklearn-iris-burst-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris-burst HTTP/1.1 > Host: sklearn-iris-burst-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.81.0 > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 200 OK < content-length: 42 < content-type: application/json < date: Sat, 26 Nov 2022 13 :55:14 GMT < server: istio-envoy < x-envoy-upstream-service-time: 209 < * Connection #0 to host sklearn-iris-burst-predictor-default.user1.svc.cluster.local left intact { \"name\" : \"sklearn-iris-burst\" , \"ready\" :true } Run a prediction from a different namespace \u00b6 Deploy a test service in default namespace with sleep.yaml which is different from the namespace the InferenceService is deployed to. kubectl apply -f sleep.yaml When you send a prediction request to the sklearn-iris InferenceService without activator on the request path from a different namespace, you are expected to get HTTP 403 \"RBAC denied\" as the authorization rule only allows the traffic from the same namespace user1 where the InferenceService is deployed. kubectl exec -it sleep-6d6b49d8b8-6ths6 -- curl -v sklearn-iris-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris Expected Output * Connected to sklearn-iris-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris HTTP/1.1 > Host: sklearn-iris-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.86.0-DEV > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 403 Forbidden < content-length: 19 < content-type: text/plain < date: Sat, 26 Nov 2022 02 :45:46 GMT < server: envoy < x-envoy-upstream-service-time: 14 < * Connection #0 to host sklearn-iris-predictor-default.user1.svc.cluster.local left intact When you send a prediction request to the sklearn-iris-burst InferenceService with activator on the request path from a different namespace, you actually get HTTP 200 response due to the above limitation as the authorization policy is not able to lock down the traffic only from the same namespace as the request is proxied through activator in knative-serving namespace, we expect to get HTTP 403 once upstream Knative net-istio is fixed. kubectl exec -it sleep-6d6b49d8b8-6ths6 -- curl -v sklearn-iris-burst-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris-burst Expected Output * Connected to sklearn-iris-burst-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris-burst HTTP/1.1 > Host: sklearn-iris-burst-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.86.0-DEV > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 200 OK < content-length: 42 < content-type: application/json < date: Sat, 26 Nov 2022 13 :59:04 GMT < server: envoy < x-envoy-upstream-service-time: 6 < * Connection #0 to host sklearn-iris-burst-predictor-default.user1.svc.cluster.local left intact { \"name\" : \"sklearn-iris-burst\" , \"ready\" :true }","title":"Istio Service Mesh"},{"location":"admin/serverless/servicemesh/#secure-inferenceservice-with-servicemesh","text":"A service mesh is a dedicated infrastructure layer that you can add to your InferenceService to allow you to transparently add capabilities like observability, traffic management and security. In this example we show how you can turn on the Istio service mesh mode to provide a uniform and efficient way to secure service-to-service communication in a cluster with TLS encryption, strong identity-based authentication and authorization.","title":"Secure InferenceService with ServiceMesh"},{"location":"admin/serverless/servicemesh/#turn-on-strict-mtls-and-authorization-policy","text":"For namespace traffic isolation, we lock down the in cluster traffic to only allow requests from the same namespace and enable mTLS for TLS encryption and strong identity-based authentication. Because Knative requests are frequently routed through activator, when turning on mTLS additional traffic rules are required and activator/autoscaler in knative-serving namespace must have sidecar injected as well. For more details please see mTLS in Knative , to understand when requests are forwarded through the activator, see target burst capacity docs. Create the namespace user1 which is used for this example. kubectl create namespace user1 When activator is not on the request path, the rule checks if the source namespace of the request is the same as the destination namespace of InferenceService . When activator is on the request path, the rule checks the source namespace knative-serving namespace as the request is proxied through activator. Warning Currently when activator is on the request path, it is not able to check the originated namespace or original identity due to the net-istio issue . apiVersion : security.istio.io/v1beta1 kind : PeerAuthentication metadata : name : default namespace : user1 spec : mtls : mode : STRICT --- apiVersion : security.istio.io/v1beta1 kind : AuthorizationPolicy metadata : name : allow-serving-tests namespace : user1 spec : action : ALLOW rules : # 1. mTLS for service from source \"user1\" namespace to destination service when TargetBurstCapacity=0 without local gateway and activator on the path # Source Service from \"user1\" namespace -> Destination Service in \"user1\" namespace - from : - source : namespaces : [ \"user1\" ] # 2. mTLS for service from source \"user1\" namespace to destination service with activator on the path # Source Service from \"user1\" namespace -> Activator(Knative Serving namespace) -> Destination service in \"user1\" namespace # unfortunately currently we could not lock down the source namespace as Activator does not capture the source namespace when proxying the request, see https://github.com/knative-sandbox/net-istio/issues/554. - from : - source : namespaces : [ \"knative-serving\" ] # 3. allow metrics and probes from knative serving namespaces - from : - source : namespaces : [ \"knative-serving\" ] to : - operation : paths : [ \"/metrics\" , \"/healthz\" , \"/ready\" , \"/wait-for-drain\" ] Apply the PeerAuthentication and AuthorizationPolicy rules with auth.yaml : kubectl apply -f auth.yaml","title":"Turn on strict mTLS and Authorization Policy"},{"location":"admin/serverless/servicemesh/#disable-top-level-virtual-service","text":"KServe currently creates an Istio top level virtual service to support routing between InferenceService components like predictor, transformer and explainer, as well as support path based routing as an alternative routing with service hosts. In serverless service mesh mode this creates a problem that in order to route through the underlying virtual service created by Knative Service, the top level virtual service is required to route to the Istio Gateway instead of to the InferenceService component on the service mesh directly. By disabling the top level virtual service, it eliminates the extra route to Istio local gateway and the authorization policy can check the source namespace when mTLS is established directly between service to service and activator is not on the request path. To disable the top level virtual service, add the flag \"disableIstioVirtualHost\": true under the ingress config in inferenceservice configmap. kubectl edit configmap/inferenceservice-config --namespace kserve ingress : | - { \"disableIstioVirtualHost\" : true }","title":"Disable Top Level Virtual Service"},{"location":"admin/serverless/servicemesh/#deploy-inferenceservice-with-istio-sidecar-injection","text":"First label the namespace with istio-injection=enabled to turn on the sidecar injection for the namespace. kubectl label namespace user1 istio-injection = enabled --overwrite Create the InferenceService with and without Knative activator on the path: When autoscaling.knative.dev/targetBurstCapacity is set to 0, Knative removes the activator from the request path so the test service can directly establish the mTLS connection to the InferenceService and the authorization policy can check the original namespace of the request to lock down the traffic for namespace isolation. InferenceService with activator on path InferenceService without activator on path apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"sklearn-iris-burst\" namespace : user1 annotations : \"sidecar.istio.io/inject\" : \"true\" spec : predictor : model : modelFormat : name : sklearn storageUri : \"gs://kfserving-examples/models/sklearn/1.0/model\" apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"sklearn-iris\" namespace : user1 annotations : \"autoscaling.knative.dev/targetBurstCapacity\" : \"0\" \"sidecar.istio.io/inject\" : \"true\" spec : predictor : model : modelFormat : name : sklearn storageUri : \"gs://kfserving-examples/models/sklearn/1.0/model\" kubectl apply -f sklearn_iris.yaml Expected Output $ inferenceservice.serving.kserve.io/sklearn-iris created $ inferenceservice.serving.kserve.io/sklearn-iris-burst created kubectl get pods -n user1 NAME READY STATUS RESTARTS AGE httpbin-6484879498-qxqj8 2 /2 Running 0 19h sklearn-iris-burst-predictor-default-00001-deployment-5685n46f6 3 /3 Running 0 12h sklearn-iris-predictor-default-00001-deployment-985d5cd46-zzw4x 3 /3 Running 0 12h","title":"Deploy InferenceService with Istio sidecar injection"},{"location":"admin/serverless/servicemesh/#run-a-prediction-from-the-same-namespace","text":"Deploy a test service in user1 namespace with httpbin.yaml . kubectl apply -f httpbin.yaml Run a prediction request to the sklearn-iris InferenceService without activator on the path, you are expected to get HTTP 200 as the authorization rule allows traffic from the same namespace. kubectl exec -it httpbin-6484879498-qxqj8 -c istio-proxy -n user1 -- curl -v sklearn-iris-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris Expected Output * Connected to sklearn-iris-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris HTTP/1.1 > Host: sklearn-iris-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.81.0 > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 200 OK < content-length: 36 < content-type: application/json < date: Sat, 26 Nov 2022 01 :45:10 GMT < server: istio-envoy < x-envoy-upstream-service-time: 42 < * Connection #0 to host sklearn-iris-predictor-default.user1.svc.cluster.local left intact { \"name\" : \"sklearn-iris\" , \"ready\" :true } Run a prediction request to the sklearn-iris-burst InferenceService with activator on the path, you are expected to get HTTP 200 as the authorization rule allows traffic from knative-serving namespace. kubectl exec -it httpbin-6484879498-qxqj8 -c istio-proxy -n user1 -- curl -v sklearn-iris-burst-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris-burst Expected Output * Connected to sklearn-iris-burst-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris-burst HTTP/1.1 > Host: sklearn-iris-burst-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.81.0 > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 200 OK < content-length: 42 < content-type: application/json < date: Sat, 26 Nov 2022 13 :55:14 GMT < server: istio-envoy < x-envoy-upstream-service-time: 209 < * Connection #0 to host sklearn-iris-burst-predictor-default.user1.svc.cluster.local left intact { \"name\" : \"sklearn-iris-burst\" , \"ready\" :true }","title":"Run a prediction from the same namespace"},{"location":"admin/serverless/servicemesh/#run-a-prediction-from-a-different-namespace","text":"Deploy a test service in default namespace with sleep.yaml which is different from the namespace the InferenceService is deployed to. kubectl apply -f sleep.yaml When you send a prediction request to the sklearn-iris InferenceService without activator on the request path from a different namespace, you are expected to get HTTP 403 \"RBAC denied\" as the authorization rule only allows the traffic from the same namespace user1 where the InferenceService is deployed. kubectl exec -it sleep-6d6b49d8b8-6ths6 -- curl -v sklearn-iris-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris Expected Output * Connected to sklearn-iris-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris HTTP/1.1 > Host: sklearn-iris-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.86.0-DEV > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 403 Forbidden < content-length: 19 < content-type: text/plain < date: Sat, 26 Nov 2022 02 :45:46 GMT < server: envoy < x-envoy-upstream-service-time: 14 < * Connection #0 to host sklearn-iris-predictor-default.user1.svc.cluster.local left intact When you send a prediction request to the sklearn-iris-burst InferenceService with activator on the request path from a different namespace, you actually get HTTP 200 response due to the above limitation as the authorization policy is not able to lock down the traffic only from the same namespace as the request is proxied through activator in knative-serving namespace, we expect to get HTTP 403 once upstream Knative net-istio is fixed. kubectl exec -it sleep-6d6b49d8b8-6ths6 -- curl -v sklearn-iris-burst-predictor-default.user1.svc.cluster.local/v1/models/sklearn-iris-burst Expected Output * Connected to sklearn-iris-burst-predictor-default.user1.svc.cluster.local ( 10 .96.137.152 ) port 80 ( #0) > GET /v1/models/sklearn-iris-burst HTTP/1.1 > Host: sklearn-iris-burst-predictor-default.user1.svc.cluster.local > User-Agent: curl/7.86.0-DEV > Accept: */* > * Mark bundle as not supporting multiuse < HTTP/1.1 200 OK < content-length: 42 < content-type: application/json < date: Sat, 26 Nov 2022 13 :59:04 GMT < server: envoy < x-envoy-upstream-service-time: 6 < * Connection #0 to host sklearn-iris-burst-predictor-default.user1.svc.cluster.local left intact { \"name\" : \"sklearn-iris-burst\" , \"ready\" :true }","title":"Run a prediction from a different namespace"},{"location":"api/api/","text":"KServe API \u00b6","title":"KServe API"},{"location":"api/api/#kserve-api","text":"","title":"KServe API"},{"location":"blog/_index/","text":"","title":" index"},{"location":"blog/articles/2021-09-27-kfserving-transition/","text":"Authors \u00b6 Dan Sun and Animesh Singh on behalf of the Kubeflow Serving Working Group KFServing is now KServe \u00b6 We are excited to announce the next chapter for KFServing. In coordination with the Kubeflow Project Steering Group, the KFServing GitHub repository has now been transferred to an independent KServe GitHub organization under the stewardship of the Kubeflow Serving Working Group leads. The project has been rebranded from KFServing to KServe , and we are planning to graduate the project from Kubeflow Project later this year. Developed collaboratively by Google, IBM, Bloomberg, NVIDIA, and Seldon in 2019, KFServing was published as open source in early 2019. The project sets out to provide the following features: - A simple, yet powerful, Kubernetes Custom Resource for deploying machine learning (ML) models on production across ML frameworks. - Provide performant, standardized inference protocol. - Serverless inference according to live traffic patterns, supporting \u201cScale-to-zero\u201d on both CPUs and GPUs. - Complete story for production ML Model Serving including prediction, pre/post-processing, explainability, and monitoring. - Support for deploying thousands of models at scale and inference graph capability for multiple models. KFServing was created to address the challenges of deploying and monitoring machine learning models on production for organizations. After publishing the open source project, we\u2019ve seen an explosion in demand for the software, leading to strong adoption and community growth. The scope of the project has since increased, and we have developed multiple components along the way, including our own growing body of documentation that needs it's own website and independent GitHub organization. What's Next \u00b6 Over the coming weeks, we will be releasing KServe 0.7 outside of the Kubeflow Project and will provide more details on how to migrate from KFServing to KServe with minimal disruptions. KFServing 0.5.x/0.6.x releases are still supported in next six months after KServe 0.7 release. We are also working on integrating core Kubeflow APIs and standards for the conformance program . For contributors, please follow the KServe developer and doc contribution guide to make code or doc contributions. We are excited to work with you to make KServe better and promote its adoption by more and more users! KServe Key Links \u00b6 Website Github Slack(#kubeflow-kfserving) Contributor Acknowledgement \u00b6 We'd like to thank all the KServe contributors for this transition work! Andrews Arokiam Animesh Singh Chin Huang Dan Sun Jagadeesh Jinchi He Nick Hill Paul Van Eck Qianshan Chen Suresh Nakkiran Sukumar Gaonkar Theofilos Papapanagiotou Tommy Li Vedant Padwal Yao Xiao Yuzhui Liu","title":"KFserving Transition"},{"location":"blog/articles/2021-09-27-kfserving-transition/#authors","text":"Dan Sun and Animesh Singh on behalf of the Kubeflow Serving Working Group","title":"Authors"},{"location":"blog/articles/2021-09-27-kfserving-transition/#kfserving-is-now-kserve","text":"We are excited to announce the next chapter for KFServing. In coordination with the Kubeflow Project Steering Group, the KFServing GitHub repository has now been transferred to an independent KServe GitHub organization under the stewardship of the Kubeflow Serving Working Group leads. The project has been rebranded from KFServing to KServe , and we are planning to graduate the project from Kubeflow Project later this year. Developed collaboratively by Google, IBM, Bloomberg, NVIDIA, and Seldon in 2019, KFServing was published as open source in early 2019. The project sets out to provide the following features: - A simple, yet powerful, Kubernetes Custom Resource for deploying machine learning (ML) models on production across ML frameworks. - Provide performant, standardized inference protocol. - Serverless inference according to live traffic patterns, supporting \u201cScale-to-zero\u201d on both CPUs and GPUs. - Complete story for production ML Model Serving including prediction, pre/post-processing, explainability, and monitoring. - Support for deploying thousands of models at scale and inference graph capability for multiple models. KFServing was created to address the challenges of deploying and monitoring machine learning models on production for organizations. After publishing the open source project, we\u2019ve seen an explosion in demand for the software, leading to strong adoption and community growth. The scope of the project has since increased, and we have developed multiple components along the way, including our own growing body of documentation that needs it's own website and independent GitHub organization.","title":"KFServing is now KServe"},{"location":"blog/articles/2021-09-27-kfserving-transition/#whats-next","text":"Over the coming weeks, we will be releasing KServe 0.7 outside of the Kubeflow Project and will provide more details on how to migrate from KFServing to KServe with minimal disruptions. KFServing 0.5.x/0.6.x releases are still supported in next six months after KServe 0.7 release. We are also working on integrating core Kubeflow APIs and standards for the conformance program . For contributors, please follow the KServe developer and doc contribution guide to make code or doc contributions. We are excited to work with you to make KServe better and promote its adoption by more and more users!","title":"What's Next"},{"location":"blog/articles/2021-09-27-kfserving-transition/#kserve-key-links","text":"Website Github Slack(#kubeflow-kfserving)","title":"KServe Key Links"},{"location":"blog/articles/2021-09-27-kfserving-transition/#contributor-acknowledgement","text":"We'd like to thank all the KServe contributors for this transition work! Andrews Arokiam Animesh Singh Chin Huang Dan Sun Jagadeesh Jinchi He Nick Hill Paul Van Eck Qianshan Chen Suresh Nakkiran Sukumar Gaonkar Theofilos Papapanagiotou Tommy Li Vedant Padwal Yao Xiao Yuzhui Liu","title":"Contributor Acknowledgement"},{"location":"blog/articles/2021-10-11-KServe-0.7-release/","text":"Authors \u00b6 Dan Sun , Animesh Singh , Yuzhui Liu , Vedant Padwal on behalf of the KServe Working Group. KFServing is now KServe and KServe 0.7 release is available, the release also ensures a smooth user migration experience from KFServing to KServe. What's Changed? \u00b6 InferenceService API group is changed from serving.kubeflow.org to serving.kserve.io #1826 , the migration job is created for smooth transition. Python SDK name is changed from kfserving to kserve . KServe Installation manifests #1824 . Models-web-app is separated out of the kserve repository to models-web-app . Docs and examples are moved to separate repository website . KServe images are migrated to kserve docker hub account. v1alpha2 API group is deprecated #1850 . \ud83c\udf08 What's New? \u00b6 ModelMesh project is joining KServe under repository modelmesh-serving ! ModelMesh is designed for high-scale, high-density and frequently-changing model use cases. ModelMesh intelligently loads and unloads AI models to and from memory to strike an intelligent trade-off between responsiveness to users and computational footprint. To learn more about ModelMesh features and components, check out the ModelMesh announcement blog and Join talk at #KubeCon NA to get a deeper dive into ModelMesh and KServe . (Alpha feature) Raw Kubernetes deployment support, Istio/Knative dependency is now optional and please follow the guide to install and turn on RawDeployment mode. KServe now has its own documentation website temporarily hosted on website . Support v1 crd and webhook configuration for Kubernetes 1.22 #1837 . Triton model serving runtime now defaults to 21.09 version #1840 . \ud83d\udc1e What's Fixed? \u00b6 Bug fix for Azure blob storage #1845 . Tar/Zip support for all storage options #1836 . Fix AWS_REGION env variable and add AWS_CA_BUNDLE for S3 #1780 . Torchserve custom package install fix #1619 . Join the community \u00b6 Visit our Website or GitHub Join the Slack(#kubeflow-kfserving) Attend a Biweekly community meeting on Wednesday 9am PST Contribute at developer and doc contribution guide to make code or doc contributions. We are excited to work with you to make KServe better and promote its adoption by more and more users! Contributors \u00b6 We would like to thank everyone for their efforts on v0.7 Andrews Arokiam Animesh Singh Chin Huang Dan Sun Jagadeesh Jinchi He Nick Hill Paul Van Eck Qianshan Chen Suresh Nakkiran Sukumar Gaonkar Theofilos Papapanagiotou Tommy Li Vedant Padwal Yao Xiao Yuzhui Liu","title":"KServe 0.7 Release"},{"location":"blog/articles/2021-10-11-KServe-0.7-release/#authors","text":"Dan Sun , Animesh Singh , Yuzhui Liu , Vedant Padwal on behalf of the KServe Working Group. KFServing is now KServe and KServe 0.7 release is available, the release also ensures a smooth user migration experience from KFServing to KServe.","title":"Authors"},{"location":"blog/articles/2021-10-11-KServe-0.7-release/#whats-changed","text":"InferenceService API group is changed from serving.kubeflow.org to serving.kserve.io #1826 , the migration job is created for smooth transition. Python SDK name is changed from kfserving to kserve . KServe Installation manifests #1824 . Models-web-app is separated out of the kserve repository to models-web-app . Docs and examples are moved to separate repository website . KServe images are migrated to kserve docker hub account. v1alpha2 API group is deprecated #1850 .","title":"What's Changed?"},{"location":"blog/articles/2021-10-11-KServe-0.7-release/#whats-new","text":"ModelMesh project is joining KServe under repository modelmesh-serving ! ModelMesh is designed for high-scale, high-density and frequently-changing model use cases. ModelMesh intelligently loads and unloads AI models to and from memory to strike an intelligent trade-off between responsiveness to users and computational footprint. To learn more about ModelMesh features and components, check out the ModelMesh announcement blog and Join talk at #KubeCon NA to get a deeper dive into ModelMesh and KServe . (Alpha feature) Raw Kubernetes deployment support, Istio/Knative dependency is now optional and please follow the guide to install and turn on RawDeployment mode. KServe now has its own documentation website temporarily hosted on website . Support v1 crd and webhook configuration for Kubernetes 1.22 #1837 . Triton model serving runtime now defaults to 21.09 version #1840 .","title":"\ud83c\udf08 What's New?"},{"location":"blog/articles/2021-10-11-KServe-0.7-release/#whats-fixed","text":"Bug fix for Azure blob storage #1845 . Tar/Zip support for all storage options #1836 . Fix AWS_REGION env variable and add AWS_CA_BUNDLE for S3 #1780 . Torchserve custom package install fix #1619 .","title":"\ud83d\udc1e What's Fixed?"},{"location":"blog/articles/2021-10-11-KServe-0.7-release/#join-the-community","text":"Visit our Website or GitHub Join the Slack(#kubeflow-kfserving) Attend a Biweekly community meeting on Wednesday 9am PST Contribute at developer and doc contribution guide to make code or doc contributions. We are excited to work with you to make KServe better and promote its adoption by more and more users!","title":"Join the community"},{"location":"blog/articles/2021-10-11-KServe-0.7-release/#contributors","text":"We would like to thank everyone for their efforts on v0.7 Andrews Arokiam Animesh Singh Chin Huang Dan Sun Jagadeesh Jinchi He Nick Hill Paul Van Eck Qianshan Chen Suresh Nakkiran Sukumar Gaonkar Theofilos Papapanagiotou Tommy Li Vedant Padwal Yao Xiao Yuzhui Liu","title":"Contributors"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/","text":"Authors \u00b6 Dan Sun , Paul Van Eck , Vedant Padwal , Andrews Arokiam on behalf of the KServe Working Group. Announcing: KServe v0.8 \u00b6 February 18, 2022 Today, we are pleased to announce the v0.8.0 release of KServe! While the last release was focused on the transition of KFServing to KServe, this release was focused on unifying the InferenceService API for deploying models on KServe and ModelMesh. Note : For current users of KFServing/KServe, please take a few minutes to answer this short survey and provide your feedback! Now, let's take a look at some of the changes and additions to KServe. What\u2019s changed? \u00b6 ONNX Runtime Server has been removed from the supported serving runtime list. KServe by default now uses the Triton Inference Server to serve ONNX models. KServe\u2019s PyTorchServer has been removed from the supported serving runtime list. KServe by default now uses TorchServe to serve PyTorch models. A few main KServe SDK class names have been changed: KFModel is renamed to Model KFServer is renamed to ModelServer KFModelRepository is renamed to ModelRepository What's new? \u00b6 Some notable updates are: ClusterServingRuntime and ServingRuntime CRDs are introduced. Learn more below . A new Model Spec was introduced to the InferenceService Predictor Spec as a new way to specify models. Learn more below . Knative 1.0 is now supported and certified for the KServe Serverless installation. gRPC is now supported for transformer to predictor network communication. TorchServe Serving runtime has been updated to 0.5.2 which now supports the KServe V2 REST protocol. ModelMesh now has multi-namespace support, and users can now deploy GCS or HTTP(S) hosted models. To see all release updates, check out the KServe release notes and ModelMesh Serving release notes ! ServingRuntimes and ClusterServingRuntimes \u00b6 This release introduces two new CRDs ServingRuntimes and ClusterServingRuntimes with the only difference between these two is that one is namespace-scoped and one is cluster-scoped. A ServingRuntime defines the templates for Pods that can serve one or more particular model formats. Each ServingRuntime defines key information such as the container image of the runtime and a list of the model formats that the runtime supports. In previous versions of KServe, supported predictor formats and container images were defined in a config map in the control plane namespace. The ServingRuntime CRD should allow for improved flexibility and extensibility for defining or customizing runtimes to how you see fit without having to modify any controller code or any resources in the controller namespace. Several out-of-the-box ClusterServingRuntimes are provided with KServe so that users can continue to use KServe how they did before without having to define the runtimes themselves. Example SKLearn ClusterServingRuntime: apiVersion : serving.kserve.io/v1alpha1 kind : ClusterServingRuntime metadata : name : kserve-sklearnserver spec : supportedModelFormats : - name : sklearn version : \"1\" autoSelect : true containers : - name : kserve-container image : kserve/sklearnserver:latest args : - --model_name={{.Name}} - --model_dir=/mnt/models - --http_port=8080 resources : requests : cpu : \"1\" memory : 2Gi limits : cpu : \"1\" memory : 2Gi Updated InferenceService Predictor Spec \u00b6 A new Model spec was also introduced as a part of the Predictor spec for InferenceServices. One of the problems KServe was having was that the InferenceService CRD was becoming unwieldy with each model serving runtime being an object in the Predictor spec. This generated a lot of field duplication in the schema, bloating the overall size of the CRD. If a user wanted to introduce a new model serving framework for KServe to support, the CRD would have to be modified, and subsequently the controller code. Now, with the Model spec, a user can specify a model format and optionally a corresponding version. The KServe control plane will automatically select and use the ClusterServingRuntime or ServingRuntime that supports the given format. Each ServingRuntime maintains a list of supported model formats and versions. If a format has autoselect as true , then that opens the ServingRuntime up for automatic model placement for that model format. New Schema Previous Schema apiVersion : serving.kserve.io/v1beta1 kind : InferenceService metadata : name : example-sklearn-isvc spec : predictor : model : modelFormat : name : sklearn storageUri : s3://bucket/sklearn/mnist.joblib apiVersion : serving.kserve.io/v1beta1 kind : InferenceService metadata : name : example-sklearn-isvc spec : predictor : sklearn : storageUri : s3://bucket/sklearn/mnist.joblib The previous way of defining predictors is still supported, however, the new approach will be the preferred one going forward. Eventually, the previous schema, with the framework names as keys in the predictor spec, will be removed. ModelMesh Updates \u00b6 ModelMesh has been in the process of integrating as KServe\u2019s multi-model serving backend. With the inclusion of the aforementioned ServingRuntime CRDs and the Predictor Model spec, the two projects are now much more aligned, with continual improvements underway. ModelMesh now supports multi-namespace reconciliation. Previously, the ModelMesh controller would only reconcile against resources deployed in the same namespace as the controller. Now, by default, ModelMesh will be able to handle InferenceService deployments in any \"modelmesh-enabled\" namespace. Learn more here . Also, while ModelMesh previously only supported S3-based storage, we are happy to share that ModelMesh now works with models hosted using GCS and HTTP(S). Join the community \u00b6 Visit our Website or GitHub Join the Slack ( #kubeflow-kfserving ) Attend a biweekly community meeting on Wednesday 9am PST View our developer and doc contribution guides to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thank you for trying out KServe!","title":"KServe 0.8 Release"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#authors","text":"Dan Sun , Paul Van Eck , Vedant Padwal , Andrews Arokiam on behalf of the KServe Working Group.","title":"Authors"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#announcing-kserve-v08","text":"February 18, 2022 Today, we are pleased to announce the v0.8.0 release of KServe! While the last release was focused on the transition of KFServing to KServe, this release was focused on unifying the InferenceService API for deploying models on KServe and ModelMesh. Note : For current users of KFServing/KServe, please take a few minutes to answer this short survey and provide your feedback! Now, let's take a look at some of the changes and additions to KServe.","title":"Announcing: KServe v0.8"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#whats-changed","text":"ONNX Runtime Server has been removed from the supported serving runtime list. KServe by default now uses the Triton Inference Server to serve ONNX models. KServe\u2019s PyTorchServer has been removed from the supported serving runtime list. KServe by default now uses TorchServe to serve PyTorch models. A few main KServe SDK class names have been changed: KFModel is renamed to Model KFServer is renamed to ModelServer KFModelRepository is renamed to ModelRepository","title":"What\u2019s changed?"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#whats-new","text":"Some notable updates are: ClusterServingRuntime and ServingRuntime CRDs are introduced. Learn more below . A new Model Spec was introduced to the InferenceService Predictor Spec as a new way to specify models. Learn more below . Knative 1.0 is now supported and certified for the KServe Serverless installation. gRPC is now supported for transformer to predictor network communication. TorchServe Serving runtime has been updated to 0.5.2 which now supports the KServe V2 REST protocol. ModelMesh now has multi-namespace support, and users can now deploy GCS or HTTP(S) hosted models. To see all release updates, check out the KServe release notes and ModelMesh Serving release notes !","title":"What's new?"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#servingruntimes-and-clusterservingruntimes","text":"This release introduces two new CRDs ServingRuntimes and ClusterServingRuntimes with the only difference between these two is that one is namespace-scoped and one is cluster-scoped. A ServingRuntime defines the templates for Pods that can serve one or more particular model formats. Each ServingRuntime defines key information such as the container image of the runtime and a list of the model formats that the runtime supports. In previous versions of KServe, supported predictor formats and container images were defined in a config map in the control plane namespace. The ServingRuntime CRD should allow for improved flexibility and extensibility for defining or customizing runtimes to how you see fit without having to modify any controller code or any resources in the controller namespace. Several out-of-the-box ClusterServingRuntimes are provided with KServe so that users can continue to use KServe how they did before without having to define the runtimes themselves. Example SKLearn ClusterServingRuntime: apiVersion : serving.kserve.io/v1alpha1 kind : ClusterServingRuntime metadata : name : kserve-sklearnserver spec : supportedModelFormats : - name : sklearn version : \"1\" autoSelect : true containers : - name : kserve-container image : kserve/sklearnserver:latest args : - --model_name={{.Name}} - --model_dir=/mnt/models - --http_port=8080 resources : requests : cpu : \"1\" memory : 2Gi limits : cpu : \"1\" memory : 2Gi","title":"ServingRuntimes and ClusterServingRuntimes"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#updated-inferenceservice-predictor-spec","text":"A new Model spec was also introduced as a part of the Predictor spec for InferenceServices. One of the problems KServe was having was that the InferenceService CRD was becoming unwieldy with each model serving runtime being an object in the Predictor spec. This generated a lot of field duplication in the schema, bloating the overall size of the CRD. If a user wanted to introduce a new model serving framework for KServe to support, the CRD would have to be modified, and subsequently the controller code. Now, with the Model spec, a user can specify a model format and optionally a corresponding version. The KServe control plane will automatically select and use the ClusterServingRuntime or ServingRuntime that supports the given format. Each ServingRuntime maintains a list of supported model formats and versions. If a format has autoselect as true , then that opens the ServingRuntime up for automatic model placement for that model format. New Schema Previous Schema apiVersion : serving.kserve.io/v1beta1 kind : InferenceService metadata : name : example-sklearn-isvc spec : predictor : model : modelFormat : name : sklearn storageUri : s3://bucket/sklearn/mnist.joblib apiVersion : serving.kserve.io/v1beta1 kind : InferenceService metadata : name : example-sklearn-isvc spec : predictor : sklearn : storageUri : s3://bucket/sklearn/mnist.joblib The previous way of defining predictors is still supported, however, the new approach will be the preferred one going forward. Eventually, the previous schema, with the framework names as keys in the predictor spec, will be removed.","title":"Updated InferenceService Predictor Spec"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#modelmesh-updates","text":"ModelMesh has been in the process of integrating as KServe\u2019s multi-model serving backend. With the inclusion of the aforementioned ServingRuntime CRDs and the Predictor Model spec, the two projects are now much more aligned, with continual improvements underway. ModelMesh now supports multi-namespace reconciliation. Previously, the ModelMesh controller would only reconcile against resources deployed in the same namespace as the controller. Now, by default, ModelMesh will be able to handle InferenceService deployments in any \"modelmesh-enabled\" namespace. Learn more here . Also, while ModelMesh previously only supported S3-based storage, we are happy to share that ModelMesh now works with models hosted using GCS and HTTP(S).","title":"ModelMesh Updates"},{"location":"blog/articles/2022-02-18-KServe-0.8-release/#join-the-community","text":"Visit our Website or GitHub Join the Slack ( #kubeflow-kfserving ) Attend a biweekly community meeting on Wednesday 9am PST View our developer and doc contribution guides to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thank you for trying out KServe!","title":"Join the community"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/","text":"Announcing: KServe v0.9.0 \u00b6 Today, we are pleased to announce the v0.9.0 release of KServe! KServe has now fully onboarded to LF AI & Data Foundation as an Incubation Project ! In this release we are excited to introduce the new InferenceGraph feature which has long been asked from the community. Also continuing the effort from the last release for unifying the InferenceService API for deploying models on KServe and ModelMesh, ModelMesh is now fully compatible with KServe InferenceService API! Introduce InferenceGraph \u00b6 The ML Inference system is getting bigger and more complex. It often consists of many models to make a single prediction. The common use cases are image classification and natural language multi-stage processing pipelines. For example, an image classification pipeline needs to run top level classification first then downstream further classification based on previous prediction results. KServe has the unique strength to build the distributed inference graph with its native integration of InferenceServices, standard inference protocol for chaining models and serverless auto-scaling capabilities. KServe leverages these strengths to build the InferenceGraph and enable users to deploy complex ML Inference pipelines to production in a declarative and scalable way. InferenceGraph is made up of a list of routing nodes with each node consisting of a set of routing steps. Each step can either route to an InferenceService or another node defined on the graph which makes the InferenceGraph highly composable. The graph router is deployed behind an HTTP endpoint and can be scaled dynamically based on request volume. The InferenceGraph supports four different types of routing nodes: Sequence , Switch , Ensemble , Splitter . Sequence Node : It allows users to define multiple Steps with InferenceServices or Nodes as routing targets in a sequence. The Steps are executed in sequence and the request/response from the previous step and be passed to the next step as input based on configuration. Switch Node : It allows users to define routing conditions and select a Step to execute if it matches the condition. The response is returned as soon as it finds the first step that matches the condition. If no condition is matched, the graph returns the original request. Ensemble Node : A model ensemble requires scoring each model separately and then combines the results into a single prediction response. You can then use different combination methods to produce the final result. Multiple classification trees, for example, are commonly combined using a \"majority vote\" method. Multiple regression trees are often combined using various averaging techniques. Splitter Node : It allows users to split the traffic to multiple targets using a weighted distribution. apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"cat-dog-classifier\" spec : predictor : pytorch : resources : requests : cpu : 100m storageUri : gs://kfserving-examples/models/torchserve/cat_dog_classification --- apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"dog-breed-classifier\" spec : predictor : pytorch : resources : requests : cpu : 100m storageUri : gs://kfserving-examples/models/torchserve/dog_breed_classification --- apiVersion : \"serving.kserve.io/v1alpha1\" kind : \"InferenceGraph\" metadata : name : \"dog-breed-pipeline\" spec : nodes : root : routerType : Sequence steps : - serviceName : cat-dog-classifier name : cat_dog_classifier # step name - serviceName : dog-breed-classifier name : dog_breed_classifier data : $request condition : \"[@this].#(predictions.0==\\\"dog\\\")\" Currently InferenceGraph is supported with the Serverless deployment mode. You can try it out following the tutorial . InferenceService API for ModelMesh \u00b6 The InferenceService CRD is now the primary interface for interacting with ModelMesh. Some changes were made to the InferenceService spec to better facilitate ModelMesh\u2019s needs. Storage Spec \u00b6 To unify how model storage is defined for both single and multi-model serving, a new storage spec was added to the predictor model spec. With this storage spec, users can specify a key inside a common secret holding config/credentials for each of the storage backends from which models can be loaded. Example: storage : key : localMinIO # Credential key for the destination storage in the common secret path : sklearn # Model path inside the bucket # schemaPath: null # Optional schema files for payload schema parameters : # Parameters to override the default values inside the common secret. bucket : example-models Learn more here . Model Status \u00b6 For further alignment between ModelMesh and KServe, some additions to the InferenceService status were made. There is now a Model Status section which contains information about the model loaded in the predictor. New fields include: states - State information of the predictor's model. activeModelState - The state of the model currently being served by the predictor's endpoints. targetModelState - This will be set only when transitionStatus is not UpToDate , meaning that the target model differs from the currently-active model. transitionStatus - Indicates state of the predictor relative to its current spec. modelCopies - Model copy information of the predictor's model. lastFailureInfo - Details about the most recent error associated with this predictor. Not all of the contained fields will necessarily have a value. Deploying on ModelMesh \u00b6 For deploying InferenceServices on ModelMesh, the ModelMesh and KServe controllers will still require that the user specifies the serving.kserve.io/deploymentMode: ModelMesh annotation. A complete example on an InferenceService with the new storage spec is showing below: apiVersion : serving.kserve.io/v1beta1 kind : InferenceService metadata : name : example-tensorflow-mnist annotations : serving.kserve.io/deploymentMode : ModelMesh spec : predictor : model : modelFormat : name : tensorflow storage : key : localMinIO path : tensorflow/mnist.savedmodel Other New Features: \u00b6 Support serving MLFlow model format via MLServer serving runtime. Support unified autoscaling target and metric fields for InferenceService components with both Serverless and RawDeployment mode. Support InferenceService ingress class and url domain template configuration for RawDeployment mode. ModelMesh now has a default OpenVINO Model Server ServingRuntime. What\u2019s Changed? \u00b6 The KServe controller manager is changed from StatefulSet to Deployment to support HA mode. log4j security vulnerability fix Upgrade TorchServe serving runtime to 0.6.0 Update MLServer serving runtime to 1.0.0 Check out the full release notes for KServe and ModelMesh for more details. Join the community \u00b6 Visit our Website or GitHub Join the Slack ( #kserve ) Attend our community meeting by subscribing to the KServe calendar . View our community github repository to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thank you for contributing or checking out KServe! \u2013 The KServe Working Group","title":"KServe 0.9 Release"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#announcing-kserve-v090","text":"Today, we are pleased to announce the v0.9.0 release of KServe! KServe has now fully onboarded to LF AI & Data Foundation as an Incubation Project ! In this release we are excited to introduce the new InferenceGraph feature which has long been asked from the community. Also continuing the effort from the last release for unifying the InferenceService API for deploying models on KServe and ModelMesh, ModelMesh is now fully compatible with KServe InferenceService API!","title":"Announcing: KServe v0.9.0"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#introduce-inferencegraph","text":"The ML Inference system is getting bigger and more complex. It often consists of many models to make a single prediction. The common use cases are image classification and natural language multi-stage processing pipelines. For example, an image classification pipeline needs to run top level classification first then downstream further classification based on previous prediction results. KServe has the unique strength to build the distributed inference graph with its native integration of InferenceServices, standard inference protocol for chaining models and serverless auto-scaling capabilities. KServe leverages these strengths to build the InferenceGraph and enable users to deploy complex ML Inference pipelines to production in a declarative and scalable way. InferenceGraph is made up of a list of routing nodes with each node consisting of a set of routing steps. Each step can either route to an InferenceService or another node defined on the graph which makes the InferenceGraph highly composable. The graph router is deployed behind an HTTP endpoint and can be scaled dynamically based on request volume. The InferenceGraph supports four different types of routing nodes: Sequence , Switch , Ensemble , Splitter . Sequence Node : It allows users to define multiple Steps with InferenceServices or Nodes as routing targets in a sequence. The Steps are executed in sequence and the request/response from the previous step and be passed to the next step as input based on configuration. Switch Node : It allows users to define routing conditions and select a Step to execute if it matches the condition. The response is returned as soon as it finds the first step that matches the condition. If no condition is matched, the graph returns the original request. Ensemble Node : A model ensemble requires scoring each model separately and then combines the results into a single prediction response. You can then use different combination methods to produce the final result. Multiple classification trees, for example, are commonly combined using a \"majority vote\" method. Multiple regression trees are often combined using various averaging techniques. Splitter Node : It allows users to split the traffic to multiple targets using a weighted distribution. apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"cat-dog-classifier\" spec : predictor : pytorch : resources : requests : cpu : 100m storageUri : gs://kfserving-examples/models/torchserve/cat_dog_classification --- apiVersion : \"serving.kserve.io/v1beta1\" kind : \"InferenceService\" metadata : name : \"dog-breed-classifier\" spec : predictor : pytorch : resources : requests : cpu : 100m storageUri : gs://kfserving-examples/models/torchserve/dog_breed_classification --- apiVersion : \"serving.kserve.io/v1alpha1\" kind : \"InferenceGraph\" metadata : name : \"dog-breed-pipeline\" spec : nodes : root : routerType : Sequence steps : - serviceName : cat-dog-classifier name : cat_dog_classifier # step name - serviceName : dog-breed-classifier name : dog_breed_classifier data : $request condition : \"[@this].#(predictions.0==\\\"dog\\\")\" Currently InferenceGraph is supported with the Serverless deployment mode. You can try it out following the tutorial .","title":"Introduce InferenceGraph"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#inferenceservice-api-for-modelmesh","text":"The InferenceService CRD is now the primary interface for interacting with ModelMesh. Some changes were made to the InferenceService spec to better facilitate ModelMesh\u2019s needs.","title":"InferenceService API for ModelMesh"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#storage-spec","text":"To unify how model storage is defined for both single and multi-model serving, a new storage spec was added to the predictor model spec. With this storage spec, users can specify a key inside a common secret holding config/credentials for each of the storage backends from which models can be loaded. Example: storage : key : localMinIO # Credential key for the destination storage in the common secret path : sklearn # Model path inside the bucket # schemaPath: null # Optional schema files for payload schema parameters : # Parameters to override the default values inside the common secret. bucket : example-models Learn more here .","title":"Storage Spec"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#model-status","text":"For further alignment between ModelMesh and KServe, some additions to the InferenceService status were made. There is now a Model Status section which contains information about the model loaded in the predictor. New fields include: states - State information of the predictor's model. activeModelState - The state of the model currently being served by the predictor's endpoints. targetModelState - This will be set only when transitionStatus is not UpToDate , meaning that the target model differs from the currently-active model. transitionStatus - Indicates state of the predictor relative to its current spec. modelCopies - Model copy information of the predictor's model. lastFailureInfo - Details about the most recent error associated with this predictor. Not all of the contained fields will necessarily have a value.","title":"Model Status"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#deploying-on-modelmesh","text":"For deploying InferenceServices on ModelMesh, the ModelMesh and KServe controllers will still require that the user specifies the serving.kserve.io/deploymentMode: ModelMesh annotation. A complete example on an InferenceService with the new storage spec is showing below: apiVersion : serving.kserve.io/v1beta1 kind : InferenceService metadata : name : example-tensorflow-mnist annotations : serving.kserve.io/deploymentMode : ModelMesh spec : predictor : model : modelFormat : name : tensorflow storage : key : localMinIO path : tensorflow/mnist.savedmodel","title":"Deploying on ModelMesh"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#other-new-features","text":"Support serving MLFlow model format via MLServer serving runtime. Support unified autoscaling target and metric fields for InferenceService components with both Serverless and RawDeployment mode. Support InferenceService ingress class and url domain template configuration for RawDeployment mode. ModelMesh now has a default OpenVINO Model Server ServingRuntime.","title":"Other New Features:"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#whats-changed","text":"The KServe controller manager is changed from StatefulSet to Deployment to support HA mode. log4j security vulnerability fix Upgrade TorchServe serving runtime to 0.6.0 Update MLServer serving runtime to 1.0.0 Check out the full release notes for KServe and ModelMesh for more details.","title":"What\u2019s Changed?"},{"location":"blog/articles/2022-07-21-KServe-0.9-release/#join-the-community","text":"Visit our Website or GitHub Join the Slack ( #kserve ) Attend our community meeting by subscribing to the KServe calendar . View our community github repository to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thank you for contributing or checking out KServe! \u2013 The KServe Working Group","title":"Join the community"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/","text":"Announcing: KServe v0.10.0 \u00b6 We are excited to announce KServe 0.10 release. In this release we have enabled more KServe networking options, improved KServe telemetry for supported serving runtimes and increased support coverage for Open(aka v2) inference protocol for both standard and ModelMesh InferenceService. KServe Networking Options \u00b6 Istio is now optional for both Serverless and RawDeployment mode. Please see the alternative networking guide for how you can enable other ingress options supported by Knative with Serverless mode. For Istio users, if you want to turn on full service mesh mode to secure InferenceService with mutual TLS and enable the traffic policies, please read the service mesh setup guideline . KServe Telemetry for Serving Runtimes \u00b6 We have instrumented additional latency metrics in KServe Python ServingRuntimes for preprocess , predict and postprocess handlers. In Serverless mode we have extended Knative queue-proxy to enable metrics aggregation for both metrics exposed in queue-proxy and kserve-container from each ServingRuntime . Please read the prometheus metrics setup guideline for how to enable the metrics scraping and aggregations. Open(v2) Inference Protocol Support Coverage \u00b6 As there have been increasing adoptions for KServe v2 Inference Protocol from AMD Inference ServingRuntime which supports FPGAs and OpenVINO which now provides KServe REST and gRPC compatible API, in the issue we have proposed to rename to KServe Open Inference Protocol . In KServe 0.10, we have added Open(v2) inference protocol support for KServe custom runtimes. Now, you can enable v2 REST/gRPC for both custom transformer and predictor with images built by implementing KServe Python SDK API. gRPC enables high performance inference data plane as it is built on top of HTTP/2 and binary data transportation which is more efficient to send over the wire compared to REST. Please see the detailed example for transformer and predictor . from kserve import Model def image_transform ( byte_array ): image_processing = transforms . Compose ([ transforms . ToTensor (), transforms . Normalize (( 0.1307 ,), ( 0.3081 ,)) ]) image = Image . open ( io . BytesIO ( byte_array )) tensor = image_processing ( image ) . numpy () return tensor class CustomModel ( Model ): def predict ( self , request : InferRequest , headers : Dict [ str , str ]) -> InferResponse : input_tensors = [ image_transform ( instance ) for instance in request . inputs [ 0 ] . data ] input_tensors = np . asarray ( input_tensors ) output = self . model ( input_tensors ) torch . nn . functional . softmax ( output , dim = 1 ) values , top_5 = torch . topk ( output , 5 ) result = values . flatten () . tolist () response_id = generate_uuid () infer_output = InferOutput ( name = \"output-0\" , shape = list ( values . shape ), datatype = \"FP32\" , data = result ) infer_response = InferResponse ( model_name = self . name , infer_outputs = [ infer_output ], response_id = response_id ) return infer_response class CustomTransformer ( Model ): def preprocess ( self , request : InferRequest , headers : Dict [ str , str ]) -> InferRequest : input_tensors = [ image_transform ( instance ) for instance in request . inputs [ 0 ] . data ] input_tensors = np . asarray ( input_tensors ) infer_inputs = [ InferInput ( name = \"INPUT__0\" , datatype = 'FP32' , shape = list ( input_tensors . shape ), data = input_tensors )] infer_request = InferRequest ( model_name = self . model_name , infer_inputs = infer_inputs ) return infer_request You can use the same Python API type InferRequest and InferResponse for both REST and gRPC protocol. KServe handles the underlying decoding and encoding according to the protocol. Warning A new headers argument is added to the custom handlers to pass http/gRPC headers or other metadata. You can also use this as context dict to pass data between handlers. If you have existing custom transformer or predictor, the headers argument is now required to add to the preprocess , predict and postprocess handlers. Please check the following matrix for supported ModelFormats and ServingRuntimes . Model Format v1 Open(v2) REST/gRPC Tensorflow \u2705 TFServing \u2705 Triton PyTorch \u2705 TorchServe \u2705 TorchServe TorchScript \u2705 TorchServe \u2705 Triton ONNX \u274c \u2705 Triton Scikit-learn \u2705 KServe \u2705 MLServer XGBoost \u2705 KServe \u2705 MLServer LightGBM \u2705 KServe \u2705 MLServer MLFlow \u274c \u2705 MLServer Custom \u2705 KServe \u2705 KServe Multi-Arch Image Support \u00b6 KServe control plane images kserve-controller , kserve/agent , kserve/router are now supported for multiple architectures: ppc64le , arm64 , amd64 , s390x . KServe Storage Credentials Support \u00b6 Currently, AWS users need to create a secret with long term/static IAM credentials for downloading models stored in S3. Security best practice is to use IAM role for service account(IRSA) which enables automatic credential rotation and fine-grained access control, see how to setup IRSA . Support Azure Blobs with managed identity . ModelMesh updates \u00b6 ModelMesh has continued to integrate itself as KServe's multi-model serving backend, introducing improvements and features that better align the two projects. For example, it now supports ClusterServingRuntimes, allowing use of cluster-scoped ServingRuntimes, originally introduced in KServe 0.8. Additionally, ModelMesh introduced support for TorchServe enabling users to serve arbitrary PyTorch models (e.g. eager-mode) in the context of distributed-multi-model serving. Other limitations have been addressed as well, such as adding support for BYTES/string type tensors when using the REST inference API for inference requests that require them. Other Changes: \u00b6 For a complete change list please read the release notes from KServe v0.10 and ModelMesh v0.10 . Join the community \u00b6 Visit our Website or GitHub Join the Slack ( #kserve ) Attend our community meeting by subscribing to the KServe calendar . View our community github repository to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thanks for all the contributors who have made the commits to 0.10 release! Steve Larkin Stephan Schielke Curtis Maddalozzo Zhongcheng Lao Dimitris Aragiorgis Pan Li tjandy98 Sukumar Gaonkar Rachit Chauhan Rafael Vasquez Tim Kleinloog Christian Kadner ddelange Lize Cai sangjune.park Suresh Nakkeran Konstantinos Messis Matt Rose Alexa Griffith Jagadeesh J Alex Lembiyeuski Yuki Iwai Andrews Arokiam Xin Fu adilhusain-s Pranav Pandit C1berwiz dilverse Yuan Tang Dan Sun Nick Hill The KServe Working Group","title":"KServe 0.10 Release"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#announcing-kserve-v0100","text":"We are excited to announce KServe 0.10 release. In this release we have enabled more KServe networking options, improved KServe telemetry for supported serving runtimes and increased support coverage for Open(aka v2) inference protocol for both standard and ModelMesh InferenceService.","title":"Announcing: KServe v0.10.0"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#kserve-networking-options","text":"Istio is now optional for both Serverless and RawDeployment mode. Please see the alternative networking guide for how you can enable other ingress options supported by Knative with Serverless mode. For Istio users, if you want to turn on full service mesh mode to secure InferenceService with mutual TLS and enable the traffic policies, please read the service mesh setup guideline .","title":"KServe Networking Options"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#kserve-telemetry-for-serving-runtimes","text":"We have instrumented additional latency metrics in KServe Python ServingRuntimes for preprocess , predict and postprocess handlers. In Serverless mode we have extended Knative queue-proxy to enable metrics aggregation for both metrics exposed in queue-proxy and kserve-container from each ServingRuntime . Please read the prometheus metrics setup guideline for how to enable the metrics scraping and aggregations.","title":"KServe Telemetry for Serving Runtimes"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#openv2-inference-protocol-support-coverage","text":"As there have been increasing adoptions for KServe v2 Inference Protocol from AMD Inference ServingRuntime which supports FPGAs and OpenVINO which now provides KServe REST and gRPC compatible API, in the issue we have proposed to rename to KServe Open Inference Protocol . In KServe 0.10, we have added Open(v2) inference protocol support for KServe custom runtimes. Now, you can enable v2 REST/gRPC for both custom transformer and predictor with images built by implementing KServe Python SDK API. gRPC enables high performance inference data plane as it is built on top of HTTP/2 and binary data transportation which is more efficient to send over the wire compared to REST. Please see the detailed example for transformer and predictor . from kserve import Model def image_transform ( byte_array ): image_processing = transforms . Compose ([ transforms . ToTensor (), transforms . Normalize (( 0.1307 ,), ( 0.3081 ,)) ]) image = Image . open ( io . BytesIO ( byte_array )) tensor = image_processing ( image ) . numpy () return tensor class CustomModel ( Model ): def predict ( self , request : InferRequest , headers : Dict [ str , str ]) -> InferResponse : input_tensors = [ image_transform ( instance ) for instance in request . inputs [ 0 ] . data ] input_tensors = np . asarray ( input_tensors ) output = self . model ( input_tensors ) torch . nn . functional . softmax ( output , dim = 1 ) values , top_5 = torch . topk ( output , 5 ) result = values . flatten () . tolist () response_id = generate_uuid () infer_output = InferOutput ( name = \"output-0\" , shape = list ( values . shape ), datatype = \"FP32\" , data = result ) infer_response = InferResponse ( model_name = self . name , infer_outputs = [ infer_output ], response_id = response_id ) return infer_response class CustomTransformer ( Model ): def preprocess ( self , request : InferRequest , headers : Dict [ str , str ]) -> InferRequest : input_tensors = [ image_transform ( instance ) for instance in request . inputs [ 0 ] . data ] input_tensors = np . asarray ( input_tensors ) infer_inputs = [ InferInput ( name = \"INPUT__0\" , datatype = 'FP32' , shape = list ( input_tensors . shape ), data = input_tensors )] infer_request = InferRequest ( model_name = self . model_name , infer_inputs = infer_inputs ) return infer_request You can use the same Python API type InferRequest and InferResponse for both REST and gRPC protocol. KServe handles the underlying decoding and encoding according to the protocol. Warning A new headers argument is added to the custom handlers to pass http/gRPC headers or other metadata. You can also use this as context dict to pass data between handlers. If you have existing custom transformer or predictor, the headers argument is now required to add to the preprocess , predict and postprocess handlers. Please check the following matrix for supported ModelFormats and ServingRuntimes . Model Format v1 Open(v2) REST/gRPC Tensorflow \u2705 TFServing \u2705 Triton PyTorch \u2705 TorchServe \u2705 TorchServe TorchScript \u2705 TorchServe \u2705 Triton ONNX \u274c \u2705 Triton Scikit-learn \u2705 KServe \u2705 MLServer XGBoost \u2705 KServe \u2705 MLServer LightGBM \u2705 KServe \u2705 MLServer MLFlow \u274c \u2705 MLServer Custom \u2705 KServe \u2705 KServe","title":"Open(v2) Inference Protocol Support Coverage"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#multi-arch-image-support","text":"KServe control plane images kserve-controller , kserve/agent , kserve/router are now supported for multiple architectures: ppc64le , arm64 , amd64 , s390x .","title":"Multi-Arch Image Support"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#kserve-storage-credentials-support","text":"Currently, AWS users need to create a secret with long term/static IAM credentials for downloading models stored in S3. Security best practice is to use IAM role for service account(IRSA) which enables automatic credential rotation and fine-grained access control, see how to setup IRSA . Support Azure Blobs with managed identity .","title":"KServe Storage Credentials Support"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#modelmesh-updates","text":"ModelMesh has continued to integrate itself as KServe's multi-model serving backend, introducing improvements and features that better align the two projects. For example, it now supports ClusterServingRuntimes, allowing use of cluster-scoped ServingRuntimes, originally introduced in KServe 0.8. Additionally, ModelMesh introduced support for TorchServe enabling users to serve arbitrary PyTorch models (e.g. eager-mode) in the context of distributed-multi-model serving. Other limitations have been addressed as well, such as adding support for BYTES/string type tensors when using the REST inference API for inference requests that require them.","title":"ModelMesh updates"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#other-changes","text":"For a complete change list please read the release notes from KServe v0.10 and ModelMesh v0.10 .","title":"Other Changes:"},{"location":"blog/articles/2023-02-05-KServe-0.10-release/#join-the-community","text":"Visit our Website or GitHub Join the Slack ( #kserve ) Attend our community meeting by subscribing to the KServe calendar . View our community github repository to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thanks for all the contributors who have made the commits to 0.10 release! Steve Larkin Stephan Schielke Curtis Maddalozzo Zhongcheng Lao Dimitris Aragiorgis Pan Li tjandy98 Sukumar Gaonkar Rachit Chauhan Rafael Vasquez Tim Kleinloog Christian Kadner ddelange Lize Cai sangjune.park Suresh Nakkeran Konstantinos Messis Matt Rose Alexa Griffith Jagadeesh J Alex Lembiyeuski Yuki Iwai Andrews Arokiam Xin Fu adilhusain-s Pranav Pandit C1berwiz dilverse Yuan Tang Dan Sun Nick Hill The KServe Working Group","title":"Join the community"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/","text":"Announcing: KServe v0.11 \u00b6 We are excited to announce the release of KServe 0.11, in this release we introduced Large Language Model (LLM) runtimes, made enhancements to the KServe control plane, Python SDK Open Inference Protocol support and dependency managemenet. For ModelMesh we have added features PVC, HPA, payload logging to ensure feature parity with KServe. Here is a summary of the key changes: KServe Core Inference Enhancements \u00b6 Support path based routing which is served as an alternative way to the host based routing, the URL of the InferenceService could look like http:///serving// . Please refer to the doc for how to enable path based routing. Introduced priority field for Serving Runtime custom resource to handle the case when you have multiple serving runtimes which support the same model formats, see more details from the serving runtime doc . Introduced Custom Storage Container CRD to allow customized implementations with supported storage URI prefixes, example use cases are private model registry integration: apiVersion : \"serving.kserve.io/v1alpha1\" kind : ClusterStorageContainer metadata : name : default spec : container : name : storage-initializer image : kserve/model-registry:latest resources : requests : memory : 100Mi cpu : 100m limits : memory : 1Gi cpu : \"1\" supportedUriFormats : - prefix : model-registry:// Inference Graph enhancements for improving the API spec to support pod affinity and resource requirement fields. Dependency field with options Soft and Hard is introduced to handle error responses from the inference steps to decide whether to short-circuit the request in case of errors, see the following example with hard dependency with the node steps: apiVersion : serving.kserve.io/v1alpha1 kind : InferenceGraph metadata : name : graph_with_switch_node spec : nodes : root : routerType : Sequence steps : - name : \"rootStep1\" nodeName : node1 dependency : Hard - name : \"rootStep2\" serviceName : {{ success_200_isvc_id }} node1 : routerType : Switch steps : - name : \"node1Step1\" serviceName : {{ error_404_isvc_id }} condition : \"[@this].#(decision_picker==ERROR)\" dependency : Hard For more details please refer to the issue . Improved InferenceService debugging experience by adding the aggregated RoutesReady status and LastDeploymentReady condition to the InferenceService Status to differentiate the endpoint and deployment status. This applies to the serverless mode and for more details refer to the API docs . Enhanced Python SDK Dependency Management \u00b6 KServe has adopted poetry to manage python dependencies. You can now install the KServe SDK with locked dependencies using poetry install . While pip install still works, we highly recommend using poetry to ensure predictable dependency management. The KServe SDK is also slimmed down by making the cloud storage dependency optional, if you require storage dependency for custom serving runtimes you can still install with pip install kserve[storage] . KServe Python Runtimes Improvements \u00b6 KServe Python Runtimes including sklearnserver , lgbserver , xgbserver now support the open inference protocol for both REST and gRPC. Logging improvements including adding Uvicorn access logging and a default KServe logger. Postprocess handler has been aligned with open inference protocol, simplifying the underlying transportation protocol complexities. LLM Runtimes \u00b6 TorchServe LLM Runtime \u00b6 KServe now integrates with TorchServe 0.8, offering the support for LLM models that may not fit onto a single GPU. Huggingface Accelerate and Deepspeed are available options to split the model into multiple partitions over multiple GPUs. You can see the detailed example for how to serve the LLM on KServe with TorchServe runtime. vLLM Runtime \u00b6 Serving LLM models can be surprisingly slow even on high end GPUs, vLLM is a fast and easy-to-use LLM inference engine. It can achieve 10x-20x higher throughput than Huggingface transformers. It supports continuous batching for increased throughput and GPU utilization, paged attention to address the memory bottleneck where in the autoregressive decoding process all the attention key value tensors(KV Cache) are kept in the GPU memory to generate next tokens. In the example we show how to deploy vLLM on KServe and expects further integration in KServe 0.12 with proposed generate endpoint for open inference protocol. ModelMesh Updates \u00b6 Storing Models on Kubernetes Persistent Volumes (PVC) \u00b6 ModelMesh now allows to directly mount model files onto serving runtimes pods using Kubernetes Persistent Volumes . Depending on the selected storage solution this approach can significantly reduce latency when deploying new predictors, potentially remove the need for additional S3 cloud object storage like AWS S3, GCS, or Azure Blob Storage altogether. Horizontal Pod Autoscaling (HPA) \u00b6 Kubernetes Horizontal Pod Autoscaling can now be used at the serving runtime pod level. With HPA enabled, the ModelMesh controller no longer manages the number of replicas. Instead, a HorizontalPodAutoscaler automatically updates the serving runtime deployment with the number of Pods to best match the demand. Model Metrics, Metrics Dashboard, Payload Event Logging \u00b6 ModelMesh v0.11 introduces a new configuration option to emit a subset of useful metrics at the individual model level. These metrics can help identify outlier or \"heavy hitter\" models and consequently fine-tune the deployments of those inference services, like allocating more resources or increasing the number of replicas for improved responsiveness or avoid frequent cache misses. A new Grafana dashboard was added to display the comprehensive set of Prometheus metrics like model loading and unloading rates, internal queuing delays, capacity and usage, cache state, etc. to monitor the general health of the ModelMesh Serving deployment. The new PayloadProcessor interface can be implemented to log prediction requests and responses, to create data sinks for data visualization, for model quality assessment, or for drift and outlier detection by external monitoring systems. What's Changed? \u00b6 To allow longer InferenceService name due to DNS max length limits from issue , the Default suffix in the inference service component(predictor/transformer/explainer) name has been removed for newly created InferenceServices. This affects the client that is using the component url directly instead of the top level InferenceService url. Status.address.url is now consistent for both serverless and raw deployment mode, the url path portion is dropped in serverless mode. Raw bytes are now accepted in v1 protocol, setting the right content-type header to application/json is required to recognize and decode the json payload if content-type is specified. curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test. ${ CUSTOM_DOMAIN } /v1/models/sklearn-iris:predict -d @./iris-input.json For a complete change list please read the release notes from KServe v0.11 and ModelMesh v0.11 . Join the community \u00b6 Visit our Website or GitHub Join the Slack ( #kserve ) Attend our community meeting by subscribing to the KServe calendar . View our community github repository to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thanks for all the contributors who have made the commits to 0.11 release! The KServe Working Group","title":"KServe 0.11 Release"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#announcing-kserve-v011","text":"We are excited to announce the release of KServe 0.11, in this release we introduced Large Language Model (LLM) runtimes, made enhancements to the KServe control plane, Python SDK Open Inference Protocol support and dependency managemenet. For ModelMesh we have added features PVC, HPA, payload logging to ensure feature parity with KServe. Here is a summary of the key changes:","title":"Announcing: KServe v0.11"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#kserve-core-inference-enhancements","text":"Support path based routing which is served as an alternative way to the host based routing, the URL of the InferenceService could look like http:///serving// . Please refer to the doc for how to enable path based routing. Introduced priority field for Serving Runtime custom resource to handle the case when you have multiple serving runtimes which support the same model formats, see more details from the serving runtime doc . Introduced Custom Storage Container CRD to allow customized implementations with supported storage URI prefixes, example use cases are private model registry integration: apiVersion : \"serving.kserve.io/v1alpha1\" kind : ClusterStorageContainer metadata : name : default spec : container : name : storage-initializer image : kserve/model-registry:latest resources : requests : memory : 100Mi cpu : 100m limits : memory : 1Gi cpu : \"1\" supportedUriFormats : - prefix : model-registry:// Inference Graph enhancements for improving the API spec to support pod affinity and resource requirement fields. Dependency field with options Soft and Hard is introduced to handle error responses from the inference steps to decide whether to short-circuit the request in case of errors, see the following example with hard dependency with the node steps: apiVersion : serving.kserve.io/v1alpha1 kind : InferenceGraph metadata : name : graph_with_switch_node spec : nodes : root : routerType : Sequence steps : - name : \"rootStep1\" nodeName : node1 dependency : Hard - name : \"rootStep2\" serviceName : {{ success_200_isvc_id }} node1 : routerType : Switch steps : - name : \"node1Step1\" serviceName : {{ error_404_isvc_id }} condition : \"[@this].#(decision_picker==ERROR)\" dependency : Hard For more details please refer to the issue . Improved InferenceService debugging experience by adding the aggregated RoutesReady status and LastDeploymentReady condition to the InferenceService Status to differentiate the endpoint and deployment status. This applies to the serverless mode and for more details refer to the API docs .","title":"KServe Core Inference Enhancements"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#enhanced-python-sdk-dependency-management","text":"KServe has adopted poetry to manage python dependencies. You can now install the KServe SDK with locked dependencies using poetry install . While pip install still works, we highly recommend using poetry to ensure predictable dependency management. The KServe SDK is also slimmed down by making the cloud storage dependency optional, if you require storage dependency for custom serving runtimes you can still install with pip install kserve[storage] .","title":"Enhanced Python SDK Dependency Management"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#kserve-python-runtimes-improvements","text":"KServe Python Runtimes including sklearnserver , lgbserver , xgbserver now support the open inference protocol for both REST and gRPC. Logging improvements including adding Uvicorn access logging and a default KServe logger. Postprocess handler has been aligned with open inference protocol, simplifying the underlying transportation protocol complexities.","title":"KServe Python Runtimes Improvements"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#llm-runtimes","text":"","title":"LLM Runtimes"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#torchserve-llm-runtime","text":"KServe now integrates with TorchServe 0.8, offering the support for LLM models that may not fit onto a single GPU. Huggingface Accelerate and Deepspeed are available options to split the model into multiple partitions over multiple GPUs. You can see the detailed example for how to serve the LLM on KServe with TorchServe runtime.","title":"TorchServe LLM Runtime"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#vllm-runtime","text":"Serving LLM models can be surprisingly slow even on high end GPUs, vLLM is a fast and easy-to-use LLM inference engine. It can achieve 10x-20x higher throughput than Huggingface transformers. It supports continuous batching for increased throughput and GPU utilization, paged attention to address the memory bottleneck where in the autoregressive decoding process all the attention key value tensors(KV Cache) are kept in the GPU memory to generate next tokens. In the example we show how to deploy vLLM on KServe and expects further integration in KServe 0.12 with proposed generate endpoint for open inference protocol.","title":"vLLM Runtime"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#modelmesh-updates","text":"","title":"ModelMesh Updates"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#storing-models-on-kubernetes-persistent-volumes-pvc","text":"ModelMesh now allows to directly mount model files onto serving runtimes pods using Kubernetes Persistent Volumes . Depending on the selected storage solution this approach can significantly reduce latency when deploying new predictors, potentially remove the need for additional S3 cloud object storage like AWS S3, GCS, or Azure Blob Storage altogether.","title":"Storing Models on Kubernetes Persistent Volumes (PVC)"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#horizontal-pod-autoscaling-hpa","text":"Kubernetes Horizontal Pod Autoscaling can now be used at the serving runtime pod level. With HPA enabled, the ModelMesh controller no longer manages the number of replicas. Instead, a HorizontalPodAutoscaler automatically updates the serving runtime deployment with the number of Pods to best match the demand.","title":"Horizontal Pod Autoscaling (HPA)"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#model-metrics-metrics-dashboard-payload-event-logging","text":"ModelMesh v0.11 introduces a new configuration option to emit a subset of useful metrics at the individual model level. These metrics can help identify outlier or \"heavy hitter\" models and consequently fine-tune the deployments of those inference services, like allocating more resources or increasing the number of replicas for improved responsiveness or avoid frequent cache misses. A new Grafana dashboard was added to display the comprehensive set of Prometheus metrics like model loading and unloading rates, internal queuing delays, capacity and usage, cache state, etc. to monitor the general health of the ModelMesh Serving deployment. The new PayloadProcessor interface can be implemented to log prediction requests and responses, to create data sinks for data visualization, for model quality assessment, or for drift and outlier detection by external monitoring systems.","title":"Model Metrics, Metrics Dashboard, Payload Event Logging"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#whats-changed","text":"To allow longer InferenceService name due to DNS max length limits from issue , the Default suffix in the inference service component(predictor/transformer/explainer) name has been removed for newly created InferenceServices. This affects the client that is using the component url directly instead of the top level InferenceService url. Status.address.url is now consistent for both serverless and raw deployment mode, the url path portion is dropped in serverless mode. Raw bytes are now accepted in v1 protocol, setting the right content-type header to application/json is required to recognize and decode the json payload if content-type is specified. curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test. ${ CUSTOM_DOMAIN } /v1/models/sklearn-iris:predict -d @./iris-input.json For a complete change list please read the release notes from KServe v0.11 and ModelMesh v0.11 .","title":"What's Changed?"},{"location":"blog/articles/2023-10-08-KServe-0.11-release/#join-the-community","text":"Visit our Website or GitHub Join the Slack ( #kserve ) Attend our community meeting by subscribing to the KServe calendar . View our community github repository to learn how to make contributions. We are excited to work with you to make KServe better and promote its adoption! Thanks for all the contributors who have made the commits to 0.11 release! The KServe Working Group","title":"Join the community"},{"location":"blog/articles/_index/","text":"","title":" index"},{"location":"community/adopters/","text":"Adopters of KServe \u00b6 This page contains a list of organizations who are using KServe either in production, or providing integrations or deployment options with their Cloud or product offerings. If you'd like to be included here, please send a pull request which modifies this file. Please keep the list in alphabetical order. Organization Contact Advanced Micro Devices Varun Sharma Amazon Web Services Ellis Tarn Bloomberg Dan Sun Cars24 Swapnesh Khare Chamred Kubeflow from Canonical Daniela Plasencia Cisco Krishna Durai Cloudera Zoram Thanga CoreWeave Peter Salanki Gojek Willem Pienaar Deeploy Tim Kleinloog Halodoc ID Joinal Ahmed IBM Nick Hill Kubeflow on Google Cloud James Liu Inspur Qingshan Chen Max Kelsen Jacob O'Farrell Naver Mark Winter Nuance Jeff Griffith NVIDIA David Goodwin One Convergence Subra Ongole PITS Global Data Recovery Services Pheianox Red Hat Taneem Ibrahim Seldon Clive Cox Patterson Consulting Josh Patterson Samsung SDS Hanbae Seo Striveworks Jordan Yono Zillow Peilun Li Upstage JuHyung Son Intuit Rachit Chauhan","title":"Adopters"},{"location":"community/adopters/#adopters-of-kserve","text":"This page contains a list of organizations who are using KServe either in production, or providing integrations or deployment options with their Cloud or product offerings. If you'd like to be included here, please send a pull request which modifies this file. Please keep the list in alphabetical order. Organization Contact Advanced Micro Devices Varun Sharma Amazon Web Services Ellis Tarn Bloomberg Dan Sun Cars24 Swapnesh Khare Chamred Kubeflow from Canonical Daniela Plasencia Cisco Krishna Durai Cloudera Zoram Thanga CoreWeave Peter Salanki Gojek Willem Pienaar Deeploy Tim Kleinloog Halodoc ID Joinal Ahmed IBM Nick Hill Kubeflow on Google Cloud James Liu Inspur Qingshan Chen Max Kelsen Jacob O'Farrell Naver Mark Winter Nuance Jeff Griffith NVIDIA David Goodwin One Convergence Subra Ongole PITS Global Data Recovery Services Pheianox Red Hat Taneem Ibrahim Seldon Clive Cox Patterson Consulting Josh Patterson Samsung SDS Hanbae Seo Striveworks Jordan Yono Zillow Peilun Li Upstage JuHyung Son Intuit Rachit Chauhan","title":"Adopters of KServe"},{"location":"community/presentations/","text":"KServe(Formally KFServing) Presentations and Demoes \u00b6 This page contains a list of presentations and demos. If you'd like to add a presentation or demo here, please send a pull request. Presentation/Demo Presenters Distributed Machine Learning Patterns from Manning Publications Yuan Tang KubeCon 2019: Introducing KFServing: Serverless Model Serving on Kubernetes Dan Sun, Ellis Tarn KubeCon 2019: Advanced Model Inferencing Leveraging KNative, Istio & Kubeflow Serving Animesh Singh, Clive Cox KubeflowDojo: KFServing - Production Model Serving Platform Animesh Singh, Tommy Li NVIDIA: Accelerate and Autoscale Deep Learning Inference on GPUs with KFServing Dan Sun, David Goodwin KF Community: KFServing - Enabling Serverless Workloads Across Model Frameworks Ellis Tarn KubeflowDojo: Demo - KFServing End to End through Notebook Animesh Singh, Tommy Li KubeflowDojo: Demo - KFServing with Kafka and Kubeflow Pipelines Animesh Singh Anchor MLOps Podcast: Serving Models with KFServing David Aponte, Demetrios Brinkmann Kubeflow 101: What is KFServing? Stephanie Wong ICML 2020, Workshop on Challenges in Deploying and Monitoring Machine Learning Systems : Serverless inferencing on Kubernetes Clive Cox Serverless Practitioners Summit 2020: Serverless Machine Learning Inference with KFServing Clive Cox, Yuzhui Liu MLOps Meetup: KServe Live Coding Session Theofilos Papapanagiotou KubeCon AI Days 2021: Serving Machine Learning Models at Scale Using KServe Yuzhui Liu KubeCon 2021: Serving Machine Learning Models at Scale Using KServe Animesh Singh KubeCon China 2021: Accelerate Federated Learning Model Deployment with KServe Fangchi Wang & Jiahao Chen KubeCon AI Days 2022: Exploring ML Model Serving with KServe Alexa Nicole Griffith KubeCon AI Days 2022: Enhancing the Performance Testing Process for gRPC Model Inferencing at Scale Ted Chang, Paul Van Eck KubeCon Edge Days 2022: Model Serving at the Edge Made Easier Paul Van Eck KnativeCon 2022: How We Built an ML inference Platform with Knative Dan Sun KubeCon EU 2023: The state and future of cloud native model serving Dan Sun, Theofilos Papapanagiotou Kubeflow Summit 2023: Scale your Models to Zero with Knative and KServe Jooho Lee Kubeflow Summit 2023: What to choose? ModelMesh vs Model Serving? Vaibhav Jain","title":"Demos and Presentations"},{"location":"community/presentations/#kserveformally-kfserving-presentations-and-demoes","text":"This page contains a list of presentations and demos. If you'd like to add a presentation or demo here, please send a pull request. Presentation/Demo Presenters Distributed Machine Learning Patterns from Manning Publications Yuan Tang KubeCon 2019: Introducing KFServing: Serverless Model Serving on Kubernetes Dan Sun, Ellis Tarn KubeCon 2019: Advanced Model Inferencing Leveraging KNative, Istio & Kubeflow Serving Animesh Singh, Clive Cox KubeflowDojo: KFServing - Production Model Serving Platform Animesh Singh, Tommy Li NVIDIA: Accelerate and Autoscale Deep Learning Inference on GPUs with KFServing Dan Sun, David Goodwin KF Community: KFServing - Enabling Serverless Workloads Across Model Frameworks Ellis Tarn KubeflowDojo: Demo - KFServing End to End through Notebook Animesh Singh, Tommy Li KubeflowDojo: Demo - KFServing with Kafka and Kubeflow Pipelines Animesh Singh Anchor MLOps Podcast: Serving Models with KFServing David Aponte, Demetrios Brinkmann Kubeflow 101: What is KFServing? Stephanie Wong ICML 2020, Workshop on Challenges in Deploying and Monitoring Machine Learning Systems : Serverless inferencing on Kubernetes Clive Cox Serverless Practitioners Summit 2020: Serverless Machine Learning Inference with KFServing Clive Cox, Yuzhui Liu MLOps Meetup: KServe Live Coding Session Theofilos Papapanagiotou KubeCon AI Days 2021: Serving Machine Learning Models at Scale Using KServe Yuzhui Liu KubeCon 2021: Serving Machine Learning Models at Scale Using KServe Animesh Singh KubeCon China 2021: Accelerate Federated Learning Model Deployment with KServe Fangchi Wang & Jiahao Chen KubeCon AI Days 2022: Exploring ML Model Serving with KServe Alexa Nicole Griffith KubeCon AI Days 2022: Enhancing the Performance Testing Process for gRPC Model Inferencing at Scale Ted Chang, Paul Van Eck KubeCon Edge Days 2022: Model Serving at the Edge Made Easier Paul Van Eck KnativeCon 2022: How We Built an ML inference Platform with Knative Dan Sun KubeCon EU 2023: The state and future of cloud native model serving Dan Sun, Theofilos Papapanagiotou Kubeflow Summit 2023: Scale your Models to Zero with Knative and KServe Jooho Lee Kubeflow Summit 2023: What to choose? ModelMesh vs Model Serving? Vaibhav Jain","title":"KServe(Formally KFServing) Presentations and Demoes"},{"location":"developer/debug/","text":"KServe Debugging Guide \u00b6 Debug KServe InferenceService Status \u00b6 You deployed an InferenceService to KServe, but it is not in ready state. Go through this step by step guide to understand what failed. kubectl get inferenceservices sklearn-iris NAME URL READY DEFAULT TRAFFIC CANARY TRAFFIC AGE model-example False 1m IngressNotConfigured \u00b6 If you see IngressNotConfigured error, this indicates Istio Ingress Gateway probes are failing. kubectl get ksvc NAME URL LATESTCREATED LATESTREADY READY REASON sklearn-iris-predictor-default http://sklearn-iris-predictor-default.default.example.com sklearn-iris-predictor-default-jk794 mnist-sample-predictor-default-jk794 Unknown IngressNotConfigured You can then check Knative networking-istio pod logs for more details. kubectl logs -l app = networking-istio -n knative-serving If you are seeing HTTP 403, then you may have Istio RBAC turned on which blocks the probes to your service. { \"level\" : \"error\" , \"ts\" : \"2020-03-26T19:12:00.749Z\" , \"logger\" : \"istiocontroller.ingress-controller.status-manager\" , \"caller\" : \"ingress/status.go:366\" , \"msg\" : \"Probing of http://flowers-sample-predictor-default.kubeflow-jeanarmel-luce.example.com:80/ failed, IP: 10.0.0.29:80, ready: false, error: unexpected status code: want [200], got 403 (depth: 0)\" , \"commit\" : \"6b0e5c6\" , \"knative.dev/controller\" : \"ingress-controller\" , \"stacktrace\" : \"knative.dev/serving/pkg/reconciler/ingress.(*StatusProber).processWorkItem\\n\\t/home/prow/go/src/knative.dev/serving/pkg/reconciler/ingress/status.go:366\\nknative.dev/serving/pkg/reconciler/ingress.(*StatusProber).Start.func1\\n\\t/home/prow/go/src/knative.dev/serving/pkg/reconciler/ingress/status.go:268\" } RevisionMissing Error \u00b6 If you see RevisionMissing error, then your service pods are not in ready state. Knative Service creates Knative Revision which represents a snapshot of the InferenceService code and configuration. Storage Initializer fails to download model \u00b6 kubectl get revision $( kubectl get configuration sklearn-iris-predictor-default --output jsonpath = \"{.status.latestCreatedRevisionName}\" ) NAME CONFIG NAME K8S SERVICE NAME GENERATION READY REASON sklearn-iris-predictor-default-csjpw sklearn-iris-predictor-default sklearn-iris-predictor-default-csjpw 2 Unknown Deploying If you see READY status in Unknown error, this usually indicates that the KServe Storage Initializer init container fails to download the model and you can check the init container logs to see why it fails, note that the pod scales down after sometime if the init container fails . kubectl get pod -l serving.kserve.io/inferenceservice = sklearn-iris NAME READY STATUS RESTARTS AGE sklearn-iris-predictor-default-29jks-deployment-5f7d4b9996hzrnc 0 /3 Init:Error 1 10s kubectl logs -l model = sklearn-iris -c storage-initializer [ I 200517 03 :56:19 initializer-entrypoint:13 ] Initializing, args: src_uri [ gs://kfserving-examples/models/sklearn/iris-1 ] dest_path [ [ /mnt/models ] [ I 200517 03 :56:19 storage:35 ] Copying contents of gs://kfserving-examples/models/sklearn/iris-1 to local Traceback ( most recent call last ) : File \"/storage-initializer/scripts/initializer-entrypoint\" , line 14 , in kserve.Storage.download ( src_uri, dest_path ) File \"/usr/local/lib/python3.7/site-packages/kfserving/storage.py\" , line 48 , in download Storage._download_gcs ( uri, out_dir ) File \"/usr/local/lib/python3.7/site-packages/kfserving/storage.py\" , line 116 , in _download_gcs The path or model %s does not exist. \" % (uri)) RuntimeError: Failed to fetch model. The path or model gs://kfserving-examples/models/sklearn/iris-1 does not exist. [I 200517 03:40:19 initializer-entrypoint:13] Initializing, args: src_uri [gs://kfserving-examples/models/sklearn/iris] dest_path[ [/mnt/models] [I 200517 03:40:19 storage:35] Copying contents of gs://kfserving-examples/models/sklearn/iris to local [I 200517 03:40:20 storage:111] Downloading: /mnt/models/model.joblib [I 200517 03:40:20 storage:60] Successfully copied gs://kfserving-examples/models/sklearn/iris to /mnt/models Inference Service in OOM status \u00b6 If you see ExitCode137 from the revision status, this means the revision has failed and this usually happens when the inference service pod is out of memory. To address it, you might need to bump up the memory limit of the InferenceService . kubectl get revision $( kubectl get configuration sklearn-iris-predictor-default --output jsonpath = \"{.status.latestCreatedRevisionName}\" ) NAME CONFIG NAME K8S SERVICE NAME GENERATION READY REASON sklearn-iris-predictor-default-84bzf sklearn-iris-predictor-default sklearn-iris-predictor-default-84bzf 8 False ExitCode137s Inference Service fails to start \u00b6 If you see other exit codes from the revision status you can further check the pod status. kubectl get pods -l serving.kserve.io/inferenceservice = sklearn-iris sklearn-iris-predictor-default-rvhmk-deployment-867c6444647tz7n 1 /3 CrashLoopBackOff 3 80s If you see the CrashLoopBackOff , then check the kserve-container log to see more details where it fails, the error log is usually propagated on revision container status also. kubectl logs sklearn-iris-predictor-default-rvhmk-deployment-867c6444647tz7n kserve-container [ I 200517 04 :58:21 storage:35 ] Copying contents of /mnt/models to local Traceback ( most recent call last ) : File \"/usr/local/lib/python3.7/runpy.py\" , line 193 , in _run_module_as_main \"__main__\" , mod_spec ) File \"/usr/local/lib/python3.7/runpy.py\" , line 85 , in _run_code exec ( code, run_globals ) File \"/sklearnserver/sklearnserver/__main__.py\" , line 33 , in model.load () File \"/sklearnserver/sklearnserver/model.py\" , line 36 , in load model_file = next ( path for path in paths if os.path.exists ( path )) StopIteration Inference Service cannot fetch docker images from AWS ECR \u00b6 If you don't see the inference service created at all for custom images from private registries (such as AWS ECR), it might be that the Knative Serving Controller fails to authenticate itself against the registry. failed to resolve image to digest: failed to fetch image information: unsupported status code 401 ; body: Not Authorized You can verify that this is actually the case by spinning up a pod that uses your image. The pod should be able to fetch it, if the correct IAM roles are attached, while Knative is not able to. To circumvent this issue you can either skip tag resolution or provide certificates for your registry as detailed in the official knative docs . kubectl -n knative-serving edit configmap config-deployment The resultant yaml will look like something below. apiVersion : v1 kind : ConfigMap metadata : name : config-deployment namespace : knative-serving data : # List of repositories for which tag to digest resolving should be skipped (for AWS ECR: {account_id}.dkr.ecr.{region}.amazonaws.com) registriesSkippingTagResolving : registry.example.com Debug KServe Request flow \u00b6 +----------------------+ +-----------------------+ +--------------------------+ |Istio Virtual Service | |Istio Virtual Service | | K8S Service | | | | | | | |sklearn-iris | |sklearn-iris-predictor | | sklearn-iris-predictor | | +------->|-default +----->| -default-$revision | | | | | | | |KServe Route | |Knative Route | | Knative Revision Service | +----------------------+ +-----------------------+ +------------+-------------+ Knative Ingress Gateway Knative Local Gateway Kube Proxy (Istio gateway) (Istio gateway) | | | +-------------------------------------------------------+ | | Knative Revision Pod | | | | | | +-------------------+ +-----------------+ | | | | | | | | | | |kserve-container |<-----+ Queue Proxy | |<------------------+ | | | | | | | +-------------------+ +--------------^--+ | | | | +-----------------------^-------------------------------+ | scale deployment | +--------+--------+ | pull metrics | Knative | | | Autoscaler |----------- | KPA/HPA | +-----------------+ 1.Traffic arrives through Knative Ingress/Local Gateway for external/internal traffic \u00b6 Istio Gateway resource describes the edge of the mesh receiving incoming or outgoing HTTP/TCP connections. The specification describes a set of ports that should be exposed and the type of protocol to use. If you are using Standalone mode, it installs the Gateway in knative-serving namespace, if you are using Kubeflow KServe (KServe installed with Kubeflow), it installs the Gateway in kubeflow namespace e.g on GCP the gateway is protected behind IAP with Istio authentication policy . kubectl get gateway knative-ingress-gateway -n knative-serving -oyaml kind : Gateway metadata : labels : networking.knative.dev/ingress-provider : istio serving.knative.dev/release : v0.12.1 name : knative-ingress-gateway namespace : knative-serving spec : selector : istio : ingressgateway servers : - hosts : - '*' port : name : http number : 80 protocol : HTTP - hosts : - '*' port : name : https number : 443 protocol : HTTPS tls : mode : SIMPLE privateKey : /etc/istio/ingressgateway-certs/tls.key serverCertificate : /etc/istio/ingressgateway-certs/tls.crt The InferenceService request routes to the Istio Ingress Gateway by matching the host and port from the url, by default http is configured, you can configure HTTPS with TLS certificates . 2. KServe Istio virtual service to route for predictor, transformer, explainer. \u00b6 kubectl get vs sklearn-iris -oyaml apiVersion : networking.istio.io/v1alpha3 kind : VirtualService metadata : name : sklearn-iris namespace : default gateways : - knative-serving/knative-local-gateway - knative-serving/knative-ingress-gateway hosts : - sklearn-iris.default.svc.cluster.local - sklearn-iris.default.example.com http : - headers : request : set : Host : sklearn-iris-predictor-default.default.svc.cluster.local match : - authority : regex : ^sklearn-iris\\.default(\\.svc(\\.cluster\\.local)?)?(?::\\d{1,5})?$ gateways : - knative-serving/knative-local-gateway - authority : regex : ^sklearn-iris\\.default\\.example\\.com(?::\\d{1,5})?$ gateways : - knative-serving/knative-ingress-gateway route : - destination : host : knative-local-gateway.istio-system.svc.cluster.local port : number : 80 weight : 100 KServe creates the routing rule which by default routes to Predictor if you only have Predictor specified on InferenceService . When Transformer and Explainer are specified on InferenceService the routing rule configures the traffic to route to Transformer or Explainer based on the verb. The request then routes to the second level Knative created virtual service via local gateway with the matching host header. 3. Knative Istio virtual service to route the inference request to the latest ready revision. \u00b6 kubectl get vs sklearn-iris-predictor-default-ingress -oyaml apiVersion : networking.istio.io/v1alpha3 kind : VirtualService metadata : name : sklearn-iris-predictor-default-mesh namespace : default spec : gateways : - knative-serving/knative-ingress-gateway - knative-serving/knative-local-gateway hosts : - sklearn-iris-predictor-default.default - sklearn-iris-predictor-default.default.example.com - sklearn-iris-predictor-default.default.svc - sklearn-iris-predictor-default.default.svc.cluster.local http : - match : - authority : prefix : sklearn-iris-predictor-default.default gateways : - knative-serving/knative-local-gateway - authority : prefix : sklearn-iris-predictor-default.default.svc gateways : - knative-serving/knative-local-gateway - authority : prefix : sklearn-iris-predictor-default.default gateways : - knative-serving/knative-local-gateway retries : {} route : - destination : host : sklearn-iris-predictor-default-00001.default.svc.cluster.local port : number : 80 headers : request : set : Knative-Serving-Namespace : default Knative-Serving-Revision : sklearn-iris-predictor-default-00001 weight : 100 - match : - authority : prefix : sklearn-iris-predictor-default.default.example.com gateways : - knative-serving/knative-ingress-gateway retries : {} route : - destination : host : sklearn-iris-predictor-default-00001.default.svc.cluster.local port : number : 80 headers : request : set : Knative-Serving-Namespace : default Knative-Serving-Revision : sklearn-iris-predictor-default-00001 weight : 100 The destination here is the k8s Service for the latest ready Knative Revision and it is reconciled by Knative every time user rolls out a new revision. When a new revision is rolled out and in ready state, the old revision is then scaled down, after configured revision GC time the revision resource is garbage collected if the revision no longer has traffic referenced. 4. Kubernetes Service routes the requests to the queue proxy sidecar of the inference service pod on port 8012 . \u00b6 kubectl get svc sklearn-iris-predictor-default-fhmjk-private -oyaml apiVersion : v1 kind : Service metadata : name : sklearn-iris-predictor-default-fhmjk-private namespace : default spec : clusterIP : 10.105.186.18 ports : - name : http port : 80 protocol : TCP targetPort : 8012 - name : queue-metrics port : 9090 protocol : TCP targetPort : queue-metrics - name : http-usermetric port : 9091 protocol : TCP targetPort : http-usermetric - name : http-queueadm port : 8022 protocol : TCP targetPort : 8022 selector : serving.knative.dev/revisionUID : a8f1eafc-3c64-4930-9a01-359f3235333a sessionAffinity : None type : ClusterIP 5. The queue proxy routes to kserve container with max concurrent requests configured with ContainerConcurrency . \u00b6 If the queue proxy has more requests than it can handle, the Knative Autoscaler creates more pods to handle additional requests. 6. Finally The queue proxy routes traffic to the kserve-container for processing the inference requests. \u00b6","title":"Debugging guide"},{"location":"developer/debug/#kserve-debugging-guide","text":"","title":"KServe Debugging Guide"},{"location":"developer/debug/#debug-kserve-inferenceservice-status","text":"You deployed an InferenceService to KServe, but it is not in ready state. Go through this step by step guide to understand what failed. kubectl get inferenceservices sklearn-iris NAME URL READY DEFAULT TRAFFIC CANARY TRAFFIC AGE model-example False 1m","title":"Debug KServe InferenceService Status"},{"location":"developer/debug/#ingressnotconfigured","text":"If you see IngressNotConfigured error, this indicates Istio Ingress Gateway probes are failing. kubectl get ksvc NAME URL LATESTCREATED LATESTREADY READY REASON sklearn-iris-predictor-default http://sklearn-iris-predictor-default.default.example.com sklearn-iris-predictor-default-jk794 mnist-sample-predictor-default-jk794 Unknown IngressNotConfigured You can then check Knative networking-istio pod logs for more details. kubectl logs -l app = networking-istio -n knative-serving If you are seeing HTTP 403, then you may have Istio RBAC turned on which blocks the probes to your service. { \"level\" : \"error\" , \"ts\" : \"2020-03-26T19:12:00.749Z\" , \"logger\" : \"istiocontroller.ingress-controller.status-manager\" , \"caller\" : \"ingress/status.go:366\" , \"msg\" : \"Probing of http://flowers-sample-predictor-default.kubeflow-jeanarmel-luce.example.com:80/ failed, IP: 10.0.0.29:80, ready: false, error: unexpected status code: want [200], got 403 (depth: 0)\" , \"commit\" : \"6b0e5c6\" , \"knative.dev/controller\" : \"ingress-controller\" , \"stacktrace\" : \"knative.dev/serving/pkg/reconciler/ingress.(*StatusProber).processWorkItem\\n\\t/home/prow/go/src/knative.dev/serving/pkg/reconciler/ingress/status.go:366\\nknative.dev/serving/pkg/reconciler/ingress.(*StatusProber).Start.func1\\n\\t/home/prow/go/src/knative.dev/serving/pkg/reconciler/ingress/status.go:268\" }","title":"IngressNotConfigured"},{"location":"developer/debug/#revisionmissing-error","text":"If you see RevisionMissing error, then your service pods are not in ready state. Knative Service creates Knative Revision which represents a snapshot of the InferenceService code and configuration.","title":"RevisionMissing Error"},{"location":"developer/debug/#storage-initializer-fails-to-download-model","text":"kubectl get revision $( kubectl get configuration sklearn-iris-predictor-default --output jsonpath = \"{.status.latestCreatedRevisionName}\" ) NAME CONFIG NAME K8S SERVICE NAME GENERATION READY REASON sklearn-iris-predictor-default-csjpw sklearn-iris-predictor-default sklearn-iris-predictor-default-csjpw 2 Unknown Deploying If you see READY status in Unknown error, this usually indicates that the KServe Storage Initializer init container fails to download the model and you can check the init container logs to see why it fails, note that the pod scales down after sometime if the init container fails . kubectl get pod -l serving.kserve.io/inferenceservice = sklearn-iris NAME READY STATUS RESTARTS AGE sklearn-iris-predictor-default-29jks-deployment-5f7d4b9996hzrnc 0 /3 Init:Error 1 10s kubectl logs -l model = sklearn-iris -c storage-initializer [ I 200517 03 :56:19 initializer-entrypoint:13 ] Initializing, args: src_uri [ gs://kfserving-examples/models/sklearn/iris-1 ] dest_path [ [ /mnt/models ] [ I 200517 03 :56:19 storage:35 ] Copying contents of gs://kfserving-examples/models/sklearn/iris-1 to local Traceback ( most recent call last ) : File \"/storage-initializer/scripts/initializer-entrypoint\" , line 14 , in kserve.Storage.download ( src_uri, dest_path ) File \"/usr/local/lib/python3.7/site-packages/kfserving/storage.py\" , line 48 , in download Storage._download_gcs ( uri, out_dir ) File \"/usr/local/lib/python3.7/site-packages/kfserving/storage.py\" , line 116 , in _download_gcs The path or model %s does not exist. \" % (uri)) RuntimeError: Failed to fetch model. The path or model gs://kfserving-examples/models/sklearn/iris-1 does not exist. [I 200517 03:40:19 initializer-entrypoint:13] Initializing, args: src_uri [gs://kfserving-examples/models/sklearn/iris] dest_path[ [/mnt/models] [I 200517 03:40:19 storage:35] Copying contents of gs://kfserving-examples/models/sklearn/iris to local [I 200517 03:40:20 storage:111] Downloading: /mnt/models/model.joblib [I 200517 03:40:20 storage:60] Successfully copied gs://kfserving-examples/models/sklearn/iris to /mnt/models","title":"Storage Initializer fails to download model"},{"location":"developer/debug/#inference-service-in-oom-status","text":"If you see ExitCode137 from the revision status, this means the revision has failed and this usually happens when the inference service pod is out of memory. To address it, you might need to bump up the memory limit of the InferenceService . kubectl get revision $( kubectl get configuration sklearn-iris-predictor-default --output jsonpath = \"{.status.latestCreatedRevisionName}\" ) NAME CONFIG NAME K8S SERVICE NAME GENERATION READY REASON sklearn-iris-predictor-default-84bzf sklearn-iris-predictor-default sklearn-iris-predictor-default-84bzf 8 False ExitCode137s","title":"Inference Service in OOM status"},{"location":"developer/debug/#inference-service-fails-to-start","text":"If you see other exit codes from the revision status you can further check the pod status. kubectl get pods -l serving.kserve.io/inferenceservice = sklearn-iris sklearn-iris-predictor-default-rvhmk-deployment-867c6444647tz7n 1 /3 CrashLoopBackOff 3 80s If you see the CrashLoopBackOff , then check the kserve-container log to see more details where it fails, the error log is usually propagated on revision container status also. kubectl logs sklearn-iris-predictor-default-rvhmk-deployment-867c6444647tz7n kserve-container [ I 200517 04 :58:21 storage:35 ] Copying contents of /mnt/models to local Traceback ( most recent call last ) : File \"/usr/local/lib/python3.7/runpy.py\" , line 193 , in _run_module_as_main \"__main__\" , mod_spec ) File \"/usr/local/lib/python3.7/runpy.py\" , line 85 , in _run_code exec ( code, run_globals ) File \"/sklearnserver/sklearnserver/__main__.py\" , line 33 , in model.load () File \"/sklearnserver/sklearnserver/model.py\" , line 36 , in load model_file = next ( path for path in paths if os.path.exists ( path )) StopIteration","title":"Inference Service fails to start"},{"location":"developer/debug/#inference-service-cannot-fetch-docker-images-from-aws-ecr","text":"If you don't see the inference service created at all for custom images from private registries (such as AWS ECR), it might be that the Knative Serving Controller fails to authenticate itself against the registry. failed to resolve image to digest: failed to fetch image information: unsupported status code 401 ; body: Not Authorized You can verify that this is actually the case by spinning up a pod that uses your image. The pod should be able to fetch it, if the correct IAM roles are attached, while Knative is not able to. To circumvent this issue you can either skip tag resolution or provide certificates for your registry as detailed in the official knative docs . kubectl -n knative-serving edit configmap config-deployment The resultant yaml will look like something below. apiVersion : v1 kind : ConfigMap metadata : name : config-deployment namespace : knative-serving data : # List of repositories for which tag to digest resolving should be skipped (for AWS ECR: {account_id}.dkr.ecr.{region}.amazonaws.com) registriesSkippingTagResolving : registry.example.com","title":"Inference Service cannot fetch docker images from AWS ECR"},{"location":"developer/debug/#debug-kserve-request-flow","text":"+----------------------+ +-----------------------+ +--------------------------+ |Istio Virtual Service | |Istio Virtual Service | | K8S Service | | | | | | | |sklearn-iris | |sklearn-iris-predictor | | sklearn-iris-predictor | | +------->|-default +----->| -default-$revision | | | | | | | |KServe Route | |Knative Route | | Knative Revision Service | +----------------------+ +-----------------------+ +------------+-------------+ Knative Ingress Gateway Knative Local Gateway Kube Proxy (Istio gateway) (Istio gateway) | | | +-------------------------------------------------------+ | | Knative Revision Pod | | | | | | +-------------------+ +-----------------+ | | | | | | | | | | |kserve-container |<-----+ Queue Proxy | |<------------------+ | | | | | | | +-------------------+ +--------------^--+ | | | | +-----------------------^-------------------------------+ | scale deployment | +--------+--------+ | pull metrics | Knative | | | Autoscaler |----------- | KPA/HPA | +-----------------+","title":"Debug KServe Request flow"},{"location":"developer/debug/#1traffic-arrives-through-knative-ingresslocal-gateway-for-externalinternal-traffic","text":"Istio Gateway resource describes the edge of the mesh receiving incoming or outgoing HTTP/TCP connections. The specification describes a set of ports that should be exposed and the type of protocol to use. If you are using Standalone mode, it installs the Gateway in knative-serving namespace, if you are using Kubeflow KServe (KServe installed with Kubeflow), it installs the Gateway in kubeflow namespace e.g on GCP the gateway is protected behind IAP with Istio authentication policy . kubectl get gateway knative-ingress-gateway -n knative-serving -oyaml kind : Gateway metadata : labels : networking.knative.dev/ingress-provider : istio serving.knative.dev/release : v0.12.1 name : knative-ingress-gateway namespace : knative-serving spec : selector : istio : ingressgateway servers : - hosts : - '*' port : name : http number : 80 protocol : HTTP - hosts : - '*' port : name : https number : 443 protocol : HTTPS tls : mode : SIMPLE privateKey : /etc/istio/ingressgateway-certs/tls.key serverCertificate : /etc/istio/ingressgateway-certs/tls.crt The InferenceService request routes to the Istio Ingress Gateway by matching the host and port from the url, by default http is configured, you can configure HTTPS with TLS certificates .","title":"1.Traffic arrives through Knative Ingress/Local Gateway for external/internal traffic"},{"location":"developer/debug/#2-kserve-istio-virtual-service-to-route-for-predictor-transformer-explainer","text":"kubectl get vs sklearn-iris -oyaml apiVersion : networking.istio.io/v1alpha3 kind : VirtualService metadata : name : sklearn-iris namespace : default gateways : - knative-serving/knative-local-gateway - knative-serving/knative-ingress-gateway hosts : - sklearn-iris.default.svc.cluster.local - sklearn-iris.default.example.com http : - headers : request : set : Host : sklearn-iris-predictor-default.default.svc.cluster.local match : - authority : regex : ^sklearn-iris\\.default(\\.svc(\\.cluster\\.local)?)?(?::\\d{1,5})?$ gateways : - knative-serving/knative-local-gateway - authority : regex : ^sklearn-iris\\.default\\.example\\.com(?::\\d{1,5})?$ gateways : - knative-serving/knative-ingress-gateway route : - destination : host : knative-local-gateway.istio-system.svc.cluster.local port : number : 80 weight : 100 KServe creates the routing rule which by default routes to Predictor if you only have Predictor specified on InferenceService . When Transformer and Explainer are specified on InferenceService the routing rule configures the traffic to route to Transformer or Explainer based on the verb. The request then routes to the second level Knative created virtual service via local gateway with the matching host header.","title":"2. KServe Istio virtual service to route for predictor, transformer, explainer."},{"location":"developer/debug/#3-knative-istio-virtual-service-to-route-the-inference-request-to-the-latest-ready-revision","text":"kubectl get vs sklearn-iris-predictor-default-ingress -oyaml apiVersion : networking.istio.io/v1alpha3 kind : VirtualService metadata : name : sklearn-iris-predictor-default-mesh namespace : default spec : gateways : - knative-serving/knative-ingress-gateway - knative-serving/knative-local-gateway hosts : - sklearn-iris-predictor-default.default - sklearn-iris-predictor-default.default.example.com - sklearn-iris-predictor-default.default.svc - sklearn-iris-predictor-default.default.svc.cluster.local http : - match : - authority : prefix : sklearn-iris-predictor-default.default gateways : - knative-serving/knative-local-gateway - authority : prefix : sklearn-iris-predictor-default.default.svc gateways : - knative-serving/knative-local-gateway - authority : prefix : sklearn-iris-predictor-default.default gateways : - knative-serving/knative-local-gateway retries : {} route : - destination : host : sklearn-iris-predictor-default-00001.default.svc.cluster.local port : number : 80 headers : request : set : Knative-Serving-Namespace : default Knative-Serving-Revision : sklearn-iris-predictor-default-00001 weight : 100 - match : - authority : prefix : sklearn-iris-predictor-default.default.example.com gateways : - knative-serving/knative-ingress-gateway retries : {} route : - destination : host : sklearn-iris-predictor-default-00001.default.svc.cluster.local port : number : 80 headers : request : set : Knative-Serving-Namespace : default Knative-Serving-Revision : sklearn-iris-predictor-default-00001 weight : 100 The destination here is the k8s Service for the latest ready Knative Revision and it is reconciled by Knative every time user rolls out a new revision. When a new revision is rolled out and in ready state, the old revision is then scaled down, after configured revision GC time the revision resource is garbage collected if the revision no longer has traffic referenced.","title":"3. Knative Istio virtual service to route the inference request to the latest ready revision."},{"location":"developer/debug/#4-kubernetes-service-routes-the-requests-to-the-queue-proxy-sidecar-of-the-inference-service-pod-on-port-8012","text":"kubectl get svc sklearn-iris-predictor-default-fhmjk-private -oyaml apiVersion : v1 kind : Service metadata : name : sklearn-iris-predictor-default-fhmjk-private namespace : default spec : clusterIP : 10.105.186.18 ports : - name : http port : 80 protocol : TCP targetPort : 8012 - name : queue-metrics port : 9090 protocol : TCP targetPort : queue-metrics - name : http-usermetric port : 9091 protocol : TCP targetPort : http-usermetric - name : http-queueadm port : 8022 protocol : TCP targetPort : 8022 selector : serving.knative.dev/revisionUID : a8f1eafc-3c64-4930-9a01-359f3235333a sessionAffinity : None type : ClusterIP","title":"4. Kubernetes Service routes the requests to the queue proxy sidecar of the inference service pod on port 8012."},{"location":"developer/debug/#5-the-queue-proxy-routes-to-kserve-container-with-max-concurrent-requests-configured-with-containerconcurrency","text":"If the queue proxy has more requests than it can handle, the Knative Autoscaler creates more pods to handle additional requests.","title":"5. The queue proxy routes to kserve container with max concurrent requests configured with ContainerConcurrency."},{"location":"developer/debug/#6-finally-the-queue-proxy-routes-traffic-to-the-kserve-container-for-processing-the-inference-requests","text":"","title":"6. Finally The queue proxy routes traffic to the kserve-container for processing the inference requests."},{"location":"developer/developer/","text":"Development \u00b6 This doc explains how to setup a development environment so you can get started contributing . Prerequisites \u00b6 Follow the instructions below to set up your development environment. Once you meet these requirements, you can make changes and deploy your own version of kserve ! Before submitting a PR, see also CONTRIBUTING.md . Install requirements \u00b6 You must install these tools: go : KServe controller is written in Go and requires Go 1.20.0+. git : For source control. Go Module : Go's new dependency management system. ko : For development. kubectl : For managing development environments. kustomize To customize YAMLs for different environments, requires v5.0.0+. yq yq is used in the project makefiles to parse and display YAML output, requires yq 4.* . Install Knative on a Kubernetes cluster \u00b6 KServe currently requires Knative Serving for auto-scaling, canary rollout, Istio for traffic routing and ingress. To install Knative components on your Kubernetes cluster, follow the installation guide or alternatively, use the Knative Operators to manage your installation. Observability, tracing and logging are optional but are often very valuable tools for troubleshooting difficult issues, they can be installed via the directions here . If you start from scratch, KServe requires Kubernetes 1.25+, Knative 1.7+, Istio 1.15+. If you already have Istio or Knative (e.g. from a Kubeflow install) then you don't need to install them explicitly, as long as version dependencies are satisfied. Note On a local environment, when using minikube or kind as Kubernetes cluster, there has been a reported issue that knative quickstart bootstrap does not work as expected. It is recommended to follow the installation manual from knative using yaml or using knative operator for a better result. Setup your environment \u00b6 To start your environment you'll need to set these environment variables (we recommend adding them to your .bashrc ): GOPATH : If you don't have one, simply pick a directory and add export GOPATH=... $GOPATH/bin on PATH : This is so that tooling installed via go get will work properly. KO_DEFAULTPLATFORMS : If you are using M1 Mac book the value is linux/arm64 . KO_DOCKER_REPO : The docker repository to which developer images should be pushed (e.g. docker.io/ ). Note : Set up a docker repository for pushing images. You can use any container image registry by adjusting the authentication methods and repository paths mentioned in the sections below. Google Container Registry quickstart Docker Hub quickstart Azure Container Registry quickstart Note if you are using docker hub to store your images your KO_DOCKER_REPO variable should be docker.io/ . Currently Docker Hub doesn't let you create subdirs under your username. .bashrc example: export GOPATH = \" $HOME /go\" export PATH = \" ${ PATH } : ${ GOPATH } /bin\" export KO_DOCKER_REPO = 'docker.io/' Checkout your fork \u00b6 The Go tools require that you clone the repository to the src/github.com/kserve/kserve directory in your GOPATH . To check out this repository: Create your own fork of this repo Clone it to your machine: mkdir -p ${ GOPATH } /src/github.com/kserve cd ${ GOPATH } /src/github.com/kserve git clone git@github.com: ${ YOUR_GITHUB_USERNAME } /kserve.git cd kserve git remote add upstream git@github.com:kserve/kserve.git git remote set-url --push upstream no_push Adding the upstream remote sets you up nicely for regularly syncing your fork . Once you reach this point you are ready to do a full build and deploy as described below. Deploy KServe \u00b6 Check Knative Serving installation \u00b6 Once you've setup your development environment , you can verify the installation with following: Success $ kubectl -n knative-serving get pods NAME READY STATUS RESTARTS AGE activator-77784645fc-t2pjf 1 /1 Running 0 11d autoscaler-6fddf74d5-z2fzf 1 /1 Running 0 11d autoscaler-hpa-5bf4476cc5-tsbw6 1 /1 Running 0 11d controller-7b8cd7f95c-6jxxj 1 /1 Running 0 11d istio-webhook-866c5bc7f8-t5ztb 1 /1 Running 0 11d networking-istio-54fb8b5d4b-xznwd 1 /1 Running 0 11d webhook-5f5f7bd9b4-cv27c 1 /1 Running 0 11d $ kubectl get gateway -n knative-serving NAME AGE knative-ingress-gateway 11d knative-local-gateway 11d $ kubectl get svc -n istio-system NAME TYPE CLUSTER-IP EXTERNAL-IP PORT ( S ) AGE istio-ingressgateway LoadBalancer 10 .101.196.89 X.X.X.X 15021 :31101/TCP,80:31781/TCP,443:30372/TCP,15443:31067/TCP 11d istiod ClusterIP 10 .101.116.203 15010 /TCP,15012/TCP,443/TCP,15014/TCP,853/TCP 11d Deploy KServe from master branch \u00b6 We suggest using cert manager for provisioning the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow the cert manager documentation to install it. If you don't want to install cert manager, you can set the KSERVE_ENABLE_SELF_SIGNED_CA environment variable to true. KSERVE_ENABLE_SELF_SIGNED_CA will execute a script to create a self-signed CA and patch it to the webhook config. export KSERVE_ENABLE_SELF_SIGNED_CA = true After that you can run following command to deploy KServe , you can skip above step if cert manager is already installed. make deploy Optional you can change CPU and memory limits when deploying KServe . export KSERVE_CONTROLLER_CPU_LIMIT = export KSERVE_CONTROLLER_MEMORY_LIMIT = make deploy Expected Output $ kubectl get pods -n kserve -l control-plane = kserve-controller-manager NAME READY STATUS RESTARTS AGE kserve-controller-manager-0 2/2 Running 0 13m Note By default it installs to kserve namespace with the published controller manager image from master branch. Deploy KServe with your own version \u00b6 Run the following command to deploy KServe controller and model agent with your local change. make deploy-dev Note deploy-dev builds the image from your local code, publishes to KO_DOCKER_REPO and deploys the kserve-controller-manager and model agent with the image digest to your cluster for testing. Please also ensure you are logged in to KO_DOCKER_REPO from your client machine. Run the following command to deploy model server with your local change. make deploy-dev-sklearn make deploy-dev-xgb Run the following command to deploy explainer with your local change. make deploy-dev-alibi Run the following command to deploy storage initializer with your local change. make deploy-dev-storageInitializer Warning The deploy command publishes the image to KO_DOCKER_REPO with the version latest , it changes the InferenceService configmap to point to the newly built image sha. The built image is only for development and testing purpose, the current limitation is that it changes the image impacted and reset all other images including the kserver-controller-manager to use the default ones. Smoke test after deployment \u00b6 Run the following command to smoke test the deployment kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/docs/samples/v1beta1/tensorflow/tensorflow.yaml You should see model serving deployment running under default or your specified namespace. $ kubectl get pods -n default -l serving.kserve.io/inferenceservice=flower-sample Expected Output NAME READY STATUS RESTARTS AGE flower-sample-default-htz8r-deployment-8fd979f9b-w2qbv 3/3 Running 0 10s Running unit/integration tests \u00b6 kserver-controller-manager has a few integration tests which requires mock apiserver and etcd, they get installed along with kubebuilder . To run all unit/integration tests: make test Run e2e tests locally \u00b6 To setup from local code, do: ./hack/quick_install.sh make undeploy make deploy-dev Go to python/kserve and install kserve python sdk deps pip3 install -e . [ test ] Then go to test/e2e . Run kubectl create namespace kserve-ci-e2e-test For KIND/minikube: Run export KSERVE_INGRESS_HOST_PORT=localhost:8080 In a different window run kubectl port-forward -n istio-system svc/istio-ingressgateway 8080:80 Note that not all tests will pass as the pytorch test requires gpu. These will show as pending pods at the end or you can add marker to skip the test. Run pytest > testresults.txt Tests may not clean up. To re-run, first do kubectl delete namespace kserve-ci-e2e-test , recreate namespace and run again. Iterating \u00b6 As you make changes to the code-base, there are two special cases to be aware of: If you change an input to generated code , then you must run make manifests . Inputs include: API type definitions in apis/serving Manifests or kustomize patches stored in config . To generate the KServe python/go clients, you should run make generate . If you want to add new dependencies , then you add the imports and the specific version of the dependency module in go.mod . When it encounters an import of a package not provided by any module in go.mod , the go command automatically looks up the module containing the package and adds it to go.mod using the latest version. If you want to upgrade the dependency , then you run go get command e.g go get golang.org/x/text to upgrade to the latest version, go get golang.org/x/text@v0.3.0 to upgrade to a specific version. make deploy-dev Contribute to the code \u00b6 See the guidelines for contributing a feature contributing to an existing issue Releases \u00b6 Please check out the documentation here to understand the release schedule and process. Feedback \u00b6 The best place to provide feedback about the KServe code is via a Github issue. See creating a Github issue for guidelines on submitting bugs and feature requests.","title":"How to contribute"},{"location":"developer/developer/#development","text":"This doc explains how to setup a development environment so you can get started contributing .","title":"Development"},{"location":"developer/developer/#prerequisites","text":"Follow the instructions below to set up your development environment. Once you meet these requirements, you can make changes and deploy your own version of kserve ! Before submitting a PR, see also CONTRIBUTING.md .","title":"Prerequisites"},{"location":"developer/developer/#install-requirements","text":"You must install these tools: go : KServe controller is written in Go and requires Go 1.20.0+. git : For source control. Go Module : Go's new dependency management system. ko : For development. kubectl : For managing development environments. kustomize To customize YAMLs for different environments, requires v5.0.0+. yq yq is used in the project makefiles to parse and display YAML output, requires yq 4.* .","title":"Install requirements"},{"location":"developer/developer/#install-knative-on-a-kubernetes-cluster","text":"KServe currently requires Knative Serving for auto-scaling, canary rollout, Istio for traffic routing and ingress. To install Knative components on your Kubernetes cluster, follow the installation guide or alternatively, use the Knative Operators to manage your installation. Observability, tracing and logging are optional but are often very valuable tools for troubleshooting difficult issues, they can be installed via the directions here . If you start from scratch, KServe requires Kubernetes 1.25+, Knative 1.7+, Istio 1.15+. If you already have Istio or Knative (e.g. from a Kubeflow install) then you don't need to install them explicitly, as long as version dependencies are satisfied. Note On a local environment, when using minikube or kind as Kubernetes cluster, there has been a reported issue that knative quickstart bootstrap does not work as expected. It is recommended to follow the installation manual from knative using yaml or using knative operator for a better result.","title":"Install Knative on a Kubernetes cluster"},{"location":"developer/developer/#setup-your-environment","text":"To start your environment you'll need to set these environment variables (we recommend adding them to your .bashrc ): GOPATH : If you don't have one, simply pick a directory and add export GOPATH=... $GOPATH/bin on PATH : This is so that tooling installed via go get will work properly. KO_DEFAULTPLATFORMS : If you are using M1 Mac book the value is linux/arm64 . KO_DOCKER_REPO : The docker repository to which developer images should be pushed (e.g. docker.io/ ). Note : Set up a docker repository for pushing images. You can use any container image registry by adjusting the authentication methods and repository paths mentioned in the sections below. Google Container Registry quickstart Docker Hub quickstart Azure Container Registry quickstart Note if you are using docker hub to store your images your KO_DOCKER_REPO variable should be docker.io/ . Currently Docker Hub doesn't let you create subdirs under your username. .bashrc example: export GOPATH = \" $HOME /go\" export PATH = \" ${ PATH } : ${ GOPATH } /bin\" export KO_DOCKER_REPO = 'docker.io/'","title":"Setup your environment"},{"location":"developer/developer/#checkout-your-fork","text":"The Go tools require that you clone the repository to the src/github.com/kserve/kserve directory in your GOPATH . To check out this repository: Create your own fork of this repo Clone it to your machine: mkdir -p ${ GOPATH } /src/github.com/kserve cd ${ GOPATH } /src/github.com/kserve git clone git@github.com: ${ YOUR_GITHUB_USERNAME } /kserve.git cd kserve git remote add upstream git@github.com:kserve/kserve.git git remote set-url --push upstream no_push Adding the upstream remote sets you up nicely for regularly syncing your fork . Once you reach this point you are ready to do a full build and deploy as described below.","title":"Checkout your fork"},{"location":"developer/developer/#deploy-kserve","text":"","title":"Deploy KServe"},{"location":"developer/developer/#check-knative-serving-installation","text":"Once you've setup your development environment , you can verify the installation with following: Success $ kubectl -n knative-serving get pods NAME READY STATUS RESTARTS AGE activator-77784645fc-t2pjf 1 /1 Running 0 11d autoscaler-6fddf74d5-z2fzf 1 /1 Running 0 11d autoscaler-hpa-5bf4476cc5-tsbw6 1 /1 Running 0 11d controller-7b8cd7f95c-6jxxj 1 /1 Running 0 11d istio-webhook-866c5bc7f8-t5ztb 1 /1 Running 0 11d networking-istio-54fb8b5d4b-xznwd 1 /1 Running 0 11d webhook-5f5f7bd9b4-cv27c 1 /1 Running 0 11d $ kubectl get gateway -n knative-serving NAME AGE knative-ingress-gateway 11d knative-local-gateway 11d $ kubectl get svc -n istio-system NAME TYPE CLUSTER-IP EXTERNAL-IP PORT ( S ) AGE istio-ingressgateway LoadBalancer 10 .101.196.89 X.X.X.X 15021 :31101/TCP,80:31781/TCP,443:30372/TCP,15443:31067/TCP 11d istiod ClusterIP 10 .101.116.203 15010 /TCP,15012/TCP,443/TCP,15014/TCP,853/TCP 11d","title":"Check Knative Serving installation"},{"location":"developer/developer/#deploy-kserve-from-master-branch","text":"We suggest using cert manager for provisioning the certificates for the webhook server. Other solutions should also work as long as they put the certificates in the desired location. You can follow the cert manager documentation to install it. If you don't want to install cert manager, you can set the KSERVE_ENABLE_SELF_SIGNED_CA environment variable to true. KSERVE_ENABLE_SELF_SIGNED_CA will execute a script to create a self-signed CA and patch it to the webhook config. export KSERVE_ENABLE_SELF_SIGNED_CA = true After that you can run following command to deploy KServe , you can skip above step if cert manager is already installed. make deploy Optional you can change CPU and memory limits when deploying KServe . export KSERVE_CONTROLLER_CPU_LIMIT = export KSERVE_CONTROLLER_MEMORY_LIMIT = make deploy Expected Output $ kubectl get pods -n kserve -l control-plane = kserve-controller-manager NAME READY STATUS RESTARTS AGE kserve-controller-manager-0 2/2 Running 0 13m Note By default it installs to kserve namespace with the published controller manager image from master branch.","title":"Deploy KServe from master branch"},{"location":"developer/developer/#deploy-kserve-with-your-own-version","text":"Run the following command to deploy KServe controller and model agent with your local change. make deploy-dev Note deploy-dev builds the image from your local code, publishes to KO_DOCKER_REPO and deploys the kserve-controller-manager and model agent with the image digest to your cluster for testing. Please also ensure you are logged in to KO_DOCKER_REPO from your client machine. Run the following command to deploy model server with your local change. make deploy-dev-sklearn make deploy-dev-xgb Run the following command to deploy explainer with your local change. make deploy-dev-alibi Run the following command to deploy storage initializer with your local change. make deploy-dev-storageInitializer Warning The deploy command publishes the image to KO_DOCKER_REPO with the version latest , it changes the InferenceService configmap to point to the newly built image sha. The built image is only for development and testing purpose, the current limitation is that it changes the image impacted and reset all other images including the kserver-controller-manager to use the default ones.","title":"Deploy KServe with your own version"},{"location":"developer/developer/#smoke-test-after-deployment","text":"Run the following command to smoke test the deployment kubectl apply -f https://raw.githubusercontent.com/kserve/kserve/master/docs/samples/v1beta1/tensorflow/tensorflow.yaml You should see model serving deployment running under default or your specified namespace. $ kubectl get pods -n default -l serving.kserve.io/inferenceservice=flower-sample Expected Output NAME READY STATUS RESTARTS AGE flower-sample-default-htz8r-deployment-8fd979f9b-w2qbv 3/3 Running 0 10s","title":"Smoke test after deployment"},{"location":"developer/developer/#running-unitintegration-tests","text":"kserver-controller-manager has a few integration tests which requires mock apiserver and etcd, they get installed along with kubebuilder . To run all unit/integration tests: make test","title":"Running unit/integration tests"},{"location":"developer/developer/#run-e2e-tests-locally","text":"To setup from local code, do: ./hack/quick_install.sh make undeploy make deploy-dev Go to python/kserve and install kserve python sdk deps pip3 install -e . [ test ] Then go to test/e2e . Run kubectl create namespace kserve-ci-e2e-test For KIND/minikube: Run export KSERVE_INGRESS_HOST_PORT=localhost:8080 In a different window run kubectl port-forward -n istio-system svc/istio-ingressgateway 8080:80 Note that not all tests will pass as the pytorch test requires gpu. These will show as pending pods at the end or you can add marker to skip the test. Run pytest > testresults.txt Tests may not clean up. To re-run, first do kubectl delete namespace kserve-ci-e2e-test , recreate namespace and run again.","title":"Run e2e tests locally"},{"location":"developer/developer/#iterating","text":"As you make changes to the code-base, there are two special cases to be aware of: If you change an input to generated code , then you must run make manifests . Inputs include: API type definitions in apis/serving Manifests or kustomize patches stored in config . To generate the KServe python/go clients, you should run make generate . If you want to add new dependencies , then you add the imports and the specific version of the dependency module in go.mod . When it encounters an import of a package not provided by any module in go.mod , the go command automatically looks up the module containing the package and adds it to go.mod using the latest version. If you want to upgrade the dependency , then you run go get command e.g go get golang.org/x/text to upgrade to the latest version, go get golang.org/x/text@v0.3.0 to upgrade to a specific version. make deploy-dev","title":"Iterating"},{"location":"developer/developer/#contribute-to-the-code","text":"See the guidelines for contributing a feature contributing to an existing issue","title":"Contribute to the code"},{"location":"developer/developer/#releases","text":"Please check out the documentation here to understand the release schedule and process.","title":"Releases"},{"location":"developer/developer/#feedback","text":"The best place to provide feedback about the KServe code is via a Github issue. See creating a Github issue for guidelines on submitting bugs and feature requests.","title":"Feedback"},{"location":"get_started/","text":"Getting Started with KServe \u00b6 Before you begin \u00b6 Warning KServe Quickstart Environments are for experimentation use only. For production installation, see our Administrator's Guide Before you can get started with a KServe Quickstart deployment you must install kind and the Kubernetes CLI. Install Kind (Kubernetes in Docker) \u00b6 You can use kind (Kubernetes in Docker) to run a local Kubernetes cluster with Docker container nodes. Install the Kubernetes CLI \u00b6 The Kubernetes CLI ( kubectl ) , allows you to run commands against Kubernetes clusters. You can use kubectl to deploy applications, inspect and manage cluster resources, and view logs. Install the KServe \"Quickstart\" environment \u00b6 After having kind installed, create a kind cluster with: kind create cluster Then run: kubectl config get-contexts It should list out a list of contexts you have, one of them should be kind-kind . Then run: kubectl config use-context kind-kind to use this context. You can then get started with a local deployment of KServe by using KServe Quick installation script on Kind : curl -s \"https://raw.githubusercontent.com/kserve/kserve/release-0.12/hack/quick_install.sh\" | bash or install via our published Helm Charts: helm install kserve-crd oci://ghcr.io/kserve/charts/kserve-crd --version v0.12.0 helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.12.0","title":"KServe Quickstart"},{"location":"get_started/#getting-started-with-kserve","text":"","title":"Getting Started with KServe"},{"location":"get_started/#before-you-begin","text":"Warning KServe Quickstart Environments are for experimentation use only. For production installation, see our Administrator's Guide Before you can get started with a KServe Quickstart deployment you must install kind and the Kubernetes CLI.","title":"Before you begin"},{"location":"get_started/#install-kind-kubernetes-in-docker","text":"You can use kind (Kubernetes in Docker) to run a local Kubernetes cluster with Docker container nodes.","title":"Install Kind (Kubernetes in Docker)"},{"location":"get_started/#install-the-kubernetes-cli","text":"The Kubernetes CLI ( kubectl ) , allows you to run commands against Kubernetes clusters. You can use kubectl to deploy applications, inspect and manage cluster resources, and view logs.","title":"Install the Kubernetes CLI"},{"location":"get_started/#install-the-kserve-quickstart-environment","text":"After having kind installed, create a kind cluster with: kind create cluster Then run: kubectl config get-contexts It should list out a list of contexts you have, one of them should be kind-kind . Then run: kubectl config use-context kind-kind to use this context. You can then get started with a local deployment of KServe by using KServe Quick installation script on Kind : curl -s \"https://raw.githubusercontent.com/kserve/kserve/release-0.12/hack/quick_install.sh\" | bash or install via our published Helm Charts: helm install kserve-crd oci://ghcr.io/kserve/charts/kserve-crd --version v0.12.0 helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.12.0","title":"Install the KServe \"Quickstart\" environment"},{"location":"get_started/first_isvc/","text":"Run your first InferenceService \u00b6 In this tutorial, you will deploy an InferenceService with a predictor that will load a scikit-learn model trained with the iris dataset. This dataset has three output class: Iris Setosa, Iris Versicolour, and Iris Virginica. You will then send an inference request to your deployed model in order to get a prediction for the class of iris plant your request corresponds to. Since your model is being deployed as an InferenceService, not a raw Kubernetes Service, you just need to provide the storage location of the model and it gets some super powers out of the box . 1. Create a namespace \u00b6 First, create a namespace to use for deploying KServe resources: kubectl create namespace kserve-test 2. Create an InferenceService \u00b6 Next, define a new InferenceService YAML for the model and apply it to the cluster. A new predictor schema was introduced in v0.8.0 . New InferenceServices should be deployed using the new schema. The old schema is provided as reference. New Schema Old Schema kubectl apply -n kserve-test -f - < \"./iris-input.json\" { \"instances\": [ [6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6] ] } EOF Depending on your setup, use one of the following commands to curl the InferenceService : Real DNS Magic DNS From Ingress gateway with HOST Header From local cluster gateway If you have configured the DNS, you can directly curl the InferenceService with the URL obtained from the status print. e.g curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test. ${ CUSTOM_DOMAIN } /v1/models/sklearn-iris:predict -d @./iris-input.json If you don't want to go through the trouble to get a real domain, you can instead use \"magic\" dns xip.io . The key is to get the external IP for your cluster. kubectl get svc istio-ingressgateway --namespace istio-system Look for the EXTERNAL-IP column's value(in this case 35.237.217.209) NAME TYPE CLUSTER-IP EXTERNAL-IP PORT ( S ) AGE istio-ingressgateway LoadBalancer 10 .51.253.94 35 .237.217.209 Next step is to setting up the custom domain: kubectl edit cm config-domain --namespace knative-serving Now in your editor, change example.com to {{external-ip}}.xip.io (make sure to replace {{external-ip}} with the IP you found earlier). With the change applied you can now directly curl the URL curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test.35.237.217.209.xip.io/v1/models/sklearn-iris:predict -d @./iris-input.json If you do not have DNS, you can still curl with the ingress gateway external IP using the HOST Header. SERVICE_HOSTNAME = $( kubectl get inferenceservice sklearn-iris -n kserve-test -o jsonpath = '{.status.url}' | cut -d \"/\" -f 3 ) curl -v -H \"Host: ${ SERVICE_HOSTNAME } \" -H \"Content-Type: application/json\" \"http:// ${ INGRESS_HOST } : ${ INGRESS_PORT } /v1/models/sklearn-iris:predict\" -d @./iris-input.json If you are calling from in cluster you can curl with the internal url with host {{InferenceServiceName}}.{{namespace}} curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test/v1/models/sklearn-iris:predict -d @./iris-input.json You should see two predictions returned (i.e. {\"predictions\": [1, 1]} ). Both sets of data points sent for inference correspond to the flower with index 1 . In this case, the model predicts that both flowers are \"Iris Versicolour\". 6. Run performance test (optional) \u00b6 If you want to load test the deployed model, try deploying the following Kubernetes Job to drive load to the model: # use kubectl create instead of apply because the job template is using generateName which doesn't work with kubectl apply kubectl create -f https://raw.githubusercontent.com/kserve/kserve/release-0.11/docs/samples/v1beta1/sklearn/v1/perf.yaml -n kserve-test Execute the following command to view output: kubectl logs load-test8b58n-rgfxr -n kserve-test Expected Output Requests [ total, rate, throughput ] 30000 , 500 .02, 499 .99 Duration [ total, attack, wait ] 1m0s, 59 .998s, 3 .336ms Latencies [ min, mean, 50 , 90 , 95 , 99 , max ] 1 .743ms, 2 .748ms, 2 .494ms, 3 .363ms, 4 .091ms, 7 .749ms, 46 .354ms Bytes In [ total, mean ] 690000 , 23 .00 Bytes Out [ total, mean ] 2460000 , 82 .00 Success [ ratio ] 100 .00% Status Codes [ code:count ] 200 :30000 Error Set:","title":"First InferenceService"},{"location":"get_started/first_isvc/#run-your-first-inferenceservice","text":"In this tutorial, you will deploy an InferenceService with a predictor that will load a scikit-learn model trained with the iris dataset. This dataset has three output class: Iris Setosa, Iris Versicolour, and Iris Virginica. You will then send an inference request to your deployed model in order to get a prediction for the class of iris plant your request corresponds to. Since your model is being deployed as an InferenceService, not a raw Kubernetes Service, you just need to provide the storage location of the model and it gets some super powers out of the box .","title":"Run your first InferenceService"},{"location":"get_started/first_isvc/#1-create-a-namespace","text":"First, create a namespace to use for deploying KServe resources: kubectl create namespace kserve-test","title":"1. Create a namespace"},{"location":"get_started/first_isvc/#2-create-an-inferenceservice","text":"Next, define a new InferenceService YAML for the model and apply it to the cluster. A new predictor schema was introduced in v0.8.0 . New InferenceServices should be deployed using the new schema. The old schema is provided as reference. New Schema Old Schema kubectl apply -n kserve-test -f - < \"./iris-input.json\" { \"instances\": [ [6.8, 2.8, 4.8, 1.4], [6.0, 3.4, 4.5, 1.6] ] } EOF Depending on your setup, use one of the following commands to curl the InferenceService : Real DNS Magic DNS From Ingress gateway with HOST Header From local cluster gateway If you have configured the DNS, you can directly curl the InferenceService with the URL obtained from the status print. e.g curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test. ${ CUSTOM_DOMAIN } /v1/models/sklearn-iris:predict -d @./iris-input.json If you don't want to go through the trouble to get a real domain, you can instead use \"magic\" dns xip.io . The key is to get the external IP for your cluster. kubectl get svc istio-ingressgateway --namespace istio-system Look for the EXTERNAL-IP column's value(in this case 35.237.217.209) NAME TYPE CLUSTER-IP EXTERNAL-IP PORT ( S ) AGE istio-ingressgateway LoadBalancer 10 .51.253.94 35 .237.217.209 Next step is to setting up the custom domain: kubectl edit cm config-domain --namespace knative-serving Now in your editor, change example.com to {{external-ip}}.xip.io (make sure to replace {{external-ip}} with the IP you found earlier). With the change applied you can now directly curl the URL curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test.35.237.217.209.xip.io/v1/models/sklearn-iris:predict -d @./iris-input.json If you do not have DNS, you can still curl with the ingress gateway external IP using the HOST Header. SERVICE_HOSTNAME = $( kubectl get inferenceservice sklearn-iris -n kserve-test -o jsonpath = '{.status.url}' | cut -d \"/\" -f 3 ) curl -v -H \"Host: ${ SERVICE_HOSTNAME } \" -H \"Content-Type: application/json\" \"http:// ${ INGRESS_HOST } : ${ INGRESS_PORT } /v1/models/sklearn-iris:predict\" -d @./iris-input.json If you are calling from in cluster you can curl with the internal url with host {{InferenceServiceName}}.{{namespace}} curl -v -H \"Content-Type: application/json\" http://sklearn-iris.kserve-test/v1/models/sklearn-iris:predict -d @./iris-input.json You should see two predictions returned (i.e. {\"predictions\": [1, 1]} ). Both sets of data points sent for inference correspond to the flower with index 1 . In this case, the model predicts that both flowers are \"Iris Versicolour\".","title":"5. Perform inference"},{"location":"get_started/first_isvc/#6-run-performance-test-optional","text":"If you want to load test the deployed model, try deploying the following Kubernetes Job to drive load to the model: # use kubectl create instead of apply because the job template is using generateName which doesn't work with kubectl apply kubectl create -f https://raw.githubusercontent.com/kserve/kserve/release-0.11/docs/samples/v1beta1/sklearn/v1/perf.yaml -n kserve-test Execute the following command to view output: kubectl logs load-test8b58n-rgfxr -n kserve-test Expected Output Requests [ total, rate, throughput ] 30000 , 500 .02, 499 .99 Duration [ total, attack, wait ] 1m0s, 59 .998s, 3 .336ms Latencies [ min, mean, 50 , 90 , 95 , 99 , max ] 1 .743ms, 2 .748ms, 2 .494ms, 3 .363ms, 4 .091ms, 7 .749ms, 46 .354ms Bytes In [ total, mean ] 690000 , 23 .00 Bytes Out [ total, mean ] 2460000 , 82 .00 Success [ ratio ] 100 .00% Status Codes [ code:count ] 200 :30000 Error Set:","title":"6. Run performance test (optional)"},{"location":"get_started/swagger_ui/","text":"InferenceService Swagger UI \u00b6 KServe ModelServer is built on top of FastAPI , which brings out-of-box support for OpenAPI specification and Swagger UI . Swagger UI allows visualizing and interacting with the KServe InferenceService API directly in the browser , making it easy for exploring the endpoints and validating the outputs without using any command-line tool. Enable Swagger UI \u00b6 Warning Be careful when enabling this for your production InferenceService deployments since the endpoint does not require authentication at this time. Currently, POST request only work for v2 endpoints in the UI. To enable, simply add an extra argument to the InferenceService YAML example from First Inference chapter: kubectl apply -n kserve-test -f - <.github.io/docs/ Where is your Github handle. After a few moments, your changes should be available for public preview at the link provided by MkDocs! This means you can rapidly prototype and share your changes before making a PR! Navigation \u00b6 Navigation in MkDocs uses the \"mkdocs.yml\" file (found in the /docs directory) to organize navigation. For more in-depth information on Navigation, see: https://www.mkdocs.org/user-guide/writing-your-docs/#configure-pages-and-navigation and https://squidfunk.github.io/mkdocs-material/setup/setting-up-navigation/ Content Tabs \u00b6 Content tabs are handy way to organize lots of information in a visually pleasing way. Some documentation from https://squidfunk.github.io/mkdocs-material/reference/content-tabs/#usage is reproduced here: Grouping Code blocks Grouping other content Code blocks are one of the primary targets to be grouped, and can be considered a special case of content tabs, as tabs with a single code block are always rendered without horizontal spacing. Example: === \"C\" ``` c #include int main(void) { printf(\"Hello world!\\n\"); return 0; } ``` === \"C++\" ``` c++ #include int main(void) { std::cout << \"Hello world!\" << std::endl; return 0; } ``` Result: C C++ #include int main ( void ) { printf ( \"Hello world! \\n \" ); return 0 ; } #include int main ( void ) { std :: cout << \"Hello world!\" << std :: endl ; return 0 ; } When a content tab contains more than one code block, it is rendered with horizontal spacing. Vertical spacing is never added, but can be achieved by nesting tabs in other blocks. Example: === \"Unordered list\" * Sed sagittis eleifend rutrum * Donec vitae suscipit est * Nulla tempor lobortis orci === \"Ordered list\" 1. Sed sagittis eleifend rutrum 2. Donec vitae suscipit est 3. Nulla tempor lobortis orci Result: Unordered list Ordered list Sed sagittis eleifend rutrum Donec vitae suscipit est Nulla tempor lobortis orci Sed sagittis eleifend rutrum Donec vitae suscipit est Nulla tempor lobortis orci For more information, see: https://squidfunk.github.io/mkdocs-material/reference/content-tabs/#usage File Includes (Content Reuse) \u00b6 KServe strives to reduce duplicative effort by reusing commonly used bits of information, see the docs/snippet directory for some examples. Snippets does not require a specific extension, and as long as a valid file name is specified, it will attempt to process it. Snippets can handle recursive file inclusion. And if Snippets encounters the same file in the current stack, it will avoid re-processing it in order to avoid an infinite loop (or crash on hitting max recursion depth). For more info, see: https://facelessuser.github.io/pymdown-extensions/extensions/snippets/ Admonitions \u00b6 We use the following admonition boxes only. Use admonitions sparingly; too many admonitions can be distracting. Admonitions Formatting Note A Note contains information that is useful, but not essential. A reader can skip a note without bypassing required information. If the information suggests an action to take, use a tip instead. Tip A Tip suggests an helpful, but not mandatory, action to take. Warning A Warning draws attention to potential trouble. !!! note A Note contains information that is useful, but not essential. A reader can skip a note without bypassing required information. If the information suggests an action to take, use a tip instead. !!! tip A Tip suggests a helpful, but not mandatory, action to take. !!! warning A Warning draws attention to potential trouble. Icons and Emojis \u00b6 Material for MkDocs supports using Material Icons and Emojis using easy shortcodes. Emojs Formatting :taco: To search a database of Icons and Emojis (all of which can be used on kserve.io), as well as usage information, see: https://squidfunk.github.io/mkdocs-material/reference/icons-emojis/#search Redirects \u00b6 The KServe site uses mkdocs-redirects to \"redirect\" users from a page that may no longer exist (or has been moved) to their desired location. Adding re-directs to the KServe site is done in one centralized place, docs/config/redirects.yml . The format is shown here: plugins: redirects: redirect_maps: ... path_to_old_or_moved_URL : path_to_new_URL","title":"MkDocs Contributions"},{"location":"help/contributor/mkdocs-contributor-guide/#mkdocs-contributions","text":"This is a temporary home for contribution guidelines for the MkDocs branch. When MkDocs becomes \"main\" this will be moved to the appropriate place on the website","title":"MkDocs Contributions"},{"location":"help/contributor/mkdocs-contributor-guide/#install-material-for-mkdocs","text":"kserve.io uses Material for MkDocs to render documentation. Material for MkDocs is Python based and uses pip to install most of it's required packages as well as optional add-ons (which we use). You can choose to install MkDocs locally or using a Docker image. pip actually comes pre-installed with Python so it is included in many operating systems (like MacOSx or Ubuntu) but if you don\u2019t have Python, you can install it here: https://www.python.org For some (e.g. folks using RHEL), you may have to use pip3. pip pip3 pip install mkdocs-material mike More detailed instructions can be found here: https://squidfunk.github.io/mkdocs-material/getting-started/#installation pip3 install mkdocs-material mike More detailed instructions can be found here: https://squidfunk.github.io/mkdocs-material/getting-started/#installation","title":"Install Material for MkDocs"},{"location":"help/contributor/mkdocs-contributor-guide/#install-kserve-specific-extensions","text":"KServe uses a number of extensions to MkDocs which can also be installed using pip. If you used pip to install, run the following: pip pip3 pip install mkdocs-material-extensions mkdocs-macros-plugin mkdocs-exclude mkdocs-awesome-pages-plugin mkdocs-redirects pip3 install mkdocs-material-extensions mkdocs-macros-plugin mkdocs-exclude mkdocs-awesome-pages-plugin mkdocs-redirects","title":"Install KServe-Specific Extensions"},{"location":"help/contributor/mkdocs-contributor-guide/#install-dependencies-in-requirementstxt-file","text":"Navigate to root folder and run below command to install required packages and libraries specified in the requirements.txt file. pip pip3 pip install -r requirements.txt pip3 install -r requirements.txt","title":"Install Dependencies in Requirements.txt file"},{"location":"help/contributor/mkdocs-contributor-guide/#setting-up-local-preview","text":"Once you have installed Material for MkDocs and all of the extensions, head over to and clone the repo. In your terminal, find your way over to the location of the cloned repo. Once you are in the main folder and run: Local Preview Local Preview w/ Dirty Reload Local Preview including Blog and Community Site mkdocs serve If you\u2019re only changing a single page in the /docs/ folder (i.e. not the homepage or mkdocs.yml) adding the flag --dirtyreload will make the site rebuild super crazy insta-fast. mkdocs serve --dirtyreload First, install the necessary extensions: npm install -g postcss postcss-cli autoprefixer http-server Once you have those npm packages installed, run: ./hack/build-with-blog.sh serve Note Unfortunately, there aren\u2019t live previews for this version of the local preview. After awhile, your terminal should spit out: INFO - Documentation built in 13 .54 seconds [ I 210519 10 :47:10 server:335 ] Serving on http://127.0.0.1:8000 [ I 210519 10 :47:10 handlers:62 ] Start watching changes [ I 210519 10 :47:10 handlers:64 ] Start detecting changes Now access http://127.0.0.1:8000 and you should see the site is built! \ud83c\udf89 Anytime you change any file in your /docs/ repo and hit save, the site will automatically rebuild itself to reflect your changes!","title":"Setting Up Local Preview"},{"location":"help/contributor/mkdocs-contributor-guide/#setting-up-public-preview","text":"If, for whatever reason, you want to share your work before submitting a PR (where Netlify would generate a preview for you), you can deploy your changes as a Github Page easily using the following command: mkdocs gh-deploy --force INFO - Documentation built in 14 .29 seconds WARNING - Version check skipped: No version specified in previous deployment. INFO - Your documentation should shortly be available at: https://.github.io/docs/ Where is your Github handle. After a few moments, your changes should be available for public preview at the link provided by MkDocs! This means you can rapidly prototype and share your changes before making a PR!","title":"Setting Up \"Public\" Preview"},{"location":"help/contributor/mkdocs-contributor-guide/#navigation","text":"Navigation in MkDocs uses the \"mkdocs.yml\" file (found in the /docs directory) to organize navigation. For more in-depth information on Navigation, see: https://www.mkdocs.org/user-guide/writing-your-docs/#configure-pages-and-navigation and https://squidfunk.github.io/mkdocs-material/setup/setting-up-navigation/","title":"Navigation"},{"location":"help/contributor/mkdocs-contributor-guide/#content-tabs","text":"Content tabs are handy way to organize lots of information in a visually pleasing way. Some documentation from https://squidfunk.github.io/mkdocs-material/reference/content-tabs/#usage is reproduced here: Grouping Code blocks Grouping other content Code blocks are one of the primary targets to be grouped, and can be considered a special case of content tabs, as tabs with a single code block are always rendered without horizontal spacing. Example: === \"C\" ``` c #include int main(void) { printf(\"Hello world!\\n\"); return 0; } ``` === \"C++\" ``` c++ #include int main(void) { std::cout << \"Hello world!\" << std::endl; return 0; } ``` Result: C C++ #include int main ( void ) { printf ( \"Hello world! \\n \" ); return 0 ; } #include int main ( void ) { std :: cout << \"Hello world!\" << std :: endl ; return 0 ; } When a content tab contains more than one code block, it is rendered with horizontal spacing. Vertical spacing is never added, but can be achieved by nesting tabs in other blocks. Example: === \"Unordered list\" * Sed sagittis eleifend rutrum * Donec vitae suscipit est * Nulla tempor lobortis orci === \"Ordered list\" 1. Sed sagittis eleifend rutrum 2. Donec vitae suscipit est 3. Nulla tempor lobortis orci Result: Unordered list Ordered list Sed sagittis eleifend rutrum Donec vitae suscipit est Nulla tempor lobortis orci Sed sagittis eleifend rutrum Donec vitae suscipit est Nulla tempor lobortis orci For more information, see: https://squidfunk.github.io/mkdocs-material/reference/content-tabs/#usage","title":"Content Tabs"},{"location":"help/contributor/mkdocs-contributor-guide/#file-includes-content-reuse","text":"KServe strives to reduce duplicative effort by reusing commonly used bits of information, see the docs/snippet directory for some examples. Snippets does not require a specific extension, and as long as a valid file name is specified, it will attempt to process it. Snippets can handle recursive file inclusion. And if Snippets encounters the same file in the current stack, it will avoid re-processing it in order to avoid an infinite loop (or crash on hitting max recursion depth). For more info, see: https://facelessuser.github.io/pymdown-extensions/extensions/snippets/","title":"File Includes (Content Reuse)"},{"location":"help/contributor/mkdocs-contributor-guide/#admonitions","text":"We use the following admonition boxes only. Use admonitions sparingly; too many admonitions can be distracting. Admonitions Formatting Note A Note contains information that is useful, but not essential. A reader can skip a note without bypassing required information. If the information suggests an action to take, use a tip instead. Tip A Tip suggests an helpful, but not mandatory, action to take. Warning A Warning draws attention to potential trouble. !!! note A Note contains information that is useful, but not essential. A reader can skip a note without bypassing required information. If the information suggests an action to take, use a tip instead. !!! tip A Tip suggests a helpful, but not mandatory, action to take. !!! warning A Warning draws attention to potential trouble.","title":"Admonitions"},{"location":"help/contributor/mkdocs-contributor-guide/#icons-and-emojis","text":"Material for MkDocs supports using Material Icons and Emojis using easy shortcodes. Emojs Formatting :taco: To search a database of Icons and Emojis (all of which can be used on kserve.io), as well as usage information, see: https://squidfunk.github.io/mkdocs-material/reference/icons-emojis/#search","title":"Icons and Emojis"},{"location":"help/contributor/mkdocs-contributor-guide/#redirects","text":"The KServe site uses mkdocs-redirects to \"redirect\" users from a page that may no longer exist (or has been moved) to their desired location. Adding re-directs to the KServe site is done in one centralized place, docs/config/redirects.yml . The format is shown here: plugins: redirects: redirect_maps: ... path_to_old_or_moved_URL : path_to_new_URL","title":"Redirects"},{"location":"help/contributor/templates/template-blog/","text":"Blog template instructions \u00b6 An example template with best-practices that you can use to start drafting an entry to post on the KServe blog. Copy a version of this template without the instructions Include a commented-out table with tracking info about reviews and approvals: | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | | | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | --> Blog content body \u00b6 Example step/section 1: \u00b6 Example step/section 2: \u00b6 Example step/section 3: \u00b6 Example section about results \u00b6 Further reading \u00b6 About the author \u00b6 Copy the template \u00b6 | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | | | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | --> # ## Blog content body ### Example step/section 1: ### Example step/section 2: ### Example step/section 3: ### Example section about results ## Further reading ## About the author ","title":"Blog template instructions"},{"location":"help/contributor/templates/template-blog/#blog-template-instructions","text":"An example template with best-practices that you can use to start drafting an entry to post on the KServe blog. Copy a version of this template without the instructions Include a commented-out table with tracking info about reviews and approvals: | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | | | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | -->","title":"Blog template instructions"},{"location":"help/contributor/templates/template-blog/#blog-content-body","text":" ","title":"Blog content body"},{"location":"help/contributor/templates/template-blog/#example-stepsection-1","text":"","title":"Example step/section 1:"},{"location":"help/contributor/templates/template-blog/#example-stepsection-2","text":"","title":"Example step/section 2:"},{"location":"help/contributor/templates/template-blog/#example-stepsection-3","text":"","title":"Example step/section 3:"},{"location":"help/contributor/templates/template-blog/#example-section-about-results","text":"","title":"Example section about results"},{"location":"help/contributor/templates/template-blog/#further-reading","text":"","title":"Further reading"},{"location":"help/contributor/templates/template-blog/#about-the-author","text":"","title":"About the author"},{"location":"help/contributor/templates/template-blog/#copy-the-template","text":" | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | | | YYYY-MM-DD | :+1:, :monocle_face:, :-1: | --> # ## Blog content body ### Example step/section 1: ### Example step/section 2: ### Example step/section 3: ### Example section about results ## Further reading ## About the author ","title":"Copy the template"},{"location":"help/contributor/templates/template-concept/","text":"Concept Template \u00b6 Use this template when writing conceptual topics. Conceptual topics explain how things work or what things mean. They provide helpful context to readers. They do not include procedures. Template \u00b6 The following template includes the standard sections that should appear in conceptual topics, including a topic introduction sentence, an overview, and placeholders for additional sections and subsections. Copy and paste the markdown from the template to use it in your topic. This topic describes... Write a sentence or two that describes the topic itself, not the subject of the topic. The goal of the topic sentence is to help readers understand if this topic is for them. For example, \"This topic describes what KServe is and how it works.\" ## Overview Write a few sentences describing the subject of the topic. ## Section Title Write a sentence or two to describe the content in this section. Create more sections as necessary. Optionally, add two or more subsections to each section. Do not skip header levels: H2 >> H3, not H2 >> H4. ### Subsection Title Write a sentence or two to describe the content in this section. ### Subsection Title Write a sentence or two to describe the content in this section. Conceptual Content Samples \u00b6 This section provides common content types that appear in conceptual topics. Copy and paste the markdown to use it in your topic. Table \u00b6 Introduce the table with a sentence. For example, \u201cThe following table lists which features are available to a KServe supported ML framework.\u201d Markdown Table Template \u00b6 Header 1 Header 2 Data1 Data2 Data3 Data4 Ordered List \u00b6 Write a sentence or two to introduce the content of the list. For example, \u201cIf you want to fix or add content to a past release, you can find the source files in the following folders.\u201d. Optionally, include bold lead-ins before each list item. Markdown Ordered List Templates \u00b6 Item 1 Item 2 Item 3 Lead-in description: Item 1 Lead-in description: Item 2 Lead-in description: Item 3 Unordered List \u00b6 Write a sentence or two to introduce the content of the list. For example, \u201cYour own path to becoming a KServe contributor can begin in any of the following components:\u201d. Optionally, include bold lead-ins before each list item. Markdown Unordered List Template \u00b6 List item List item List item Lead-in : List item Lead-in : List item Lead-in : List item Note \u00b6 Ensure the text beneath the note is indented as much as note is. Note This is a note. Warning \u00b6 If the note regards an issue that could lead to data loss, the note should be a warning. Warning This is a warning.","title":"Concept Template"},{"location":"help/contributor/templates/template-concept/#concept-template","text":"Use this template when writing conceptual topics. Conceptual topics explain how things work or what things mean. They provide helpful context to readers. They do not include procedures.","title":"Concept Template"},{"location":"help/contributor/templates/template-concept/#template","text":"The following template includes the standard sections that should appear in conceptual topics, including a topic introduction sentence, an overview, and placeholders for additional sections and subsections. Copy and paste the markdown from the template to use it in your topic. This topic describes... Write a sentence or two that describes the topic itself, not the subject of the topic. The goal of the topic sentence is to help readers understand if this topic is for them. For example, \"This topic describes what KServe is and how it works.\" ## Overview Write a few sentences describing the subject of the topic. ## Section Title Write a sentence or two to describe the content in this section. Create more sections as necessary. Optionally, add two or more subsections to each section. Do not skip header levels: H2 >> H3, not H2 >> H4. ### Subsection Title Write a sentence or two to describe the content in this section. ### Subsection Title Write a sentence or two to describe the content in this section.","title":"Template"},{"location":"help/contributor/templates/template-concept/#conceptual-content-samples","text":"This section provides common content types that appear in conceptual topics. Copy and paste the markdown to use it in your topic.","title":"Conceptual Content Samples"},{"location":"help/contributor/templates/template-concept/#table","text":"Introduce the table with a sentence. For example, \u201cThe following table lists which features are available to a KServe supported ML framework.\u201d","title":"Table"},{"location":"help/contributor/templates/template-concept/#markdown-table-template","text":"Header 1 Header 2 Data1 Data2 Data3 Data4","title":"Markdown Table Template"},{"location":"help/contributor/templates/template-concept/#ordered-list","text":"Write a sentence or two to introduce the content of the list. For example, \u201cIf you want to fix or add content to a past release, you can find the source files in the following folders.\u201d. Optionally, include bold lead-ins before each list item.","title":"Ordered List"},{"location":"help/contributor/templates/template-concept/#markdown-ordered-list-templates","text":"Item 1 Item 2 Item 3 Lead-in description: Item 1 Lead-in description: Item 2 Lead-in description: Item 3","title":"Markdown Ordered List Templates"},{"location":"help/contributor/templates/template-concept/#unordered-list","text":"Write a sentence or two to introduce the content of the list. For example, \u201cYour own path to becoming a KServe contributor can begin in any of the following components:\u201d. Optionally, include bold lead-ins before each list item.","title":"Unordered List"},{"location":"help/contributor/templates/template-concept/#markdown-unordered-list-template","text":"List item List item List item Lead-in : List item Lead-in : List item Lead-in : List item","title":"Markdown Unordered List Template"},{"location":"help/contributor/templates/template-concept/#note","text":"Ensure the text beneath the note is indented as much as note is. Note This is a note.","title":"Note"},{"location":"help/contributor/templates/template-concept/#warning","text":"If the note regards an issue that could lead to data loss, the note should be a warning. Warning This is a warning.","title":"Warning"},{"location":"help/contributor/templates/template-procedure/","text":"Procedure template \u00b6 Use this template when writing procedural (how-to) topics. Procedural topics include detailed steps to perform a task as well as some context about the task. Template \u00b6 The following template includes the standard sections that should appear in procedural topics, including a topic sentence, an overview section, and sections for each task within the procedure. Copy and paste the markdown from the template to use it in your topic. This topic describes... Write a sentence or two that describes the topic itself, not the subject of the topic. The goal of the topic sentence is to help readers understand if this topic is for them. For example, \"This topic instructs how to serve a TensorFlow model.\" ## Overview Write a few sentences to describe the subject of the topic, if useful. For example, if the topic is about configuring a broker, you might provide some useful context about brokers. If there are multiple tasks in the procedure and they must be completed in order, create an ordered list that contains each task in the topic. Use bullets for sub-tasks. Include anchor links to the headings for each task. To [task]: 1. [Name of Task 1 (for example, Apply default configuration)](#task-1) 1. [Optional: Name of Task 2](#task-2) !!! note Unless the number of tasks in the procedure is particularly high, do not use numbered lead-ins in the task headings. For example, instead of \"Task 1: Apply default configuration\", use \"Apply default configuration\". ## Prerequisites Use one of the following formats for the Prerequisites section. ### Formatting for two or more prerequisites If there are two or more prerequisites, use the following format. Include links for more information, if necessary. Before you [task], you must have/do: * Prerequisite. See [Link](). * Prerequisite. See [Link](). For example: Before you deploy PyTorch model, you must have: * KServe. See [Installing the KServe](link-to-that-topic). * An Apache Kafka cluster. See [Link to Instructions to Download](link-to-that-topic). ### Format for one prerequisite If there is one prerequisite, use the following format. Include a link for more information, if necessary. Before you [task], you must have/do [prerequisite]. See [Link](link). For example: Before you create the `InferenceService`, you must have a Kubernetes cluster with KServe installed and DNS configured. See the [installation instructions](../../../install/README.md) if you need to create one. ## Task 1 Write a few sentences to describe the task and provide additional context on the task. !!! note When writing a single-step procedure, write the step in one sentence and make it a bullet. The signposting is important given readers are strongly inclined to look for numbered steps and bullet points when searching for instructions. If possible, expand the procedure to include at least one more step. Few procedures truly require a single step. [Task]: 1. Step 1 1. Step 2 ## Optional: Task 2 If the task is optional, put \"Optional:\" in the heading. Write a few sentences to describe the task and provide additional context on the task. [Task]: 1. Step 1 2. Step 2 Procedure Content Samples \u00b6 This section provides common content types that appear in procedural topics. Copy and paste the markdown to use it in your topic. \u201cFill-in-the-Fields\u201d Table \u00b6 Where the reader must enter many values in, for example, a YAML file, use a table within the procedure as follows: Open the YAML file. Key1 : Value1 Key2 : Value2 metadata : annotations : # case-sensitive Key3 : Value3 Key4 : Value4 Key5 : Value5 spec : # Configuration specific to this broker. config : Key6 : Value6 Change the relevant values to your needs, using the following table as a guide. Key Value Type Description Key1 String Description Key2 Integer Description Key3 String Description Key4 String Description Key5 Float Description Key6 String Description Table \u00b6 Introduce the table with a sentence. For example, \u201cThe following table lists which features are available to a KServe supported ML framework. Markdown Table Template \u00b6 Header 1 Header 2 Data1 Data2 Data3 Data4 Ordered List \u00b6 Write a sentence or two to introduce the content of the list. For example, \u201cIf you want to fix or add content to a past release, you can find the source files in the following folders.\u201d. Optionally, include bold lead-ins before each list item. Markdown Ordered List Templates \u00b6 Item 1 Item 2 Item 3 Lead-in description: Item 1 Lead-in description: Item 2 Lead-in description: Item 3 Unordered List \u00b6 Write a sentence or two to introduce the content of the list. For example, \u201cYour own path to becoming a KServe contributor can begin in any of the following components:\u201d. Optionally, include bold lead-ins before each list item. Markdown Unordered List Template \u00b6 List item List item List item Lead-in : List item Lead-in : List item Lead-in : List item Note \u00b6 Ensure the text beneath the note is indented as much as note is. Note This is a note. Warning \u00b6 If the note regards an issue that could lead to data loss, the note should be a warning. Warning This is a warning. Markdown Embedded Image \u00b6 The following is an embedded image reference in markdown. Tabs \u00b6 Place multiple versions of the same procedure (such as a CLI procedure vs a YAML procedure) within tabs. Indent the opening tabs tags 3 spaces to make the tabs display properly. == \"tab1 name\" This is a stem: 1. This is a step. ``` This is some code. ``` 1. This is another step. == \"tab2 name\" This is a stem: 1. This is a step. ``` This is some code. ``` 1. This is another step. Documenting Code and Code Snippets \u00b6 For instructions on how to format code and code snippets, see the Style Guide.","title":"Procedure template"},{"location":"help/contributor/templates/template-procedure/#procedure-template","text":"Use this template when writing procedural (how-to) topics. Procedural topics include detailed steps to perform a task as well as some context about the task.","title":"Procedure template"},{"location":"help/contributor/templates/template-procedure/#template","text":"The following template includes the standard sections that should appear in procedural topics, including a topic sentence, an overview section, and sections for each task within the procedure. Copy and paste the markdown from the template to use it in your topic. This topic describes... Write a sentence or two that describes the topic itself, not the subject of the topic. The goal of the topic sentence is to help readers understand if this topic is for them. For example, \"This topic instructs how to serve a TensorFlow model.\" ## Overview Write a few sentences to describe the subject of the topic, if useful. For example, if the topic is about configuring a broker, you might provide some useful context about brokers. If there are multiple tasks in the procedure and they must be completed in order, create an ordered list that contains each task in the topic. Use bullets for sub-tasks. Include anchor links to the headings for each task. To [task]: 1. [Name of Task 1 (for example, Apply default configuration)](#task-1) 1. [Optional: Name of Task 2](#task-2) !!! note Unless the number of tasks in the procedure is particularly high, do not use numbered lead-ins in the task headings. For example, instead of \"Task 1: Apply default configuration\", use \"Apply default configuration\". ## Prerequisites Use one of the following formats for the Prerequisites section. ### Formatting for two or more prerequisites If there are two or more prerequisites, use the following format. Include links for more information, if necessary. Before you [task], you must have/do: * Prerequisite. See [Link](). * Prerequisite. See [Link](). For example: Before you deploy PyTorch model, you must have: * KServe. See [Installing the KServe](link-to-that-topic). * An Apache Kafka cluster. See [Link to Instructions to Download](link-to-that-topic). ### Format for one prerequisite If there is one prerequisite, use the following format. Include a link for more information, if necessary. Before you [task], you must have/do [prerequisite]. See [Link](link). For example: Before you create the `InferenceService`, you must have a Kubernetes cluster with KServe installed and DNS configured. See the [installation instructions](../../../install/README.md) if you need to create one. ## Task 1 Write a few sentences to describe the task and provide additional context on the task. !!! note When writing a single-step procedure, write the step in one sentence and make it a bullet. The signposting is important given readers are strongly inclined to look for numbered steps and bullet points when searching for instructions. If possible, expand the procedure to include at least one more step. Few procedures truly require a single step. [Task]: 1. Step 1 1. Step 2 ## Optional: Task 2 If the task is optional, put \"Optional:\" in the heading. Write a few sentences to describe the task and provide additional context on the task. [Task]: 1. Step 1 2. Step 2","title":"Template"},{"location":"help/contributor/templates/template-procedure/#procedure-content-samples","text":"This section provides common content types that appear in procedural topics. Copy and paste the markdown to use it in your topic.","title":"Procedure Content Samples"},{"location":"help/contributor/templates/template-procedure/#fill-in-the-fields-table","text":"Where the reader must enter many values in, for example, a YAML file, use a table within the procedure as follows: Open the YAML file. Key1 : Value1 Key2 : Value2 metadata : annotations : # case-sensitive Key3 : Value3 Key4 : Value4 Key5 : Value5 spec : # Configuration specific to this broker. config : Key6 : Value6 Change the relevant values to your needs, using the following table as a guide. Key Value Type Description Key1 String Description Key2 Integer Description Key3 String Description Key4 String Description Key5 Float Description Key6 String Description","title":"\u201cFill-in-the-Fields\u201d Table"},{"location":"help/contributor/templates/template-procedure/#table","text":"Introduce the table with a sentence. For example, \u201cThe following table lists which features are available to a KServe supported ML framework.","title":"Table"},{"location":"help/contributor/templates/template-procedure/#markdown-table-template","text":"Header 1 Header 2 Data1 Data2 Data3 Data4","title":"Markdown Table Template"},{"location":"help/contributor/templates/template-procedure/#ordered-list","text":"Write a sentence or two to introduce the content of the list. For example, \u201cIf you want to fix or add content to a past release, you can find the source files in the following folders.\u201d. Optionally, include bold lead-ins before each list item.","title":"Ordered List"},{"location":"help/contributor/templates/template-procedure/#markdown-ordered-list-templates","text":"Item 1 Item 2 Item 3 Lead-in description: Item 1 Lead-in description: Item 2 Lead-in description: Item 3","title":"Markdown Ordered List Templates"},{"location":"help/contributor/templates/template-procedure/#unordered-list","text":"Write a sentence or two to introduce the content of the list. For example, \u201cYour own path to becoming a KServe contributor can begin in any of the following components:\u201d. Optionally, include bold lead-ins before each list item.","title":"Unordered List"},{"location":"help/contributor/templates/template-procedure/#markdown-unordered-list-template","text":"List item List item List item Lead-in : List item Lead-in : List item Lead-in : List item","title":"Markdown Unordered List Template"},{"location":"help/contributor/templates/template-procedure/#note","text":"Ensure the text beneath the note is indented as much as note is. Note This is a note.","title":"Note"},{"location":"help/contributor/templates/template-procedure/#warning","text":"If the note regards an issue that could lead to data loss, the note should be a warning. Warning This is a warning.","title":"Warning"},{"location":"help/contributor/templates/template-procedure/#markdown-embedded-image","text":"The following is an embedded image reference in markdown.","title":"Markdown Embedded Image"},{"location":"help/contributor/templates/template-procedure/#tabs","text":"Place multiple versions of the same procedure (such as a CLI procedure vs a YAML procedure) within tabs. Indent the opening tabs tags 3 spaces to make the tabs display properly. == \"tab1 name\" This is a stem: 1. This is a step. ``` This is some code. ``` 1. This is another step. == \"tab2 name\" This is a stem: 1. This is a step. ``` This is some code. ``` 1. This is another step.","title":"Tabs"},{"location":"help/contributor/templates/template-procedure/#documenting-code-and-code-snippets","text":"For instructions on how to format code and code snippets, see the Style Guide.","title":"Documenting Code and Code Snippets"},{"location":"help/contributor/templates/template-troubleshooting/","text":"Troubleshooting template \u00b6 When writing guidance to help to troubleshoot specific errors, the error must include: Error Description: To describe the error very briefly so that users can search for it easily. Symptom: To describe the error in a way that helps users to diagnose their issue. Include error messages or anything else users might see if they encounter this error. Explanation (or cause): To inform users about why they are seeing this error. This can be omitted if the cause of the error is unknown. Solution: To inform the user about how to fix the error. Example Troubleshooting Table \u00b6 Troubleshooting \u00b6 | Error Description | |----------|------------| | Symptom | During the event something breaks. | | Cause | The thing is broken. | | Solution | To solve this issue, do the following: 1. This. 2. That. |","title":"Troubleshooting template"},{"location":"help/contributor/templates/template-troubleshooting/#troubleshooting-template","text":"When writing guidance to help to troubleshoot specific errors, the error must include: Error Description: To describe the error very briefly so that users can search for it easily. Symptom: To describe the error in a way that helps users to diagnose their issue. Include error messages or anything else users might see if they encounter this error. Explanation (or cause): To inform users about why they are seeing this error. This can be omitted if the cause of the error is unknown. Solution: To inform the user about how to fix the error.","title":"Troubleshooting template"},{"location":"help/contributor/templates/template-troubleshooting/#example-troubleshooting-table","text":"","title":"Example Troubleshooting Table"},{"location":"help/contributor/templates/template-troubleshooting/#troubleshooting","text":"| Error Description | |----------|------------| | Symptom | During the event something breaks. | | Cause | The thing is broken. | | Solution | To solve this issue, do the following: 1. This. 2. That. |","title":"Troubleshooting"},{"location":"help/style-guide/documenting-code/","text":"Documenting Code \u00b6 Words requiring code formatting \u00b6 Apply code formatting only to special-purpose text: Filenames Path names Fields and values from a YAML file Any text that goes into a CLI CLI names Specify the programming language \u00b6 Specify the language your code is in as part of the code block Specify non-language specific code, like CLI commands, with ```bash. See the following examples for formatting. Correct Incorrect Correct Formatting Incorrect Formatting package main import \"fmt\" func main () { fmt . Println ( \"hello world\" ) } package main import \"fmt\" func main () { fmt.Println ( \"hello world\" ) } ```go package main import \"fmt\" func main() { fmt.Println(\"hello world\") } ``` ```bash package main import \"fmt\" func main() { fmt.Println(\"hello world\") } ``` Documenting YAML \u00b6 When documenting YAML, use two steps. Use step 1 to create the YAML file, and step 2 to apply the YAML file. Use kubectl apply for files/objects that the user creates: it works for both \u201ccreate\u201d and \u201cupdate\u201d, and the source of truth is their local files. Use kubectl edit for files which are shipped as part of the KServe software, like the KServe ConfigMaps. Write ```yaml at the beginning of your code block if you are typing YAML code as part of a CLI command. Correct Incorrect Creating or updating a resource: Create a YAML file using the following template: # YAML FILE CONTENTS Apply the YAML file by running the command: kubectl apply -f .yaml Where is the name of the file you created in the previous step. Editing a ConfigMap: kubectl -n edit configmap Example 1: cat < is\u2026\" Single variable \u00b6 Correct Incorrect kubectl get isvc Where is the name of your InferenceService. kubectl get isvc { SERVICE_NAME } {SERVICE_NAME} = The name of your service Multiple variables \u00b6 Correct Incorrect kn create service --revision-name Where: is the name of your Knative Service. is the desired name of your revision. kn create service --revision-name Where is the name of your Knative Service. Where is the desired name of your revision. CLI output \u00b6 CLI Output should include the custom css \"{ .bash .no-copy }\" in place of \"bash\" which removes the \"Copy to clipboard button\" on the right side of the code block Correct Incorrect Correct Formatting Incorrect Formatting ```{ .bash .no-copy } ``` ```bash ```","title":"Documenting Code"},{"location":"help/style-guide/documenting-code/#documenting-code","text":"","title":"Documenting Code"},{"location":"help/style-guide/documenting-code/#words-requiring-code-formatting","text":"Apply code formatting only to special-purpose text: Filenames Path names Fields and values from a YAML file Any text that goes into a CLI CLI names","title":"Words requiring code formatting"},{"location":"help/style-guide/documenting-code/#specify-the-programming-language","text":"Specify the language your code is in as part of the code block Specify non-language specific code, like CLI commands, with ```bash. See the following examples for formatting. Correct Incorrect Correct Formatting Incorrect Formatting package main import \"fmt\" func main () { fmt . Println ( \"hello world\" ) } package main import \"fmt\" func main () { fmt.Println ( \"hello world\" ) } ```go package main import \"fmt\" func main() { fmt.Println(\"hello world\") } ``` ```bash package main import \"fmt\" func main() { fmt.Println(\"hello world\") } ```","title":"Specify the programming language"},{"location":"help/style-guide/documenting-code/#documenting-yaml","text":"When documenting YAML, use two steps. Use step 1 to create the YAML file, and step 2 to apply the YAML file. Use kubectl apply for files/objects that the user creates: it works for both \u201ccreate\u201d and \u201cupdate\u201d, and the source of truth is their local files. Use kubectl edit for files which are shipped as part of the KServe software, like the KServe ConfigMaps. Write ```yaml at the beginning of your code block if you are typing YAML code as part of a CLI command. Correct Incorrect Creating or updating a resource: Create a YAML file using the following template: # YAML FILE CONTENTS Apply the YAML file by running the command: kubectl apply -f .yaml Where is the name of the file you created in the previous step. Editing a ConfigMap: kubectl -n edit configmap Example 1: cat < is\u2026\"","title":"Referencing variables in code blocks"},{"location":"help/style-guide/documenting-code/#single-variable","text":"Correct Incorrect kubectl get isvc Where is the name of your InferenceService. kubectl get isvc { SERVICE_NAME } {SERVICE_NAME} = The name of your service","title":"Single variable"},{"location":"help/style-guide/documenting-code/#multiple-variables","text":"Correct Incorrect kn create service --revision-name Where: is the name of your Knative Service. is the desired name of your revision. kn create service
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|