diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 3b37bc5..06c3aab 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.28.dev0
+current_version = 0.0.28.dev1
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<build>\d+))?
 serialize =
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 171d1e0..b79e62a 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -83,7 +83,7 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3

-      - name: Run test
+      - name: Run cache constraint test
        env:
          WHYLABS_API_KEY: ${{ secrets.WHYLABS_API_KEY }}
        run: make test-cache-constraint
diff --git a/Dockerfile.cache_test b/Dockerfile.cache_test
index fd08b2c..86ece0e 100644
--- a/Dockerfile.cache_test
+++ b/Dockerfile.cache_test
@@ -4,7 +4,6 @@ WORKDIR /opt/whylogs-container
 RUN chown -R whylabs:whylabs /opt/whylogs-container
 USER whylabs

-ENV LLM_CONTAINER=True
 ENV CONTAINER_CACHE_BASE=/opt/whylogs-container/.cache
 ENV HF_HOME=$CONTAINER_CACHE_BASE/hf_home/
 ENV NLTK_DATA=$CONTAINER_CACHE_BASE/nltk_data/
@@ -38,9 +37,15 @@ RUN poetry config virtualenvs.in-project true
 RUN poetry install --no-root --extras "all" --without dev
 RUN rm -rf .venv/lib/python3.10/site-packages/pandas/tests # Pandas deploys a ton of tests to pypi

+ENV TRANSFORMERS_VERBOSITY=debug
 copy ./langkit ./langkit
 RUN bash -c "source .venv/bin/activate; python -m langkit.scripts.langkit_cache"
+RUN find $CONTAINER_CACHE_BASE/
+

 # This step will fail if any network requests happen
+ENV TRANSFORMERS_OFFLINE=1
+ENV HF_DATASETS_OFFLINE=1
+ENV HF_HUB_OFFLINE=1
 RUN --network=none bash -c "source .venv/bin/activate; python -m langkit.scripts.langkit_cache --skip-downloads"
diff --git a/langkit/asset_downloader.py b/langkit/asset_downloader.py
index ffc6d0b..150dc30 100644
--- a/langkit/asset_downloader.py
+++ b/langkit/asset_downloader.py
@@ -36,7 +36,7 @@ def _get_asset_path(asset_id: str, tag: str = "0") -> AssetPath:
         asset_id=asset_id,
         tag=tag,
         zip_path=f"{LANGKIT_CACHE}/assets/{asset_id}/{tag}/{asset_id}.zip",
-        extract_path=f"{LANGKIT_CACHE}/assets/{asset_id}/{tag}/{asset_id}/",
+        extract_path=f"{LANGKIT_CACHE}/assets/{asset_id}/{tag}/{asset_id}",
     )


diff --git a/langkit/metrics/library.py b/langkit/metrics/library.py
index 2050bef..14b55cc 100644
--- a/langkit/metrics/library.py
+++ b/langkit/metrics/library.py
@@ -16,14 +16,13 @@ def all(prompt: bool = True, response: bool = True) -> MetricCreator:
     from langkit.metrics.text_statistics import prompt_textstat_metric, response_textstat_metric
     from langkit.metrics.themes.themes import prompt_jailbreak_similarity_metric, response_refusal_similarity_metric
     from langkit.metrics.token import prompt_token_metric, response_token_metric
-    from langkit.metrics.toxicity import prompt_toxicity_metric, response_toxicity_metric

     prompt_metrics = [
         prompt_textstat_metric,
         prompt_token_metric,
         prompt_regex_metric,
         prompt_sentiment_polarity,
-        prompt_toxicity_metric,
+        lib.prompt.toxicity(),
         prompt_response_input_output_similarity_metric,
         prompt_injections_metric,
         prompt_jailbreak_similarity_metric,
@@ -38,7 +37,7 @@ def all(prompt: bool = True, response: bool = True) -> MetricCreator:
         response_sentiment_polarity,
         response_refusal_similarity_metric,
         response_presidio_pii_metric,
-        response_toxicity_metric,
+        lib.response.toxicity(),
         lib.response.topics.medicine(),
     ]

@@ -119,7 +118,7 @@ def toxicity_score() -> MetricCreator:
             Analyze the input for toxicity.
             The output of this metric ranges from 0 to 1, where 0 indicates non-toxic and 1 indicates toxic.
             """
-            from langkit.metrics.toxicity import prompt_toxicity_metric
+            from langkit.metrics.toxicity_onnx import prompt_toxicity_metric

             return prompt_toxicity_metric

@@ -282,20 +281,31 @@ def sentiment_score() -> MetricCreator:
             return prompt_sentiment_polarity

         class topics:
-            def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None):
+            def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None, onnx: bool = True):
                 self.topics = topics
                 self.hypothesis_template = hypothesis_template
+                self.onnx = onnx

             def __call__(self) -> MetricCreator:
-                from langkit.metrics.topic import topic_metric
+                if self.onnx:
+                    from langkit.metrics.topic_onnx import topic_metric

-                return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)
+                    return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)
+                else:
+                    from langkit.metrics.topic import topic_metric
+
+                    return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)

             @staticmethod
-            def medicine() -> MetricCreator:
-                from langkit.metrics.topic import topic_metric
+            def medicine(onnx: bool = False) -> MetricCreator:
+                if onnx:
+                    from langkit.metrics.topic_onnx import topic_metric
+
+                    return lambda: topic_metric("prompt", ["medicine"])
+                else:
+                    from langkit.metrics.topic import topic_metric

-                return lambda: topic_metric("prompt", ["medicine"])
+                    return lambda: topic_metric("prompt", ["medicine"])

     class response:
         @staticmethod
@@ -326,7 +336,7 @@ def toxicity_score() -> MetricCreator:
             Analyze the toxicity of the response.
             The output of this metric ranges from 0 to 1, where 0 indicates a non-toxic response and 1 indicates a toxic response.
""" - from langkit.metrics.toxicity import response_toxicity_metric + from langkit.metrics.toxicity_onnx import response_toxicity_metric return response_toxicity_metric @@ -486,17 +496,28 @@ def refusal() -> MetricCreator: return response_refusal_similarity_metric class topics: - def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None): + def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None, onnx: bool = True): self.topics = topics self.hypothesis_template = hypothesis_template + self.onnx = onnx def __call__(self) -> MetricCreator: - from langkit.metrics.topic import topic_metric + if self.onnx: + from langkit.metrics.topic_onnx import topic_metric - return partial(topic_metric, "response", self.topics, self.hypothesis_template) + return partial(topic_metric, "response", self.topics, self.hypothesis_template) + else: + from langkit.metrics.topic import topic_metric + + return partial(topic_metric, "response", self.topics, self.hypothesis_template) @staticmethod - def medicine() -> MetricCreator: - from langkit.metrics.topic import topic_metric + def medicine(onnx: bool = False) -> MetricCreator: + if onnx: + from langkit.metrics.topic_onnx import topic_metric + + return partial(topic_metric, "response", ["medicine"]) + else: + from langkit.metrics.topic_onnx import topic_metric - return partial(topic_metric, "response", ["medicine"]) + return partial(topic_metric, "response", ["medicine"]) diff --git a/langkit/metrics/topic_onnx.py b/langkit/metrics/topic_onnx.py new file mode 100644 index 0000000..2511bc0 --- /dev/null +++ b/langkit/metrics/topic_onnx.py @@ -0,0 +1,152 @@ +# pyright: reportUnknownMemberType=none +# pyright: reportUnknownVariableType=none +# pyright: reportUnknownLambdaType=none + +import os +from dataclasses import dataclass +from functools import lru_cache, partial +from typing import List, Optional, TypedDict + +import pandas as pd +import torch +from optimum.modeling_base import PreTrainedModel +from optimum.onnxruntime import ORTModelForSequenceClassification +from transformers import AutoTokenizer, Pipeline, PreTrainedTokenizerBase, pipeline # type: ignore + +from langkit.core.metric import MetricCreator, MultiMetric, MultiMetricResult, UdfInput + +__default_topics = [ + "medicine", + "economy", + "technology", + "entertainment", +] + +_hypothesis_template = "This example is about {}" + +_model = "MoritzLaurer/xtremedistil-l6-h256-zeroshot-v1.1-all-33" +_revision = "dea69e79cd6063916d08b883ea8a3c1823fd10b4" + + +def _download_assets(): + ORTModelForSequenceClassification.from_pretrained( + _model, + subfolder="onnx", + file_name="model.onnx", + revision=_revision, + export=False, + ) + AutoTokenizer.from_pretrained(_model, revision=_revision) + + +def _get_tokenizer() -> PreTrainedTokenizerBase: + return AutoTokenizer.from_pretrained(_model, revision=_revision, local_files_only=True) + + +def _get_model() -> PreTrainedModel: + # return ORTModelForSequenceClassification.from_pretrained( + # _model, + # subfolder="onnx", + # file_name="model.onnx", + # export=False, + # revision=_revision, + # local_files_only=True, + # ) + # Optimum doesn't support offline mode https://github.com/huggingface/optimum/issues/1796 + # workaround for now is to reference the actual model path after caching it. 
diff --git a/langkit/metrics/topic_onnx.py b/langkit/metrics/topic_onnx.py
new file mode 100644
index 0000000..2511bc0
--- /dev/null
+++ b/langkit/metrics/topic_onnx.py
@@ -0,0 +1,152 @@
+# pyright: reportUnknownMemberType=none
+# pyright: reportUnknownVariableType=none
+# pyright: reportUnknownLambdaType=none
+
+import os
+from dataclasses import dataclass
+from functools import lru_cache, partial
+from typing import List, Optional, TypedDict
+
+import pandas as pd
+import torch
+from optimum.modeling_base import PreTrainedModel
+from optimum.onnxruntime import ORTModelForSequenceClassification
+from transformers import AutoTokenizer, Pipeline, PreTrainedTokenizerBase, pipeline  # type: ignore
+
+from langkit.core.metric import MetricCreator, MultiMetric, MultiMetricResult, UdfInput
+
+__default_topics = [
+    "medicine",
+    "economy",
+    "technology",
+    "entertainment",
+]
+
+_hypothesis_template = "This example is about {}"
+
+_model = "MoritzLaurer/xtremedistil-l6-h256-zeroshot-v1.1-all-33"
+_revision = "dea69e79cd6063916d08b883ea8a3c1823fd10b4"
+
+
+def _download_assets():
+    ORTModelForSequenceClassification.from_pretrained(
+        _model,
+        subfolder="onnx",
+        file_name="model.onnx",
+        revision=_revision,
+        export=False,
+    )
+    AutoTokenizer.from_pretrained(_model, revision=_revision)
+
+
+def _get_tokenizer() -> PreTrainedTokenizerBase:
+    return AutoTokenizer.from_pretrained(_model, revision=_revision, local_files_only=True)
+
+
+def _get_model() -> PreTrainedModel:
+    # return ORTModelForSequenceClassification.from_pretrained(
+    #     _model,
+    #     subfolder="onnx",
+    #     file_name="model.onnx",
+    #     export=False,
+    #     revision=_revision,
+    #     local_files_only=True,
+    # )
+    # Optimum doesn't support offline mode (https://github.com/huggingface/optimum/issues/1796).
+    # The workaround for now is to reference the actual model path after caching it.
+    # Uncomment the code above when the issue is resolved.
+    model_name = _model.replace("/", "--")
+    home_dir = os.path.expanduser("~")
+    base = os.environ.get("HF_HOME", os.path.join(home_dir, ".cache/huggingface"))
+    model_path = f"{base}/hub/models--{model_name}/snapshots/{_revision}"
+    return ORTModelForSequenceClassification.from_pretrained(
+        model_path,
+        file_name="onnx/model.onnx",
+        export=False,
+        revision=_revision,
+        local_files_only=True,
+    )
+
+
+@lru_cache
+def _get_classifier() -> Pipeline:
+    return pipeline(
+        "zero-shot-classification",
+        model=_get_model(),  # pyright: ignore[reportArgumentType]
+        tokenizer=_get_tokenizer(),  # pyright: ignore[reportArgumentType]
+        truncation=True,
+        device="cuda" if torch.cuda.is_available() else "cpu",
+    )
+
+
+class ClassificationResults(TypedDict):
+    sequence: str
+    labels: List[str]
+    scores: List[float]
+
+
+def __get_scores_per_label(
+    text: List[str], topics: List[str], hypothesis_template: str = _hypothesis_template, multi_label: bool = True
+) -> List[ClassificationResults]:
+    if not text:
+        return []
+
+    classifier = _get_classifier()
+    result: List[ClassificationResults] = classifier(text, topics, hypothesis_template=hypothesis_template, multi_label=multi_label)  # type: ignore
+    return result
+
+
+def _sanitize_metric_name(topic: str) -> str:
+    """
+    Sanitize a metric name created from a topic: replace whitespace with underscores and lowercase it.
+    """
+    return topic.replace(" ", "_").lower()
+
+
+def topic_metric(input_name: str, topics: List[str], hypothesis_template: Optional[str] = None) -> MultiMetric:
+    hypothesis_template = hypothesis_template or _hypothesis_template
+
+    def udf(text: pd.DataFrame) -> MultiMetricResult:
+        value: List[str] = list(UdfInput(text).iter_column_rows(input_name))
+        results = __get_scores_per_label(value, topics=topics, hypothesis_template=hypothesis_template)
+
+        all_metrics: List[List[float]] = [[] for _ in topics]
+        for result in results:
+            # Map each topic to its score in the current result
+            topic_to_score = {label: score for label, score in zip(result["labels"], result["scores"])}
+            # For each topic, append this result's score to the corresponding list in all_metrics
+            for i, topic in enumerate(topics):
+                all_metrics[i].append(topic_to_score[topic])
+
+        return MultiMetricResult(metrics=all_metrics)
+
+    def cache_assets():
+        _download_assets()
+
+    def init():
+        _get_classifier()
+
+    metric_names = [f"{input_name}.topics.{_sanitize_metric_name(topic)}" for topic in topics]
+    return MultiMetric(names=metric_names, input_names=[input_name], evaluate=udf, cache_assets=cache_assets, init=init)
+
+
+prompt_topic_module = partial(topic_metric, "prompt", __default_topics, _hypothesis_template)
+response_topic_module = partial(topic_metric, "response", __default_topics, _hypothesis_template)
+prompt_response_topic_module = [prompt_topic_module, response_topic_module]
+
+
+@dataclass
+class CustomTopicModules:
+    prompt_topic_module: MetricCreator
+    response_topic_module: MetricCreator
+    prompt_response_topic_module: MetricCreator
+
+
+def get_custom_topic_modules(topics: List[str], template: str = _hypothesis_template) -> CustomTopicModules:
+    prompt_topic_module = partial(topic_metric, "prompt", topics, template)
+    response_topic_module = partial(topic_metric, "response", topics, template)
+    return CustomTopicModules(
+        prompt_topic_module=prompt_topic_module,
+        response_topic_module=response_topic_module,
+        prompt_response_topic_module=[prompt_topic_module, response_topic_module],
+    )
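A short usage sketch for the new module (illustrative only, not part of the patch). It assumes the MultiMetric returned by topic_metric exposes the names/cache_assets/init/evaluate values passed to its constructor above as attributes.

    import pandas as pd

    from langkit.metrics.topic_onnx import topic_metric

    metric = topic_metric("prompt", ["medicine", "technology"])
    # metric.names == ["prompt.topics.medicine", "prompt.topics.technology"]
    metric.cache_assets()  # fetch the ORT model and tokenizer into the HF cache
    metric.init()          # build the zero-shot classification pipeline once
    result = metric.evaluate(pd.DataFrame({"prompt": ["What is the usual dosage of ibuprofen?"]}))
    # result.metrics holds one list of scores per topic, aligned with metric.names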
diff --git a/langkit/metrics/toxicity_onnx.py b/langkit/metrics/toxicity_onnx.py
new file mode 100644
index 0000000..6dd87ac
--- /dev/null
+++ b/langkit/metrics/toxicity_onnx.py
@@ -0,0 +1,81 @@
+# pyright: reportUnknownMemberType=none
+# pyright: reportUnknownVariableType=none
+# pyright: reportUnknownLambdaType=none
+import os
+from functools import lru_cache, partial
+from typing import List, cast
+
+import numpy as np
+import onnxruntime
+import pandas as pd
+from transformers import (
+    AutoTokenizer,
+    PreTrainedTokenizerBase,
+)
+
+from langkit.asset_downloader import get_asset
+from langkit.core.metric import Metric, SingleMetric, SingleMetricResult, UdfInput
+from langkit.onnx_encoder import TransformerModel
+
+
+def __toxicity(tokenizer: PreTrainedTokenizerBase, session: onnxruntime.InferenceSession, max_length: int, text: List[str]) -> List[float]:
+    max_length_in_chars = tokenizer.model_max_length * 5
+    truncated_text = [content[:max_length_in_chars] for content in text]
+    inputs = tokenizer(truncated_text, return_tensors="pt", padding=True, truncation=True)
+    onnx_inputs = {k: v.numpy() for k, v in inputs.items() if k in ["input_ids", "attention_mask"]}
+    onnx_output_logits = session.run(None, onnx_inputs)[0]
+
+    # Apply softmax to convert logits into probabilities
+    probabilities = np.exp(onnx_output_logits) / np.sum(np.exp(onnx_output_logits), axis=1, keepdims=True)  # pyright: ignore[reportUnknownArgumentType]
+    labels = ["non-toxic", "toxic"]
+    # Find the index of the highest probability to determine the predicted label
+    predicted_label_idx = np.argmax(probabilities, axis=1)
+    predicted_labels: List[str] = [labels[idx] for idx in predicted_label_idx]
+    predicted_scores: List[float] = [prob[idx] for prob, idx in zip(probabilities, predicted_label_idx)]
+    results = [{"label": label, "score": score} for label, score in zip(predicted_labels, predicted_scores)]
+    return [result["score"] if result["label"] == "toxic" else 1.0 - result["score"] for result in results]  # type: ignore
+
+
+def _download_assets():
+    name, tag = TransformerModel.ToxicCommentModel.value
+    return get_asset(name, tag)
+
+
+@lru_cache
+def _get_tokenizer() -> PreTrainedTokenizerBase:
+    return AutoTokenizer.from_pretrained(_download_assets())
+
+
+@lru_cache
+def _get_session() -> onnxruntime.InferenceSession:
+    downloaded_path = _download_assets()
+    onnx_model_path = os.path.join(downloaded_path, "model.onnx")
+    print(f"Loading ONNX model from {onnx_model_path}")
+    return onnxruntime.InferenceSession(onnx_model_path, providers=["CPUExecutionProvider"])
+
+
+def toxicity_metric(column_name: str) -> Metric:
+    def cache_assets():
+        _download_assets()
+
+    def init():
+        _get_session()
+        _get_tokenizer()
+
+    def udf(text: pd.DataFrame) -> SingleMetricResult:
+        _tokenizer = _get_tokenizer()
+        _session = _get_session()
+
+        col = list(UdfInput(text).iter_column_rows(column_name))
+        max_length = cast(int, _tokenizer.model_max_length)
+        metrics = __toxicity(_tokenizer, _session, max_length, col)
+        return SingleMetricResult(metrics=metrics)
+
+    return SingleMetric(
+        name=f"{column_name}.toxicity.toxicity_score", input_names=[column_name], evaluate=udf, init=init, cache_assets=cache_assets
+    )
+
+
+prompt_toxicity_metric = partial(toxicity_metric, "prompt")
+response_toxicity_metric = partial(toxicity_metric, "response")
+prompt_response_toxicity_module = [prompt_toxicity_metric, response_toxicity_metric]
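To make the score mapping in __toxicity concrete, here is a small worked example (not part of the patch) for a single input whose logits over ["non-toxic", "toxic"] are [-2.0, 3.0]:

    import numpy as np

    logits = np.array([[-2.0, 3.0]])
    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)  # ~[[0.0067, 0.9933]]
    idx = int(np.argmax(probabilities, axis=1)[0])                                  # 1 -> "toxic"
    score = probabilities[0, idx] if idx == 1 else 1.0 - probabilities[0, idx]      # ~0.9933

    # Either branch reports the probability of the "toxic" class, so the metric
    # ranges from 0 (confidently non-toxic) to 1 (confidently toxic).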
diff --git a/langkit/onnx_encoder.py b/langkit/onnx_encoder.py
index 7b6364b..a615bc8 100644
--- a/langkit/onnx_encoder.py
+++ b/langkit/onnx_encoder.py
@@ -19,8 +19,12 @@ def _get_inference_session(onnx_file_path: str):

 class TransformerModel(Enum):
     AllMiniLM = ("all-MiniLM-L6-v2", "0")
+    ToxicCommentModel = ("toxic-comment-model", "0")

-    def get_model_path(self):
+    def cache_model_assets(self):
+        """
+        Return the path to the cached .onnx model file, downloading the assets if necessary.
+        """
         name, tag = self.value
         return f"{get_asset(name, tag)}/{name}.onnx"

@@ -28,8 +32,7 @@ def get_model_path(self):
 class OnnxSentenceTransformer(EmbeddingEncoder):
     def __init__(self, model: TransformerModel):
         self._tokenizer: BertTokenizerFast = cast(BertTokenizerFast, BertTokenizerFast.from_pretrained("bert-base-uncased"))
-        self._model = model
-        self._session = _get_inference_session(model.get_model_path())
+        self._session = _get_inference_session(model.cache_model_assets())

     def encode(self, text: Tuple[str, ...]) -> "torch.Tensor":
         # Pre-truncate the inputs to the model length for better performance
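A sketch (not part of the patch) of the encoder path that now goes through the renamed helper; the constructor and encode signature are the ones shown in the hunk above:

    from langkit.onnx_encoder import OnnxSentenceTransformer, TransformerModel

    # cache_model_assets() downloads the asset on first use and returns the .onnx path
    encoder = OnnxSentenceTransformer(TransformerModel.AllMiniLM)
    embeddings = encoder.encode(("some prompt text",))  # torch.Tensor, one row per input string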
"onnx-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0efeb46985de08f0efe758cb54ad3457e821a05c2eaf5ba2ccb8cd1602c08084"}, + {file = "onnx-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf14a3d32234f23e44abb73a755cb96a423fac7f004e8f046f36b10214151ee"}, + {file = "onnx-1.16.0-cp312-cp312-win32.whl", hash = "sha256:62a2e27ae8ba5fc9b4a2620301446a517b5ffaaf8566611de7a7c2160f5bcf4c"}, + {file = "onnx-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:3e0860fea94efde777e81a6f68f65761ed5e5f3adea2e050d7fbe373a9ae05b3"}, + {file = "onnx-1.16.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:70a90649318f3470985439ea078277c9fb2a2e6e2fd7c8f3f2b279402ad6c7e6"}, + {file = "onnx-1.16.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:71839546b7f93be4fa807995b182ab4b4414c9dbf049fee11eaaced16fcf8df2"}, + {file = "onnx-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7665217c45a61eb44718c8e9349d2ad004efa0cb9fbc4be5c6d5e18b9fe12b52"}, + {file = "onnx-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5752bbbd5717304a7643643dba383a2fb31e8eb0682f4e7b7d141206328a73b"}, + {file = "onnx-1.16.0-cp38-cp38-win32.whl", hash = "sha256:257858cbcb2055284f09fa2ae2b1cfd64f5850367da388d6e7e7b05920a40c90"}, + {file = "onnx-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:209fe84995a28038e29ae8369edd35f33e0ef1ebc3bddbf6584629823469deb1"}, + {file = "onnx-1.16.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:8cf3e518b1b1b960be542e7c62bed4e5219e04c85d540817b7027029537dec92"}, + {file = "onnx-1.16.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:30f02beaf081c7d9fa3a8c566a912fc4408e28fc33b1452d58f890851691d364"}, + {file = "onnx-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fb29a9a692b522deef1f6b8f2145da62c0c43ea1ed5b4c0f66f827fdc28847d"}, + {file = "onnx-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7755cbd5f4e47952e37276ea5978a46fc8346684392315902b5ed4a719d87d06"}, + {file = "onnx-1.16.0-cp39-cp39-win32.whl", hash = "sha256:7532343dc5b8b5e7c3e3efa441a3100552f7600155c4db9120acd7574f64ffbf"}, + {file = "onnx-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:d7886c05aa6d583ec42f6287678923c1e343afc4350e49d5b36a0023772ffa22"}, + {file = "onnx-1.16.0.tar.gz", hash = "sha256:237c6987c6c59d9f44b6136f5819af79574f8d96a760a1fa843bede11f3822f7"}, +] + +[package.dependencies] +numpy = ">=1.20" +protobuf = ">=3.20.2" + +[package.extras] +reference = ["Pillow", "google-re2"] + [[package]] name = "onnxruntime" version = "1.17.1" @@ -1737,6 +1784,57 @@ typing-extensions = ">=4.7,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +[[package]] +name = "optimum" +version = "1.18.0" +description = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality." 
+optional = true
+python-versions = ">=3.7.0"
+files = [
+    {file = "optimum-1.18.0-py3-none-any.whl", hash = "sha256:289035db5f88308af9387c7fefabe7dfa51187dfc8f02c96054614275688fd4a"},
+    {file = "optimum-1.18.0.tar.gz", hash = "sha256:28ea691515df6bfee92540fb0ef93a6614aa691f17c188dccd817719fea45936"},
+]
+
+[package.dependencies]
+coloredlogs = "*"
+datasets = [
+    {version = "*"},
+    {version = ">=1.2.1", optional = true, markers = "extra == \"onnxruntime\""},
+]
+evaluate = {version = "*", optional = true, markers = "extra == \"onnxruntime\""}
+huggingface-hub = ">=0.8.0"
+numpy = "*"
+onnx = {version = "*", optional = true, markers = "extra == \"onnxruntime\""}
+onnxruntime = {version = ">=1.11.0", optional = true, markers = "extra == \"onnxruntime\""}
+packaging = "*"
+protobuf = {version = ">=3.20.1", optional = true, markers = "extra == \"onnxruntime\""}
+sympy = "*"
+torch = ">=1.11"
+transformers = {version = ">=4.26.0,<4.40.0", extras = ["sentencepiece"]}
+
+[package.extras]
+amd = ["optimum-amd"]
+benchmark = ["evaluate (>=0.2.0)", "optuna", "scikit-learn", "seqeval", "torchvision", "tqdm"]
+dev = ["Pillow", "accelerate", "black (>=23.1,<24.0)", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "ruff (==0.1.5)", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
+diffusers = ["diffusers"]
+doc-build = ["accelerate"]
+exporters = ["onnx", "onnxruntime", "timm"]
+exporters-gpu = ["onnx", "onnxruntime-gpu", "timm"]
+exporters-tf = ["h5py", "numpy (<1.24.0)", "onnx", "onnxruntime", "tensorflow (>=2.4,<=2.12.1)", "tf2onnx", "timm", "transformers[sentencepiece] (>=4.26.0,<4.38.0)"]
+furiosa = ["optimum-furiosa"]
+graphcore = ["optimum-graphcore"]
+habana = ["optimum-habana", "transformers (>=4.37.0,<4.38.0)"]
+intel = ["optimum-intel (>=1.15.0)"]
+neural-compressor = ["optimum-intel[neural-compressor] (>=1.15.0)"]
+neuron = ["optimum-neuron[neuron]"]
+neuronx = ["optimum-neuron[neuronx]"]
+nncf = ["optimum-intel[nncf] (>=1.15.0)"]
+onnxruntime = ["datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime (>=1.11.0)", "protobuf (>=3.20.1)"]
+onnxruntime-gpu = ["accelerate", "datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime-gpu (>=1.11.0)", "protobuf (>=3.20.1)"]
+openvino = ["optimum-intel[openvino] (>=1.15.0)"]
+quality = ["black (>=23.1,<24.0)", "ruff (==0.1.5)"]
+tests = ["Pillow", "accelerate", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
+
 [[package]]
 name = "packaging"
 version = "23.2"
@@ -3845,10 +3943,12 @@ filelock = "*"
 huggingface-hub = ">=0.19.3,<1.0"
 numpy = ">=1.17"
 packaging = ">=20.0"
+protobuf = {version = "*", optional = true, markers = "extra == \"sentencepiece\""}
 pyyaml = ">=5.1"
 regex = "!=2019.12.17"
 requests = "*"
 safetensors = ">=0.4.1"
+sentencepiece = {version = ">=0.1.91,<0.1.92 || >0.1.92", optional = true, markers = "extra == \"sentencepiece\""}
 tokenizers = ">=0.14,<0.19"
 tqdm = ">=4.27"

"nltk", "numpy", "onnxruntime", "openai", "presidio-analyzer", "presidio-anonymizer", "sentence-transformers", "tiktoken"] +all = ["datasets", "evaluate", "faiss-cpu", "ipywidgets", "nltk", "numpy", "onnxruntime", "openai", "optimum", "presidio-analyzer", "presidio-anonymizer", "sentence-transformers", "tiktoken"] torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4" -content-hash = "e276fe46e042506ef0624ba65bff36b8995059f9be8f479c6a8d59f0987add7d" +content-hash = "d9b0bbb7380686ed92f3ed811434bbe311735b0112f1a74ce5a04bfe398055a5" diff --git a/pyproject.toml b/pyproject.toml index b4d93f9..a403a37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langkit" -version = "0.0.28.dev0" +version = "0.0.28.dev1" description = "A language toolkit for monitoring LLM interactions" authors = ["WhyLabs.ai "] homepage = "https://docs.whylabs.ai/docs/large-language-model-monitoring" @@ -32,6 +32,7 @@ presidio-anonymizer = {version = "^2.2.352", optional = true} tiktoken = {version = "^0.6.0", optional = true} tenacity = "^8.2.3" onnxruntime = {version = "^1.17.1", optional = true} +optimum = {extras = ["onnxruntime"], version = "^1.18.0", optional = true} [tool.poetry.group.dev.dependencies] @@ -63,6 +64,7 @@ all = [ "presidio-anonymizer", "tiktoken", "onnxruntime", + "optimum", ] [build-system]