diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 3b37bc5..06c3aab 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.28.dev0
+current_version = 0.0.28.dev1
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<build>\d+))?
 serialize =
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 171d1e0..b79e62a 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -83,7 +83,7 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3

-      - name: Run test
+      - name: Run cache constraint test
        env:
          WHYLABS_API_KEY: ${{ secrets.WHYLABS_API_KEY }}
        run: make test-cache-constraint
diff --git a/Dockerfile.cache_test b/Dockerfile.cache_test
index fd08b2c..86ece0e 100644
--- a/Dockerfile.cache_test
+++ b/Dockerfile.cache_test
@@ -4,7 +4,6 @@ WORKDIR /opt/whylogs-container
 RUN chown -R whylabs:whylabs /opt/whylogs-container
 USER whylabs

-ENV LLM_CONTAINER=True
 ENV CONTAINER_CACHE_BASE=/opt/whylogs-container/.cache
 ENV HF_HOME=$CONTAINER_CACHE_BASE/hf_home/
 ENV NLTK_DATA=$CONTAINER_CACHE_BASE/nltk_data/
@@ -38,9 +37,15 @@ RUN poetry config virtualenvs.in-project true
 RUN poetry install --no-root --extras "all" --without dev
 RUN rm -rf .venv/lib/python3.10/site-packages/pandas/tests # Pandas deploys a ton of tests to pypi

+ENV TRANSFORMERS_VERBOSITY=debug
 copy ./langkit ./langkit
 RUN bash -c "source .venv/bin/activate; python -m langkit.scripts.langkit_cache"
+RUN find $CONTAINER_CACHE_BASE/
+

 # This step will fail if any network requests happen
+ENV TRANSFORMERS_OFFLINE=1
+ENV HF_DATASETS_OFFLINE=1
+ENV HF_HUB_OFFLINE=1
 RUN --network=none bash -c "source .venv/bin/activate; python -m langkit.scripts.langkit_cache --skip-downloads"
diff --git a/langkit/asset_downloader.py b/langkit/asset_downloader.py
index ffc6d0b..150dc30 100644
--- a/langkit/asset_downloader.py
+++ b/langkit/asset_downloader.py
@@ -36,7 +36,7 @@ def _get_asset_path(asset_id: str, tag: str = "0") -> AssetPath:
         asset_id=asset_id,
         tag=tag,
         zip_path=f"{LANGKIT_CACHE}/assets/{asset_id}/{tag}/{asset_id}.zip",
-        extract_path=f"{LANGKIT_CACHE}/assets/{asset_id}/{tag}/{asset_id}/",
+        extract_path=f"{LANGKIT_CACHE}/assets/{asset_id}/{tag}/{asset_id}",
     )


diff --git a/langkit/metrics/library.py b/langkit/metrics/library.py
index 2050bef..14b55cc 100644
--- a/langkit/metrics/library.py
+++ b/langkit/metrics/library.py
@@ -16,14 +16,13 @@ def all(prompt: bool = True, response: bool = True) -> MetricCreator:
     from langkit.metrics.text_statistics import prompt_textstat_metric, response_textstat_metric
     from langkit.metrics.themes.themes import prompt_jailbreak_similarity_metric, response_refusal_similarity_metric
     from langkit.metrics.token import prompt_token_metric, response_token_metric
-    from langkit.metrics.toxicity import prompt_toxicity_metric, response_toxicity_metric

     prompt_metrics = [
         prompt_textstat_metric,
         prompt_token_metric,
         prompt_regex_metric,
         prompt_sentiment_polarity,
-        prompt_toxicity_metric,
+        lib.prompt.toxicity(),
         prompt_response_input_output_similarity_metric,
         prompt_injections_metric,
         prompt_jailbreak_similarity_metric,
@@ -38,7 +37,7 @@ def all(prompt: bool = True, response: bool = True) -> MetricCreator:
         response_sentiment_polarity,
         response_refusal_similarity_metric,
         response_presidio_pii_metric,
-        response_toxicity_metric,
+        lib.response.toxicity(),
         lib.response.topics.medicine(),
     ]

@@ -119,7 +118,7 @@ def toxicity_score() -> MetricCreator:
             Analyze the input for toxicity.
             The output of this metric ranges from 0 to 1, where 0 indicates non-toxic and 1 indicates toxic.
             """
-            from langkit.metrics.toxicity import prompt_toxicity_metric
+            from langkit.metrics.toxicity_onnx import prompt_toxicity_metric

             return prompt_toxicity_metric

@@ -282,20 +281,31 @@ def sentiment_score() -> MetricCreator:
             return prompt_sentiment_polarity

         class topics:
-            def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None):
+            def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None, onnx: bool = True):
                 self.topics = topics
                 self.hypothesis_template = hypothesis_template
+                self.onnx = onnx

             def __call__(self) -> MetricCreator:
-                from langkit.metrics.topic import topic_metric
+                if self.onnx:
+                    from langkit.metrics.topic_onnx import topic_metric

-                return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)
+                    return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)
+                else:
+                    from langkit.metrics.topic import topic_metric
+
+                    return partial(topic_metric, "prompt", self.topics, self.hypothesis_template)

             @staticmethod
-            def medicine() -> MetricCreator:
-                from langkit.metrics.topic import topic_metric
+            def medicine(onnx: bool = False) -> MetricCreator:
+                if onnx:
+                    from langkit.metrics.topic_onnx import topic_metric
+
+                    return lambda: topic_metric("prompt", ["medicine"])
+                else:
+                    from langkit.metrics.topic import topic_metric

-                return lambda: topic_metric("prompt", ["medicine"])
+                    return lambda: topic_metric("prompt", ["medicine"])

     class response:
         @staticmethod
@@ -326,7 +336,7 @@ def toxicity_score() -> MetricCreator:
             Analyze the toxicity of the response.
             The output of this metric ranges from 0 to 1, where 0 indicates a non-toxic response and 1 indicates a toxic response.
""" - from langkit.metrics.toxicity import response_toxicity_metric + from langkit.metrics.toxicity_onnx import response_toxicity_metric return response_toxicity_metric @@ -486,17 +496,28 @@ def refusal() -> MetricCreator: return response_refusal_similarity_metric class topics: - def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None): + def __init__(self, topics: List[str], hypothesis_template: Optional[str] = None, onnx: bool = True): self.topics = topics self.hypothesis_template = hypothesis_template + self.onnx = onnx def __call__(self) -> MetricCreator: - from langkit.metrics.topic import topic_metric + if self.onnx: + from langkit.metrics.topic_onnx import topic_metric - return partial(topic_metric, "response", self.topics, self.hypothesis_template) + return partial(topic_metric, "response", self.topics, self.hypothesis_template) + else: + from langkit.metrics.topic import topic_metric + + return partial(topic_metric, "response", self.topics, self.hypothesis_template) @staticmethod - def medicine() -> MetricCreator: - from langkit.metrics.topic import topic_metric + def medicine(onnx: bool = False) -> MetricCreator: + if onnx: + from langkit.metrics.topic_onnx import topic_metric + + return partial(topic_metric, "response", ["medicine"]) + else: + from langkit.metrics.topic_onnx import topic_metric - return partial(topic_metric, "response", ["medicine"]) + return partial(topic_metric, "response", ["medicine"]) diff --git a/langkit/metrics/topic_onnx.py b/langkit/metrics/topic_onnx.py new file mode 100644 index 0000000..2511bc0 --- /dev/null +++ b/langkit/metrics/topic_onnx.py @@ -0,0 +1,152 @@ +# pyright: reportUnknownMemberType=none +# pyright: reportUnknownVariableType=none +# pyright: reportUnknownLambdaType=none + +import os +from dataclasses import dataclass +from functools import lru_cache, partial +from typing import List, Optional, TypedDict + +import pandas as pd +import torch +from optimum.modeling_base import PreTrainedModel +from optimum.onnxruntime import ORTModelForSequenceClassification +from transformers import AutoTokenizer, Pipeline, PreTrainedTokenizerBase, pipeline # type: ignore + +from langkit.core.metric import MetricCreator, MultiMetric, MultiMetricResult, UdfInput + +__default_topics = [ + "medicine", + "economy", + "technology", + "entertainment", +] + +_hypothesis_template = "This example is about {}" + +_model = "MoritzLaurer/xtremedistil-l6-h256-zeroshot-v1.1-all-33" +_revision = "dea69e79cd6063916d08b883ea8a3c1823fd10b4" + + +def _download_assets(): + ORTModelForSequenceClassification.from_pretrained( + _model, + subfolder="onnx", + file_name="model.onnx", + revision=_revision, + export=False, + ) + AutoTokenizer.from_pretrained(_model, revision=_revision) + + +def _get_tokenizer() -> PreTrainedTokenizerBase: + return AutoTokenizer.from_pretrained(_model, revision=_revision, local_files_only=True) + + +def _get_model() -> PreTrainedModel: + # return ORTModelForSequenceClassification.from_pretrained( + # _model, + # subfolder="onnx", + # file_name="model.onnx", + # export=False, + # revision=_revision, + # local_files_only=True, + # ) + # Optimum doesn't support offline mode https://github.com/huggingface/optimum/issues/1796 + # workaround for now is to reference the actual model path after caching it. 
diff --git a/langkit/metrics/topic_onnx.py b/langkit/metrics/topic_onnx.py
new file mode 100644
index 0000000..2511bc0
--- /dev/null
+++ b/langkit/metrics/topic_onnx.py
@@ -0,0 +1,152 @@
+# pyright: reportUnknownMemberType=none
+# pyright: reportUnknownVariableType=none
+# pyright: reportUnknownLambdaType=none
+
+import os
+from dataclasses import dataclass
+from functools import lru_cache, partial
+from typing import List, Optional, TypedDict
+
+import pandas as pd
+import torch
+from optimum.modeling_base import PreTrainedModel
+from optimum.onnxruntime import ORTModelForSequenceClassification
+from transformers import AutoTokenizer, Pipeline, PreTrainedTokenizerBase, pipeline  # type: ignore
+
+from langkit.core.metric import MetricCreator, MultiMetric, MultiMetricResult, UdfInput
+
+__default_topics = [
+    "medicine",
+    "economy",
+    "technology",
+    "entertainment",
+]
+
+_hypothesis_template = "This example is about {}"
+
+_model = "MoritzLaurer/xtremedistil-l6-h256-zeroshot-v1.1-all-33"
+_revision = "dea69e79cd6063916d08b883ea8a3c1823fd10b4"
+
+
+def _download_assets():
+    ORTModelForSequenceClassification.from_pretrained(
+        _model,
+        subfolder="onnx",
+        file_name="model.onnx",
+        revision=_revision,
+        export=False,
+    )
+    AutoTokenizer.from_pretrained(_model, revision=_revision)
+
+
+def _get_tokenizer() -> PreTrainedTokenizerBase:
+    return AutoTokenizer.from_pretrained(_model, revision=_revision, local_files_only=True)
+
+
+def _get_model() -> PreTrainedModel:
+    # return ORTModelForSequenceClassification.from_pretrained(
+    #     _model,
+    #     subfolder="onnx",
+    #     file_name="model.onnx",
+    #     export=False,
+    #     revision=_revision,
+    #     local_files_only=True,
+    # )
+    # Optimum doesn't support offline mode (https://github.com/huggingface/optimum/issues/1796).
+    # The workaround for now is to reference the actual model path after caching it.
+    # Uncomment the code above when the issue is resolved.
+    model_name = _model.replace("/", "--")
+    home_dir = os.path.expanduser("~")
+    base = os.environ.get("HF_HOME", os.path.join(home_dir, ".cache/huggingface"))
+    model_path = f"{base}/hub/models--{model_name}/snapshots/{_revision}"
+    return ORTModelForSequenceClassification.from_pretrained(
+        model_path,
+        file_name="onnx/model.onnx",
+        export=False,
+        revision=_revision,
+        local_files_only=True,
+    )
+
+
+@lru_cache
+def _get_classifier() -> Pipeline:
+    return pipeline(
+        "zero-shot-classification",
+        model=_get_model(),  # pyright: ignore[reportArgumentType]
+        tokenizer=_get_tokenizer(),  # pyright: ignore[reportArgumentType]
+        truncation=True,
+        device="cuda" if torch.cuda.is_available() else "cpu",
+    )
+
+
+class ClassificationResults(TypedDict):
+    sequence: str
+    labels: List[str]
+    scores: List[float]
+
+
+def __get_scores_per_label(
+    text: List[str], topics: List[str], hypothesis_template: str = _hypothesis_template, multi_label: bool = True
+) -> List[ClassificationResults]:
+    if not text:
+        return []
+
+    classifier = _get_classifier()
+    result: List[ClassificationResults] = classifier(text, topics, hypothesis_template=hypothesis_template, multi_label=multi_label)  # type: ignore
+    return result
+
+
+def _sanitize_metric_name(topic: str) -> str:
+    """
+    Sanitize a metric name created from a topic: replace whitespace with underscores and lowercase it.
+    """
+    return topic.replace(" ", "_").lower()
+
+
+def topic_metric(input_name: str, topics: List[str], hypothesis_template: Optional[str] = None) -> MultiMetric:
+    hypothesis_template = hypothesis_template or _hypothesis_template
+
+    def udf(text: pd.DataFrame) -> MultiMetricResult:
+        value: List[str] = list(UdfInput(text).iter_column_rows(input_name))
+        results = __get_scores_per_label(value, topics=topics, hypothesis_template=hypothesis_template)
+
+        all_metrics: List[List[float]] = [[] for _ in topics]
+        for result in results:
+            # Map each topic to its score in the current result
+            topic_to_score = {label: score for label, score in zip(result["labels"], result["scores"])}
+            # For each topic, append this result's score to the corresponding list in all_metrics
+            for i, topic in enumerate(topics):
+                all_metrics[i].append(topic_to_score[topic])
+
+        return MultiMetricResult(metrics=all_metrics)
+
+    def cache_assets():
+        _download_assets()
+
+    def init():
+        _get_classifier()
+
+    metric_names = [f"{input_name}.topics.{_sanitize_metric_name(topic)}" for topic in topics]
+    return MultiMetric(names=metric_names, input_names=[input_name], evaluate=udf, cache_assets=cache_assets, init=init)
+
+
+prompt_topic_module = partial(topic_metric, "prompt", __default_topics, _hypothesis_template)
+response_topic_module = partial(topic_metric, "response", __default_topics, _hypothesis_template)
+prompt_response_topic_module = [prompt_topic_module, response_topic_module]
+
+
+@dataclass
+class CustomTopicModules:
+    prompt_topic_module: MetricCreator
+    response_topic_module: MetricCreator
+    prompt_response_topic_module: MetricCreator
+
+
+def get_custom_topic_modules(topics: List[str], template: str = _hypothesis_template) -> CustomTopicModules:
+    prompt_topic_module = partial(topic_metric, "prompt", topics, template)
+    response_topic_module = partial(topic_metric, "response", topics, template)
+    return CustomTopicModules(
+        prompt_topic_module=prompt_topic_module,
+        response_topic_module=response_topic_module,
+        prompt_response_topic_module=[prompt_topic_module, response_topic_module],
+    )
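A short usage sketch for the new module (illustrative only, not part of the patch). It assumes the MultiMetric returned by topic_metric exposes the names/cache_assets/init/evaluate values passed to its constructor above as attributes.

    import pandas as pd

    from langkit.metrics.topic_onnx import topic_metric

    metric = topic_metric("prompt", ["medicine", "technology"])
    # metric.names == ["prompt.topics.medicine", "prompt.topics.technology"]
    metric.cache_assets()  # fetch the ORT model and tokenizer into the HF cache
    metric.init()          # build the zero-shot classification pipeline once
    result = metric.evaluate(pd.DataFrame({"prompt": ["What is the usual dosage of ibuprofen?"]}))
    # result.metrics holds one list of scores per topic, aligned with metric.names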
diff --git a/langkit/metrics/toxicity_onnx.py b/langkit/metrics/toxicity_onnx.py
new file mode 100644
index 0000000..6dd87ac
--- /dev/null
+++ b/langkit/metrics/toxicity_onnx.py
@@ -0,0 +1,81 @@
+# pyright: reportUnknownMemberType=none
+# pyright: reportUnknownVariableType=none
+# pyright: reportUnknownLambdaType=none
+import os
+from functools import lru_cache, partial
+from typing import List, cast
+
+import numpy as np
+import onnxruntime
+import pandas as pd
+from transformers import (
+    AutoTokenizer,
+    PreTrainedTokenizerBase,
+)
+
+from langkit.asset_downloader import get_asset
+from langkit.core.metric import Metric, SingleMetric, SingleMetricResult, UdfInput
+from langkit.onnx_encoder import TransformerModel
+
+
+def __toxicity(tokenizer: PreTrainedTokenizerBase, session: onnxruntime.InferenceSession, max_length: int, text: List[str]) -> List[float]:
+    max_length_in_chars = tokenizer.model_max_length * 5
+    truncated_text = [content[:max_length_in_chars] for content in text]
+    inputs = tokenizer(truncated_text, return_tensors="pt", padding=True, truncation=True)
+    onnx_inputs = {k: v.numpy() for k, v in inputs.items() if k in ["input_ids", "attention_mask"]}
+    onnx_output_logits = session.run(None, onnx_inputs)[0]
+
+    # Apply softmax to convert logits into probabilities
+    probabilities = np.exp(onnx_output_logits) / np.sum(np.exp(onnx_output_logits), axis=1, keepdims=True)  # pyright: ignore[reportUnknownArgumentType]
+    labels = ["non-toxic", "toxic"]
+    # Find the index of the highest probability to determine the predicted label
+    predicted_label_idx = np.argmax(probabilities, axis=1)
+    predicted_labels: List[str] = [labels[idx] for idx in predicted_label_idx]
+    predicted_scores: List[float] = [prob[idx] for prob, idx in zip(probabilities, predicted_label_idx)]
+    results = [{"label": label, "score": score} for label, score in zip(predicted_labels, predicted_scores)]
+    return [result["score"] if result["label"] == "toxic" else 1.0 - result["score"] for result in results]  # type: ignore
+
+
+def _download_assets():
+    name, tag = TransformerModel.ToxicCommentModel.value
+    return get_asset(name, tag)
+
+
+@lru_cache
+def _get_tokenizer() -> PreTrainedTokenizerBase:
+    return AutoTokenizer.from_pretrained(_download_assets())
+
+
+@lru_cache
+def _get_session() -> onnxruntime.InferenceSession:
+    downloaded_path = _download_assets()
+    onnx_model_path = os.path.join(downloaded_path, "model.onnx")
+    print(f"Loading ONNX model from {onnx_model_path}")
+    return onnxruntime.InferenceSession(onnx_model_path, providers=["CPUExecutionProvider"])
+
+
+def toxicity_metric(column_name: str) -> Metric:
+    def cache_assets():
+        _download_assets()
+
+    def init():
+        _get_session()
+        _get_tokenizer()
+
+    def udf(text: pd.DataFrame) -> SingleMetricResult:
+        _tokenizer = _get_tokenizer()
+        _session = _get_session()
+
+        col = list(UdfInput(text).iter_column_rows(column_name))
+        max_length = cast(int, _tokenizer.model_max_length)
+        metrics = __toxicity(_tokenizer, _session, max_length, col)
+        return SingleMetricResult(metrics=metrics)
+
+    return SingleMetric(
+        name=f"{column_name}.toxicity.toxicity_score", input_names=[column_name], evaluate=udf, init=init, cache_assets=cache_assets
+    )
+
+
+prompt_toxicity_metric = partial(toxicity_metric, "prompt")
+response_toxicity_metric = partial(toxicity_metric, "response")
+prompt_response_toxicity_module = [prompt_toxicity_metric, response_toxicity_metric]
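To make the score mapping in __toxicity concrete, here is a small worked example (not part of the patch) for a single input whose logits over ["non-toxic", "toxic"] are [-2.0, 3.0]:

    import numpy as np

    logits = np.array([[-2.0, 3.0]])
    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)  # ~[[0.0067, 0.9933]]
    idx = int(np.argmax(probabilities, axis=1)[0])                                  # 1 -> "toxic"
    score = probabilities[0, idx] if idx == 1 else 1.0 - probabilities[0, idx]      # ~0.9933

    # Either branch reports the probability of the "toxic" class, so the metric
    # ranges from 0 (confidently non-toxic) to 1 (confidently toxic).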
diff --git a/langkit/onnx_encoder.py b/langkit/onnx_encoder.py
index 7b6364b..a615bc8 100644
--- a/langkit/onnx_encoder.py
+++ b/langkit/onnx_encoder.py
@@ -19,8 +19,12 @@ def _get_inference_session(onnx_file_path: str):

 class TransformerModel(Enum):
     AllMiniLM = ("all-MiniLM-L6-v2", "0")
+    ToxicCommentModel = ("toxic-comment-model", "0")

-    def get_model_path(self):
+    def cache_model_assets(self):
+        """
+        Return the path to the cached .onnx model file, downloading the assets if necessary.
+        """
         name, tag = self.value
         return f"{get_asset(name, tag)}/{name}.onnx"

@@ -28,8 +32,7 @@ def get_model_path(self):
 class OnnxSentenceTransformer(EmbeddingEncoder):
     def __init__(self, model: TransformerModel):
         self._tokenizer: BertTokenizerFast = cast(BertTokenizerFast, BertTokenizerFast.from_pretrained("bert-base-uncased"))
-        self._model = model
-        self._session = _get_inference_session(model.get_model_path())
+        self._session = _get_inference_session(model.cache_model_assets())

     def encode(self, text: Tuple[str, ...]) -> "torch.Tensor":
         # Pre-truncate the inputs to the model length for better performance
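A sketch (not part of the patch) of the encoder path that now goes through the renamed helper; the constructor and encode signature are the ones shown in the hunk above:

    from langkit.onnx_encoder import OnnxSentenceTransformer, TransformerModel

    # cache_model_assets() downloads the asset on first use and returns the .onnx path
    encoder = OnnxSentenceTransformer(TransformerModel.AllMiniLM)
    embeddings = encoder.encode(("some prompt text",))  # torch.Tensor, one row per input string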
"onnx-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0efeb46985de08f0efe758cb54ad3457e821a05c2eaf5ba2ccb8cd1602c08084"}, + {file = "onnx-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf14a3d32234f23e44abb73a755cb96a423fac7f004e8f046f36b10214151ee"}, + {file = "onnx-1.16.0-cp312-cp312-win32.whl", hash = "sha256:62a2e27ae8ba5fc9b4a2620301446a517b5ffaaf8566611de7a7c2160f5bcf4c"}, + {file = "onnx-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:3e0860fea94efde777e81a6f68f65761ed5e5f3adea2e050d7fbe373a9ae05b3"}, + {file = "onnx-1.16.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:70a90649318f3470985439ea078277c9fb2a2e6e2fd7c8f3f2b279402ad6c7e6"}, + {file = "onnx-1.16.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:71839546b7f93be4fa807995b182ab4b4414c9dbf049fee11eaaced16fcf8df2"}, + {file = "onnx-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7665217c45a61eb44718c8e9349d2ad004efa0cb9fbc4be5c6d5e18b9fe12b52"}, + {file = "onnx-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5752bbbd5717304a7643643dba383a2fb31e8eb0682f4e7b7d141206328a73b"}, + {file = "onnx-1.16.0-cp38-cp38-win32.whl", hash = "sha256:257858cbcb2055284f09fa2ae2b1cfd64f5850367da388d6e7e7b05920a40c90"}, + {file = "onnx-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:209fe84995a28038e29ae8369edd35f33e0ef1ebc3bddbf6584629823469deb1"}, + {file = "onnx-1.16.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:8cf3e518b1b1b960be542e7c62bed4e5219e04c85d540817b7027029537dec92"}, + {file = "onnx-1.16.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:30f02beaf081c7d9fa3a8c566a912fc4408e28fc33b1452d58f890851691d364"}, + {file = "onnx-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fb29a9a692b522deef1f6b8f2145da62c0c43ea1ed5b4c0f66f827fdc28847d"}, + {file = "onnx-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7755cbd5f4e47952e37276ea5978a46fc8346684392315902b5ed4a719d87d06"}, + {file = "onnx-1.16.0-cp39-cp39-win32.whl", hash = "sha256:7532343dc5b8b5e7c3e3efa441a3100552f7600155c4db9120acd7574f64ffbf"}, + {file = "onnx-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:d7886c05aa6d583ec42f6287678923c1e343afc4350e49d5b36a0023772ffa22"}, + {file = "onnx-1.16.0.tar.gz", hash = "sha256:237c6987c6c59d9f44b6136f5819af79574f8d96a760a1fa843bede11f3822f7"}, +] + +[package.dependencies] +numpy = ">=1.20" +protobuf = ">=3.20.2" + +[package.extras] +reference = ["Pillow", "google-re2"] + [[package]] name = "onnxruntime" version = "1.17.1" @@ -1737,6 +1784,57 @@ typing-extensions = ">=4.7,<5" [package.extras] datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +[[package]] +name = "optimum" +version = "1.18.0" +description = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality." 
+optional = true
+python-versions = ">=3.7.0"
+files = [
+    {file = "optimum-1.18.0-py3-none-any.whl", hash = "sha256:289035db5f88308af9387c7fefabe7dfa51187dfc8f02c96054614275688fd4a"},
+    {file = "optimum-1.18.0.tar.gz", hash = "sha256:28ea691515df6bfee92540fb0ef93a6614aa691f17c188dccd817719fea45936"},
+]
+
+[package.dependencies]
+coloredlogs = "*"
+datasets = [
+    {version = "*"},
+    {version = ">=1.2.1", optional = true, markers = "extra == \"onnxruntime\""},
+]
+evaluate = {version = "*", optional = true, markers = "extra == \"onnxruntime\""}
+huggingface-hub = ">=0.8.0"
+numpy = "*"
+onnx = {version = "*", optional = true, markers = "extra == \"onnxruntime\""}
+onnxruntime = {version = ">=1.11.0", optional = true, markers = "extra == \"onnxruntime\""}
+packaging = "*"
+protobuf = {version = ">=3.20.1", optional = true, markers = "extra == \"onnxruntime\""}
+sympy = "*"
+torch = ">=1.11"
+transformers = {version = ">=4.26.0,<4.40.0", extras = ["sentencepiece"]}
+
+[package.extras]
+amd = ["optimum-amd"]
+benchmark = ["evaluate (>=0.2.0)", "optuna", "scikit-learn", "seqeval", "torchvision", "tqdm"]
+dev = ["Pillow", "accelerate", "black (>=23.1,<24.0)", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "ruff (==0.1.5)", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
+diffusers = ["diffusers"]
+doc-build = ["accelerate"]
+exporters = ["onnx", "onnxruntime", "timm"]
+exporters-gpu = ["onnx", "onnxruntime-gpu", "timm"]
+exporters-tf = ["h5py", "numpy (<1.24.0)", "onnx", "onnxruntime", "tensorflow (>=2.4,<=2.12.1)", "tf2onnx", "timm", "transformers[sentencepiece] (>=4.26.0,<4.38.0)"]
+furiosa = ["optimum-furiosa"]
+graphcore = ["optimum-graphcore"]
+habana = ["optimum-habana", "transformers (>=4.37.0,<4.38.0)"]
+intel = ["optimum-intel (>=1.15.0)"]
+neural-compressor = ["optimum-intel[neural-compressor] (>=1.15.0)"]
+neuron = ["optimum-neuron[neuron]"]
+neuronx = ["optimum-neuron[neuronx]"]
+nncf = ["optimum-intel[nncf] (>=1.15.0)"]
+onnxruntime = ["datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime (>=1.11.0)", "protobuf (>=3.20.1)"]
+onnxruntime-gpu = ["accelerate", "datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime-gpu (>=1.11.0)", "protobuf (>=3.20.1)"]
+openvino = ["optimum-intel[openvino] (>=1.15.0)"]
+quality = ["black (>=23.1,<24.0)", "ruff (==0.1.5)"]
+tests = ["Pillow", "accelerate", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
+
 [[package]]
 name = "packaging"
 version = "23.2"
@@ -3845,10 +3943,12 @@ filelock = "*"
 huggingface-hub = ">=0.19.3,<1.0"
 numpy = ">=1.17"
 packaging = ">=20.0"
+protobuf = {version = "*", optional = true, markers = "extra == \"sentencepiece\""}
 pyyaml = ">=5.1"
 regex = "!=2019.12.17"
 requests = "*"
 safetensors = ">=0.4.1"
+sentencepiece = {version = ">=0.1.91,<0.1.92 || >0.1.92", optional = true, markers = "extra == \"sentencepiece\""}
 tokenizers = ">=0.14,<0.19"
 tqdm = ">=4.27"

"nltk", "numpy", "onnxruntime", "openai", "presidio-analyzer", "presidio-anonymizer", "sentence-transformers", "tiktoken"] +all = ["datasets", "evaluate", "faiss-cpu", "ipywidgets", "nltk", "numpy", "onnxruntime", "openai", "optimum", "presidio-analyzer", "presidio-anonymizer", "sentence-transformers", "tiktoken"] torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4" -content-hash = "e276fe46e042506ef0624ba65bff36b8995059f9be8f479c6a8d59f0987add7d" +content-hash = "d9b0bbb7380686ed92f3ed811434bbe311735b0112f1a74ce5a04bfe398055a5" diff --git a/pyproject.toml b/pyproject.toml index b4d93f9..a403a37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langkit" -version = "0.0.28.dev0" +version = "0.0.28.dev1" description = "A language toolkit for monitoring LLM interactions" authors = ["WhyLabs.ai "] homepage = "https://docs.whylabs.ai/docs/large-language-model-monitoring" @@ -32,6 +32,7 @@ presidio-anonymizer = {version = "^2.2.352", optional = true} tiktoken = {version = "^0.6.0", optional = true} tenacity = "^8.2.3" onnxruntime = {version = "^1.17.1", optional = true} +optimum = {extras = ["onnxruntime"], version = "^1.18.0", optional = true} [tool.poetry.group.dev.dependencies] @@ -63,6 +64,7 @@ all = [ "presidio-anonymizer", "tiktoken", "onnxruntime", + "optimum", ] [build-system]