Update from https://github.com/whylabs-ai/whylogs-container-python/commit/76fef2fc822475b13ee6ed410435d3cf5b1b9806
whylabs · Aug 29, 2024 · 7447ee6 · 7447ee6
1 parent 3484eb8
commit 7447ee6
Show file tree

Hide file tree

Showing 28 changed files with 183 additions and 83 deletions.
diff --git a/README.md b/README.md
@@ -102,7 +102,7 @@ See [configure_container_python][configure_container_python] for an example that
 What goes into this Dockerfile can depend on what you're trying to do. The simplest Dockerfile would look like this.
 
 ```Dockerfile
-FROM registry.gitlab.com/whylabs/langkit-container:2.0.0
+FROM registry.gitlab.com/whylabs/langkit-container:2.0.1
 
 # Force the container to fail if the config is not present. Safeguard for messing up the
 # build in such a way that the config is not included correctly.
@@ -116,7 +116,7 @@ You're in full control of this Dockerfile and build and you can do basically any
 you might want to include some additional pip dependencies as well, which could look like this.
 
 ```Dockerfile
-FROM registry.gitlab.com/whylabs/langkit-container:2.0.0
+FROM registry.gitlab.com/whylabs/langkit-container:2.0.1
 
 # Force the container to fail if the config is not present. Safeguard for messing up the
 # build in such a way that the config is not included correctly.

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,3 +1,105 @@
+# 2.0.1 Release Notes
+
+- The `/debug/evaluate` endpoint is now disabled by default. You can enable it by setting the `DEBUG_ENDPOINT_ENABLED` environment variable
+  to `True`. It has a large performance impact and should only be used for debugging and prototyping policies.
+
+## Innocuous Prompt Filtering
+
+The newest injection metric has an option to filter out innocuous prompts using our internal classifier. This can help reduce false
+positives by first checking to see if the prompt is innocuous before running the injection metric. If it is, then the metric value will end
+up being `0.0`. You can enable it on the policy by setting the `filter_innocuous` option to `true`. We'll eventually be making this the
+default after additional tuning.
+
+```yaml
+id: policy-id
+policy_version: 1
+schema_version: 0.0.1
+whylabs_dataset_id: injection
+
+metrics:
+  - metric: prompt.similarity.injection
+    options:
+      filter_innocuous: true
+```
+
+## New Embedding Creation API
+
+There is a new `/debug/embeddings` endpoint that allows you to create embeddings for a prompt and response. This is useful when paired with
+the injection metric customization feature, allowing you to generate pre-computed embeddings using the correct embedding model for the
+version of the container you're using.
+
+```python
+import whylogs_container_client.api.debug.debug_embeddings as DebugEmbeddings
+from whylogs_container_client.models.evaluation_result import EvaluationResult
+
+request = EmbeddingRequest(prompt="my prompt", response="my response")
+
+response = DebugEmbeddings.sync_detailed(client=client_external, body=request)
+
+if not isinstance(response.parsed, EvaluationResult):
+    raise Exception(f"Failed to generate embeddings. Status code: {response.status_code}. {response.parsed}")
+
+actual: EvaluationResult = response.parsed
+
+metrics = actual.metrics[0]
+
+# These are embeddings of shape 384 by default
+assert metrics["prompt.util.embedding"] == AnyCollection(384)
+assert metrics["response.util.embedding"] == AnyCollection(384)
+```
+
+## Customizing Injection Metric
+
+The injection metric can now be customized with pre-computed parquet/numpy embeddings. The injection metric is a vector store under the hood
+and these embeddings will be used in nearest neighbor calculations. This is not available via rulesets yet. You might want to leave
+innocuous filtering off when using this feature if it ends up classifying your embeddings as innocuous.
+
+```yaml
+id: policy-id
+policy_version: 1
+schema_version: 0.0.1
+whylabs_dataset_id: injection
+
+metrics:
+  - metric: prompt.similarity.injection
+    options:
+      filter_innocuous: false
+      additional_data_url: s3://anthony-test-bucket-2/data/embeddings.parquet
+      neighbors_num: 10
+      return_neighbors: true
+```
+
+## Remote Metric Support
+
+Metrics in the underlying workflow framework that the container uses can now be remote, which is a synonym for IO bound. For now, this only
+applies to the custom python configuration path because metrics have to be defined from scratch in order to signal that they're actually IO
+bound, and none of the standard metrics that we ship are actually IO bound yet; they're all CPU bound. See the Python configuration examples
+for defining a custom metric. The following is a simple example.
+
+```python
+def remote_metric(id: str, work_time: float = 0.01) -> MetricCreator:
+    def _metric():
+        def udf(text: pd.DataFrame) -> SingleMetricResult:
+            try:
+                # Insert api call or any io bound work here
+                # Use the results of that work to return metric values
+                metrics = [1 for _ in range(len(text))]
+                return SingleMetricResult(metrics)
+            except Exception as e:
+                # return None for any errors
+                return SingleMetricResult([None for _ in range(len(text))])
+
+        return SingleMetric(
+            name="remote_metric_name",
+            input_names=["prompt"],
+            evaluate=udf,
+            remote=True, # This marks the metric as remote
+        )
+
+    return _metric
+```
+
+All remote metrics are executed upfront and in parallel, then the rest of the configured metrics are run in serial, if there are any.
 # 2.0.0 Release Notes
 
 - New metrics for computing a set of 3d coordinates that the WhyLabs platform can interpret to visualize the prompt/response data relative

diff --git a/examples/configure_container_python/Dockerfile b/examples/configure_container_python/Dockerfile
@@ -1,5 +1,5 @@
 # DOCSUB_START simple_dockerfile
-FROM registry.gitlab.com/whylabs/langkit-container:2.0.0
+FROM registry.gitlab.com/whylabs/langkit-container:2.0.1
 
 # Force the container to fail if the config is not present. Safeguard for messing up the
 # build in such a way that the config is not included correctly.

diff --git a/examples/configure_container_python/Makefile b/examples/configure_container_python/Makefile
@@ -3,7 +3,7 @@
 
 CONTAINER_NAME = langkit_example_configure_container_python
 
-version := 2.0.0
+version := 2.0.1
 
 all: build
 

diff --git a/examples/configure_container_python/poetry.lock b/examples/configure_container_python/poetry.lock
diff --git a/examples/configure_container_python/pyproject.toml b/examples/configure_container_python/pyproject.toml
@@ -13,7 +13,7 @@ python = "^3.10"
 # These are all dev dependencies. They're already included in the container and we don't want to
 # overwrite those versions, we just want types and auto completion in this project.
 langkit = "0.0.28.dev2"
-whylogs-container-client = "2.0.0"
+whylogs-container-client = "2.0.1"
 whylogs-container-types = "0.4.13"
 pandas = "1.3.5"
 

diff --git a/examples/configure_container_yaml/Dockerfile b/examples/configure_container_yaml/Dockerfile
@@ -1,4 +1,4 @@
-FROM registry.gitlab.com/whylabs/langkit-container:2.0.0
+FROM registry.gitlab.com/whylabs/langkit-container:2.0.1
 
 # Force the container to fail if the config is not present. Safeguard for messing up the
 # build in such a way that the config is not included correctly.

diff --git a/examples/configure_container_yaml/Makefile b/examples/configure_container_yaml/Makefile
@@ -2,7 +2,7 @@
 .PHONY: test-no-secure run-no-secure ci-install
 
 CONTAINER_NAME = langkit_example_configure_container_yaml
-version := 2.0.0
+version := 2.0.1
 
 
 all: build

diff --git a/examples/configure_container_yaml/poetry.lock b/examples/configure_container_yaml/poetry.lock
diff --git a/examples/configure_container_yaml/pyproject.toml b/examples/configure_container_yaml/pyproject.toml
@@ -12,7 +12,7 @@ python = "^3.10"
 [tool.poetry.group.dev.dependencies]
 # These are all dev dependencies. They're already included in the container and we don't want to
 # overwrite those versions, we just want types and auto completion in this project.
-whylogs-container-client = "2.0.0"
+whylogs-container-client = "2.0.1"
 pandas = "1.3.5"
 
 pyright = "^1.1.347"

diff --git a/examples/configure_container_yaml/test/test_container.py b/examples/configure_container_yaml/test/test_container.py
@@ -8,6 +8,7 @@
 from whylogs_container_client import AuthenticatedClient
 from whylogs_container_client.models.action import Action
 from whylogs_container_client.models.action_type import ActionType
+from whylogs_container_client.models.embedding_request import EmbeddingRequest
 from whylogs_container_client.models.evaluation_result import EvaluationResult
 from whylogs_container_client.models.evaluation_result_metrics_item import EvaluationResultMetricsItem
 from whylogs_container_client.models.evaluation_result_scores_item import EvaluationResultScoresItem
@@ -1226,3 +1227,23 @@ def test_multi_col_computer_medical_advice(client: AuthenticatedClient):
     )
 
     assert expected == actual
+
+
+def test_embedding_creation(client: AuthenticatedClient):
+    import whylogs_container_client.api.debug.debug_embeddings as DebugEmbeddings
+    from whylogs_container_client.models.evaluation_result import EvaluationResult
+
+    request = EmbeddingRequest(prompt="my prompt", response="my response")
+
+    response = DebugEmbeddings.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, EvaluationResult):
+        raise Exception(f"Failed to generate embeddings. Status code: {response.status_code}. {response.parsed}")
+
+    actual: EvaluationResult = response.parsed
+
+    metrics = actual.metrics[0]
+
+    # These are embeddings of shape 384 by default
+    assert metrics["prompt.util.embedding"] == AnyCollection(384)
+    assert metrics["response.util.embedding"] == AnyCollection(384)
diff --git a/examples/container_library/Makefile b/examples/container_library/Makefile
@@ -1,6 +1,6 @@
 .PHONY: install run test lint lint-fix format format-fix fix all clean
 
-version := 2.0.0
+version := 2.0.1
 
 all: install build test
 

diff --git a/examples/container_library/poetry.lock b/examples/container_library/poetry.lock
diff --git a/examples/container_library/pyproject.toml b/examples/container_library/pyproject.toml
@@ -9,8 +9,8 @@ readme = "README.md"
 python = ">=3.9,<3.12"
 whylogs = "^1.4.4"
 
-whylogs-container = {version = "2.0.0", extras = ["llm"], source = "whylabs_container_gitlab"}
-whylabs-llm-toolkit = {version = "0.1.20", extras = ["infer"], source = "whylabs_container_gitlab"}
+whylogs-container = {version = "2.0.1", extras = ["llm"], source = "whylabs_container_gitlab"}
+whylabs-llm-toolkit = {version = "*", extras = ["infer"], source = "whylabs_container_gitlab"}
 torch = { version = "2.2.1+cpu", source = "torch" }
 
 # Just here to work around a runtime bug in a later version of transformers, may not be necessary in the future
@@ -19,7 +19,7 @@ transformers = "4.39.3"
 [tool.poetry.group.dev.dependencies]
 pyright = "^1.1.367"
 ruff = "0.6.2"
-whylogs-container-client = "2.0.0"
+whylogs-container-client = "2.0.1"
 
 [build-system]
 requires = ["poetry-core"]

diff --git a/examples/custom_model/Dockerfile b/examples/custom_model/Dockerfile
@@ -1,5 +1,5 @@
 # DOCSUB_START docker_dependencies
-FROM registry.gitlab.com/whylabs/langkit-container:2.0.0
+FROM registry.gitlab.com/whylabs/langkit-container:2.0.1
 
 # Force the container to fail if the config is not present. Safeguard for messing up the
 # build in such a way that the config is not included correctly.