Add no_configuration example that uses the container without config

whylabs · Jan 27, 2024 · e70d3a5 · e70d3a5
1 parent cc5973d
commit e70d3a5
Show file tree

Hide file tree

Showing 9 changed files with 3,694 additions and 0 deletions.
diff --git a/examples/no_configuration/Makefile b/examples/no_configuration/Makefile
@@ -0,0 +1,53 @@
+# Every target in this file is a command, not a file, so all of them are phony.
+.PHONY: help install build run debug all clean lint lint-fix format format-fix fix test pip-install-python-client
+
+# Local tag given to the pulled image; test/conftest.py starts containers by this name.
+CONTAINER_NAME := langkit_example_configure_container_python
+# Published container image that this example runs without any custom configuration.
+DOCKER_IMAGE := whylabs/whylogs:py-llm-1.0.2.dev0
+
+# Default goal.
+all: build
+
+install: ## Install the python dependencies with poetry
+	poetry install
+
+build: ## Pull the container image and tag it with the local container name
+	docker pull $(DOCKER_IMAGE)
+	docker tag $(DOCKER_IMAGE) $(CONTAINER_NAME)
+
+test: ## Run the pytest suite in ./test
+	poetry run pytest -vvv ./test
+
+run: ## Run the container on 127.0.0.1 port 8000 using the settings in local.env
+	docker run -it --platform=linux/amd64 --rm -p 127.0.0.1:8000:8000 --env-file local.env $(CONTAINER_NAME)
+
+# --rm matches the run target so that debugging sessions do not leave stopped containers behind.
+debug: ## Open a bash shell inside the container instead of starting the server
+	docker run -it --platform=linux/amd64 --rm --entrypoint /bin/bash $(CONTAINER_NAME)
+
+clean: ## Remove requirements.txt if it is present
+	rm -rf requirements.txt
+
+# Both checkers always run, even if the first one fails, so a single invocation reports everything.
+# printf is used instead of echo because echo's handling of "\n" differs between shells.
+# The exit status is 1 if either tool failed; adding the two codes could wrap around to 0.
+lint: ## Check for type issues with pyright and lint issues with ruff
+	@printf 'Running pyright\n\n'; poetry run pyright; PYRIGHT_EXIT_CODE=$$?; \
+	printf '\nRunning ruff check\n\n'; poetry run ruff check; RUFF_EXIT_CODE=$$?; \
+	exit $$(( PYRIGHT_EXIT_CODE || RUFF_EXIT_CODE ))
+
+lint-fix: ## Fix the linting issues that ruff can fix automatically
+	poetry run ruff check --fix
+
+format: ## Check for formatting issues
+	poetry run ruff format --check
+
+format-fix: ## Fix formatting issues
+	poetry run ruff format
+
+fix: lint-fix format-fix ## Fix all linting and formatting issues
+
+# Swaps the installed whylogs-container-client for whatever wheel(s) match
+# ../../whylogs-container-client/dist/*.whl. The uninstall step is allowed to fail (`|| true`)
+# so the target still proceeds when the package is not currently installed.
+pip-install-python-client:  ## Install the latest python client from the main project
+	poetry run pip uninstall whylogs-container-client -y || true
+	poetry run pip install ../../whylogs-container-client/dist/*.whl
+
+# Lists every target that carries a trailing "## description" comment.
+# The pattern is anchored on a target name so recipe lines (which start with a tab) never match.
+# grep -E replaces the obsolescent egrep, and sed runs without the GNU-only -s flag.
+help: ## Show this help message.
+	@echo 'usage: make [target] ...'
+	@echo
+	@echo 'targets:'
+	@grep -E '^[a-zA-Z0-9_-]+:.* ## .+' $(MAKEFILE_LIST) | sed 's/:\(.*\)##/: ##/' | column -t -c 2 -s ':#'
+
+
diff --git a/examples/no_configuration/README.md b/examples/no_configuration/README.md
@@ -0,0 +1,46 @@
+# No Configuration Example
+
+Sample project that demonstrates how to use the langkit container without any custom configuration. You'll be using the `log api` (below) to
+track metrics and have them sent to WhyLabs. Validation doesn't work here because there are no thresholds defined, but you will get all of
+the langkit metrics by default.
+
+## Setup
+
+Make sure you have [poetry](https://python-poetry.org/) and docker installed. Create a `local.env` file with your WhyLabs credentials.
+
+```
+# Generated at https://hub.whylabsapp.com/settings/access-tokens
+WHYLABS_API_KEY=<api key>
+CONTAINER_PASSWORD=password
+
+# Set based on your model type in WhyLabs. Daily is the default.
+DEFAULT_WHYLABS_DATASET_CADENCE=DAILY
+
+# IMPORTANT
+# Usually the container fails without custom config because that's the primary use case, but
+# this example shows how it works without custom config.
+FAIL_STARTUP_WITHOUT_CONFIG=False
+```
+
+Now you can pull the container image and run the tests, which send log requests to it.
+
+```
+make install build test
+```
+
+Or just run the container locally to manually test and send ad hoc requests.
+
+```
+make install build run
+```
+
+## Making Requests
+
+Check out the `test` folder for a full example that uses the python client to make requests. If you prefer using other languages, curl, or
+generic http then see the [api docs](https://whylabs.github.io/langkit-container-examples/api.html) for request formats.
+
+- [validate api](https://whylabs.github.io/langkit-container-examples/api.html#tag/llm/operation/validate_llm)
+- [log api](https://whylabs.github.io/langkit-container-examples/api.html#tag/llm/operation/log_llm)
+- [bulk log api](https://whylabs.github.io/langkit-container-examples/api.html#tag/profile/operation/log)
+
+
diff --git a/examples/no_configuration/local.env b/examples/no_configuration/local.env
@@ -0,0 +1,13 @@
+# Generated at https://hub.whylabsapp.com/settings/access-tokens
+# Replace the placeholder locally and never commit a real key.
+WHYLABS_API_KEY=<api key>
+CONTAINER_PASSWORD=password
+
+# Set based on your model type. Daily is the default.
+DEFAULT_WHYLABS_DATASET_CADENCE=DAILY
+
+# Upload profiles every five minutes
+DEFAULT_WHYLABS_UPLOAD_CADENCE=M
+DEFAULT_WHYLABS_UPLOAD_INTERVAL=5
+
+LOG_LEVEL=DEBUG
+# Lets the container start even though this example provides no custom configuration.
+FAIL_STARTUP_WITHOUT_CONFIG=False
+
diff --git a/examples/no_configuration/poetry.lock b/examples/no_configuration/poetry.lock
diff --git a/examples/no_configuration/pyproject.toml b/examples/no_configuration/pyproject.toml
@@ -0,0 +1,57 @@
+# Poetry project metadata for this example.
+[tool.poetry]
+name =  "whylogs_config"
+version = "0.1.0"
+description = ""
+authors = ["Anthony Naddeo <[email protected]>"]
+readme = "README.md"
+# NOTE(review): no whylogs_config package is visible in this example (only test/) - confirm this include is intended.
+packages = [{include = "whylogs_config"}]
+
+# Runtime dependencies: the python client used by the tests to talk to the container.
+[tool.poetry.dependencies]
+python = "^3.10"
+whylogs-container-client = "1.0.2.dev0"
+
+[tool.poetry.group.dev.dependencies]
+# These are all dev dependencies. They're already included in the container and we don't want to
+# overwrite those versions, we just want types and auto completion in this project.
+# whylogs-container-types = "^0.4.0"
+whylogs = "*"
+langkit = {url = "https://whypy.s3.us-west-2.amazonaws.com/langkit-0.0.40-py3-none-any.whl", extras = ["all"]}
+whylogs-container-types = {url = "https://whypy.s3.us-west-2.amazonaws.com/whylogs_container_types-0.4.8-py3-none-any.whl"}
+pandas = "1.3.5"
+
+# Tooling used by the Makefile's lint, format and test targets.
+pyright = "^1.1.347"
+ruff = "^0.1.13"
+pytest = "^7.4.4"
+# NOTE(review): also declared above in [tool.poetry.dependencies] pinned to 1.0.2.dev0 - confirm this unpinned entry is needed.
+whylogs-container-client = "*"
+
+# Build backend declaration: the project is built with poetry-core.
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+# Strict type checking for the example sources, with a few noisy reports turned off.
+[tool.pyright]
+include = ["./whylogs_config/**/*.py", "./test/**/*.py"]
+typeCheckingMode = "strict"
+
+reportMissingTypeStubs = false
+reportMissingParameterType = false
+# The key was previously misspelled, so pyright ignored it and the report stayed enabled.
+reportMissingTypeArgument = false
+
+# Ruff settings shared by `ruff check` and `ruff format` (see the Makefile's lint/format targets).
+[tool.ruff]
+line-length = 140
+indent-width = 4
+include = ["./whylogs_config/**/*.py", "./test/**/*.py"]
+# Rule prefixes enabled for `ruff check`.
+select = ["E", "F", "I", "W"]
+
+# Import sorting: these packages are listed as first-party.
+[tool.ruff.isort]
+known-first-party = ["whylogs", "langkit"]
+
+[tool.ruff.lint]
+# Every enabled rule may be auto-fixed by `ruff check --fix`.
+fixable = ["ALL"]
+# The pattern matches names made of underscores, or starting with an underscore.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+# Formatter style.
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
+
diff --git a/examples/no_configuration/test/__init__.py b/examples/no_configuration/test/__init__.py
diff --git a/examples/no_configuration/test/assert_util.py b/examples/no_configuration/test/assert_util.py
@@ -0,0 +1,58 @@
+from base64 import b64decode
+from dataclasses import dataclass
+from typing import Any, Dict, List, Type, TypeVar, cast
+
+from whylogs_container_client.models.process_logger_status_response import ProcessLoggerStatusResponse
+
+from whylogs.core.view.segmented_dataset_profile_view import DatasetProfileView
+
+
+def assert_profile(p1: DatasetProfileView, p2: DatasetProfileView):
+    print(p1.dataset_timestamp)
+    print(p2.dataset_timestamp)
+    assert p1.dataset_timestamp == p2.dataset_timestamp
+    assert p1.to_pandas().to_dict() == p2.to_pandas().to_dict()  # type: ignore
+
+
+@dataclass
+class LoggerStatusProfiles:
+    """
+    Decoded profile views for one dataset, as built by get_profiles from a logger status response.
+    """
+
+    # Profiles decoded from the status entry's `views` field.
+    views: List[DatasetProfileView]
+    # Profiles decoded from the status entry's `pending_views` field.
+    pending_views: List[DatasetProfileView]
+
+
+def get_profiles(response: ProcessLoggerStatusResponse) -> Dict[str, LoggerStatusProfiles]:
+    """
+    Returns a dictionary of dataset_id to a list of dataset profile views.
+    This preserves the mapping of dataset id and the separation of pending views from views.
+    """
+    views: Dict[str, LoggerStatusProfiles] = {}
+
+    for k, v in response.statuses.additional_properties.items():
+        views[k] = LoggerStatusProfiles(
+            views=[DatasetProfileView.deserialize(b64decode(x)) for x in v.views],
+            pending_views=[DatasetProfileView.deserialize(b64decode(x)) for x in v.pending_views],
+        )
+    return views
+
+
+def get_profile_list(response: ProcessLoggerStatusResponse) -> List[DatasetProfileView]:
+    """
+    Returns a single list of all dataset profile views.
+    """
+    views: List[DatasetProfileView] = []
+    for v in response.statuses.additional_properties.values():
+        views.extend([DatasetProfileView.deserialize(b64decode(x)) for x in v.views])
+        views.extend([DatasetProfileView.deserialize(b64decode(x)) for x in v.pending_views])
+    return views
+
+
+class AlwaysEqual:
+    """
+    Placeholder object that compares equal to anything, for use where a test does not care about a value.
+    """
+
+    def __eq__(self, _other: Any) -> bool:
+        # The other operand is ignored on purpose.
+        return True
+
+
+T = TypeVar("T")
+
+
+def always_equal(type: Type[T]) -> T:
+    """
+    Return an AlwaysEqual instance that the type checker treats as a `T`.
+
+    The `type` argument is not used at runtime; it only lets the caller pick the static return type.
+    """
+    return cast(T, AlwaysEqual())
diff --git a/examples/no_configuration/test/conftest.py b/examples/no_configuration/test/conftest.py
@@ -0,0 +1,80 @@
+import random
+import signal
+import subprocess
+import time
+from typing import Callable, Generator, List, TypeVar
+
+import pytest
+import whylogs_container_client.api.manage.health as Health
+from whylogs_container_client import AuthenticatedClient
+
+image_name = "langkit_example_configure_container_python"  # from the makefile, run `make build` to build the image
+
+T = TypeVar("T")
+
+
+def retry(func: Callable[[], T], max_retries=40, interval=1) -> T:
+    """
+    Retry a function until it succeeds or the max_retries is reached.
+    """
+    retry_count = 0
+    while retry_count < max_retries:
+        try:
+            return func()
+        except Exception:
+            time.sleep(interval)
+            retry_count += 1
+
+    raise Exception(f"Failed to run function after {retry_count} retries")
+
+
+_fake_key = "xxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx:xxx-xxxxxx"
+
+
+class ServerCommands:
+    @staticmethod
+    def docker(port: str) -> List[str]:
+        return [
+            "docker",
+            "run",
+            "--rm",
+            "-p",
+            f"127.0.0.1:{port}:8000",
+            "--env",
+            f"WHYLABS_API_KEY={_fake_key}",  # Not uploading anything for these tests, doesn't matter
+            "--env",
+            "DEFAULT_MODEL_ID=model-62",
+            "--env",
+            "CONTAINER_PASSWORD=password",
+            "--env",
+            "DEFAULT_WHYLABS_DATASET_CADENCE=DAILY",
+            "--env",
+            "DEFAULT_WHYLABS_UPLOAD_CADENCE=M",
+            "--env",
+            "DEFAULT_WHYLABS_UPLOAD_INTERVAL=5",
+            image_name,
+        ]
+
+
+def create_server(port: int) -> subprocess.Popen[bytes]:
+    command = ServerCommands.docker(str(port))
+    print(f"Starting container with command: {' '.join(command)}")
+    return subprocess.Popen(ServerCommands.docker(str(port)))
+
+
+@pytest.fixture(scope="module")
+def client() -> Generator[AuthenticatedClient, None, None]:
+    port = random.randint(8000, 9000)
+    proc = create_server(port=port)
+    client = AuthenticatedClient(base_url=f"http://localhost:{port}", token="password", prefix="", auth_header_name="X-API-Key")  # type: ignore[reportGeneralTypeIssues]
+
+    def _check_health():
+        print("Checking health", flush=True)
+        Health.sync_detailed(client=client)
+
+    try:
+        retry(_check_health)
+        yield client
+    finally:
+        proc.send_signal(signal.SIGINT)
+        proc.wait()
diff --git a/examples/no_configuration/test/test_container.py b/examples/no_configuration/test/test_container.py
@@ -0,0 +1,32 @@
+from test.assert_util import get_profile_list
+
+import pandas as pd
+import whylogs_container_client.api.llm.log_llm as LogLLM
+import whylogs_container_client.api.manage.status as Status
+from whylogs_container_client import AuthenticatedClient
+from whylogs_container_client.models.llm_validate_request import LLMValidateRequest
+from whylogs_container_client.models.process_logger_status_response import ProcessLoggerStatusResponse
+
+
+def test_log(client: AuthenticatedClient):
+    time_ms = 1701284201000  # Wednesday, November 29, 2023 6:56:41 PM
+
+    request = LLMValidateRequest(prompt="How do I see my settings", response="I don't know", dataset_id="model-140", timestamp=time_ms)
+
+    LogLLM.sync_detailed(client=client, body=request)
+
+    response = Status.sync_detailed(client=client)
+
+    if not isinstance(response.parsed, ProcessLoggerStatusResponse):
+        raise Exception("Unexpected response type")
+
+    profiles = get_profile_list(response.parsed)
+
+    assert len(profiles) == 1
+
+    profile = profiles[0]
+
+    # This profile has all of the langkit metrics that are sent to whylogs
+    pd.set_option("display.max_columns", None)
+    pd.set_option("display.width", None)
+    print(profile.to_pandas())  # type: ignore