
Commit

Add new examples with integ tests
naddeoa committed Jan 26, 2024
1 parent a2ced04 commit 4b00a10
Showing 38 changed files with 6,521 additions and 864 deletions.
5 changes: 0 additions & 5 deletions examples/configure-container/Dockerfile

This file was deleted.

3 binary files not shown.
44 changes: 0 additions & 44 deletions examples/configure-container/test/test_container.py

This file was deleted.

32 changes: 0 additions & 32 deletions examples/configure-container/whylogs_config/config.yaml

This file was deleted.

7 changes: 5 additions & 2 deletions examples/configure_container_python/Dockerfile
@@ -1,5 +1,8 @@
 # FROM whylabs/whylogs:py-llm-1.0.1
-FROM whylabs/whylogs:py-llm-latest
+FROM whylabs/whylogs:py-llm-1.0.2.dev0

+# Force the container to fail if the config is not present. Safeguard for messing up the
+# build in such a way that the config is not included correctly.
+ENV FAIL_STARTUP_WITHOUT_CONFIG=True

 # Copy our custom config code
 COPY ./whylogs_config /opt/whylogs-container/whylogs_container/whylogs_config/
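With FAIL_STARTUP_WITHOUT_CONFIG=True baked into the image, a build that forgets to copy the config now dies at startup instead of silently serving the default policy. A minimal smoke-test sketch of that guard, assuming the container is mapped to localhost:8000 and uses "password" as its auth token (both assumptions, not values from this commit):

# Hypothetical smoke test for the fail-fast guard: the container should only
# report healthy if the custom whylogs_config made it into the image.
import whylogs_container_client.api.manage.health as Health
from whylogs_container_client import AuthenticatedClient

client = AuthenticatedClient(base_url="http://localhost:8000", token="password")  # assumed values

response = Health.sync_detailed(client=client)
assert response.status_code == 200  # a mis-built image exits before serving this endpoint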
9 changes: 7 additions & 2 deletions examples/configure_container_python/Makefile
@@ -1,6 +1,6 @@
-.PHONY: help requirements build run all clean lint lint-fix format format-fix fix test
+.PHONY: help requirements build run all clean lint lint-fix format format-fix fix test pip-install-python-client

-CONTAINER_NAME = langkit_configuration_example
+CONTAINER_NAME = langkit_example_configure_container_python

 all: build

@@ -38,9 +38,14 @@ format-fix: ## Fix formatting issues

 fix: lint-fix format-fix ## Fix all linting and formatting issues

+pip-install-python-client: ## Install the latest python client from the main project
+	poetry run pip uninstall whylogs-container-client -y || true
+	poetry run pip install ../../whylogs-container-client/dist/*.whl

 help: ## Show this help message.
 	@echo 'usage: make [target] ...'
 	@echo
 	@echo 'targets:'
 	@egrep '^(.+)\:(.*) ##\ (.+)' ${MAKEFILE_LIST} | sed -s 's/:\(.*\)##/: ##/' | column -t -c 2 -s ':#'
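The new pip-install-python-client target is the dev loop for testing an unreleased client: build a wheel in the sibling whylogs-container-client project first (so that ../../whylogs-container-client/dist/ contains a .whl), then run make pip-install-python-client here to force this example's environment onto that local build.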


1,016 changes: 455 additions & 561 deletions examples/configure_container_python/poetry.lock

Large diffs are not rendered by default.

16 changes: 6 additions & 10 deletions examples/configure_container_python/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "python_container_example"
+name = "configure_container_python"
 version = "0.1.0"
 description = ""
 authors = ["Anthony Naddeo <[email protected]>"]
@@ -8,24 +8,20 @@ packages = [{include = "whylogs_config"}]

 [tool.poetry.dependencies]
 python = "^3.10"
-whylogs-container-types = {url = "https://guest-session-testing-public.s3.us-west-2.amazonaws.com/whylogs_container_types-0.4.2-py3-none-any.whl"}
+whylogs-container-client = "1.0.2.dev0"

 [tool.poetry.group.dev.dependencies]
 # These are all dev dependencies. They're already included in the container and we don't want to
 # overwrite those versions, we just want types and auto completion in this project.
 # whylogs-container-types = "^0.4.0"
 langkit = {url = "https://whypy.s3.us-west-2.amazonaws.com/langkit-0.0.38-py3-none-any.whl", extras = ["all"]}
+whylogs-container-types = {url = "https://whypy.s3.us-west-2.amazonaws.com/whylogs_container_types-0.4.8-py3-none-any.whl"}
 pandas = "1.3.5"
 whylogs = {version = "1.3.20", extras = ["proc"]}
-whylogs-container-client = "^1.0.1"

 pyright = "^1.1.347"
 ruff = "^0.1.13"
 pytest = "^7.4.4"

-[[tool.poetry.source]]
-name = "torch"
-url = "https://download.pytorch.org/whl/cpu"
-priority = "explicit"
+whylogs-container-client = "*"

 [build-system]
 requires = ["poetry-core"]
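Per the comment in the dev group, those pins exist only for editor type checking and autocompletion, and must mirror what the container already ships. A quick hypothetical check (not part of this repo) to compare the locally installed versions against the pins:

# Print installed versions of the mirrored dev dependencies so they can be
# compared against the container's bundled versions (illustrative only).
from importlib.metadata import PackageNotFoundError, version

for pkg in ("langkit", "pandas", "whylogs", "whylogs-container-client"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")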
6 changes: 1 addition & 5 deletions examples/configure_container_python/test/conftest.py
@@ -8,7 +8,7 @@
 import whylogs_container_client.api.manage.health as Health
 from whylogs_container_client import AuthenticatedClient

-image_name = "langkit_configuration_example"  # from the makefile, run `make build` to build the image
+image_name = "langkit_example_configure_container_python"  # from the makefile, run `make build` to build the image

 T = TypeVar("T")

@@ -52,10 +52,6 @@ def docker(port: str) -> List[str]:
"DEFAULT_WHYLABS_UPLOAD_CADENCE=M",
"--env",
"DEFAULT_WHYLABS_UPLOAD_INTERVAL=5",
"--env",
"FAIL_STARTUP_WITHOUT_CONFIG=False",
"--env",
f"PORT={port}",
image_name,
]

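The FAIL_STARTUP_WITHOUT_CONFIG=False override is dropped here because the example's Dockerfile now sets it to True, so the tests exercise the same fail-fast behavior the image ships with; the PORT override appears to be dropped in favor of the image default.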
185 changes: 158 additions & 27 deletions examples/configure_container_python/test/test_container.py
@@ -1,44 +1,175 @@
 import whylogs_container_client.api.llm.validate_llm as ValidateLLM
 from whylogs_container_client import AuthenticatedClient
 from whylogs_container_client.models.llm_validate_request import LLMValidateRequest
-from whylogs_container_client.models.validation_metadata import ValidationMetadata
-from whylogs_container_client.models.validation_report import ValidationReport
+from whylogs_container_client.models.validation_failure import ValidationFailure
+from whylogs_container_client.models.validation_result import ValidationResult


-def test_toxic_prompt(client: AuthenticatedClient):
+# Validate a prompt and response pair for LLM validations
+def test_toxic_response_131(client: AuthenticatedClient):
     request = LLMValidateRequest(
-        prompt="This llm sucks and everyone who made is sucks.",
+        prompt="How do I see my settings",
+        response="You're a stupid user, it's obvious.",
+        dataset_id="model-131",
     )

+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="response.toxicity",
+                details="Value 0.9642418622970581 is above threshold 0.4",
+                value=0.9642418622970581,
+                upper_threshold=0.4,
+                lower_threshold=None,
+            )
+        ],
+    )
+
+    assert actual == expected
+
+
+def test_upper_case_letters_prompt_131(client: AuthenticatedClient):
+    request = LLMValidateRequest(
+        prompt="...",  # <1 upper case letters
         response="I'm sorry you feel that way.",
-        dataset_id="model-62",
+        dataset_id="model-131",
     )

-    response = ValidateLLM.sync_detailed(client=client, json_body=request)
+    response = ValidateLLM.sync_detailed(client=client, body=request)

-    if not isinstance(response.parsed, ValidationReport):
+    if not isinstance(response.parsed, ValidationResult):
         raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")

-    report: ValidationReport = response.parsed
-
-    expected = ValidationReport(
-        failures=[
-            ValidationMetadata(
-                prompt_id="---",
-                validator_name="toxicity_validator",
-                failed_metric="toxicity_prompt",
-                value=0.9417606592178345,
-                timestamp=None,
-                is_valid=False,
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="prompt.upper_case_char_count",
+                details="Value 0 is below threshold 1",
+                value=0,
+                upper_threshold=None,
+                lower_threshold=1.0,
             )
-        ]
+        ],
     )

-    assert len(report.failures) == 1
+    assert actual == expected
+
+
+def test_upper_case_letters_prompt_reading_ease_response_131(client: AuthenticatedClient):
+    response = (
+        "Playing games has always been thought to be important to "
+        "the development of well-balanced and creative children; "
+        "however, what part, if any, they should play in the lives "
+        "of adults has never been researched that deeply. I believe "
+        "that playing games is every bit as important for adults "
+        "as for children. Not only is taking time out to play games "
+        "with our children and other adults valuable to building "
+        "interpersonal relationships but is also a wonderful way "
+        "to release built up tension."
+    )
+
+    request = LLMValidateRequest(
+        prompt="...",  # <1 upper case letters
+        response=response,
+        dataset_id="model-131",
+    )
+
+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="prompt.upper_case_char_count",
+                details="Value 0 is below threshold 1",
+                value=0,
+                upper_threshold=None,
+                lower_threshold=1.0,
+            ),
+            ValidationFailure(
+                id=0,
+                metric="response.flesch_reading_ease",
+                details="Value 52.23 is below threshold 70.0",
+                value=52.23,
+                upper_threshold=None,
+                lower_threshold=70.0,
+            ),
+        ],
+    )

-    actual = report.failures[0]
+    assert actual == expected
+
+
+def test_prompt_sentiment_133(client: AuthenticatedClient):
+    request = LLMValidateRequest(
+        prompt="Ugh, this is way too hard...",
+        response="I'm sorry you feel that way.",
+        dataset_id="model-133",
+    )
+
+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="prompt.sentiment_polarity",
+                details="Value -0.4215 is below threshold 0",
+                value=-0.4215,
+                upper_threshold=None,
+                lower_threshold=0.0,
+            )
+        ],
+    )
+
+    assert actual == expected
+
+
+def test_response_lower_case_133(client: AuthenticatedClient):
+    request = LLMValidateRequest(
+        prompt="Hello!",
+        response="I'M SORRY YOU FEEL THAT WAY.",
+        dataset_id="model-133",
+    )
+
+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="response.lower_case_char_count",
+                details="Value 0 is below threshold 10",
+                value=0,
+                upper_threshold=None,
+                lower_threshold=10.0,
+            )
+        ],
+    )

-    assert actual.validator_name == expected.failures[0].validator_name
-    assert actual.failed_metric == expected.failures[0].failed_metric
-    assert actual.value == expected.failures[0].value
-    assert actual.timestamp is None
-    assert actual.is_valid == expected.failures[0].is_valid
+    assert actual == expected
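Outside of pytest, the same round trip is only a few lines. A standalone sketch mirroring the tests above; the base_url and token are assumptions for a locally running container, not values from this commit:

# Minimal standalone use of the validation endpoint exercised by these tests.
import whylogs_container_client.api.llm.validate_llm as ValidateLLM
from whylogs_container_client import AuthenticatedClient
from whylogs_container_client.models.llm_validate_request import LLMValidateRequest
from whylogs_container_client.models.validation_result import ValidationResult

client = AuthenticatedClient(base_url="http://localhost:8000", token="password")  # assumed values

request = LLMValidateRequest(
    prompt="How do I reset my password?",
    response="Click 'Forgot password' on the sign-in page.",
    dataset_id="model-131",
)

result = ValidateLLM.sync_detailed(client=client, body=request)
if isinstance(result.parsed, ValidationResult):
    # An empty report means every configured metric passed validation.
    print(result.parsed.report)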