Showing 38 changed files with 6,521 additions and 864 deletions.
Binary file removed (-191 Bytes): examples/configure-container/test/__pycache__/__init__.cpython-310.pyc
Binary file removed (-2.95 KB): examples/configure-container/test/__pycache__/conftest.cpython-310-pytest-7.4.4.pyc
Binary file removed (-3.34 KB): examples/configure-container/test/__pycache__/test_container.cpython-310-pytest-7.4.4.pyc
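The three removals above drop compiled pytest caches that were presumably committed by accident. A common way to keep such artifacts out of a repository (a general convention, not something this commit adds) is a pair of .gitignore entries:

    # Keep compiled Python caches out of version control (hypothetical .gitignore entries)
    __pycache__/
    *.pyc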
pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "python_container_example"
+name = "configure_container_python"
 version = "0.1.0"
 description = ""
 authors = ["Anthony Naddeo <[email protected]>"]
@@ -8,24 +8,20 @@ packages = [{include = "whylogs_config"}]

[tool.poetry.dependencies]
python = "^3.10"
whylogs-container-types = {url = "https://guest-session-testing-public.s3.us-west-2.amazonaws.com/whylogs_container_types-0.4.2-py3-none-any.whl"}
whylogs-container-client = "1.0.2.dev0"

[tool.poetry.group.dev.dependencies]
# These are all dev dependencies. They're already included in the container and we don't want to
# overwrite those versions, we just want types and auto completion in this project.
# whylogs-container-types = "^0.4.0"
langkit = {url = "https://whypy.s3.us-west-2.amazonaws.com/langkit-0.0.38-py3-none-any.whl", extras = ["all"]}
whylogs-container-types = {url = "https://whypy.s3.us-west-2.amazonaws.com/whylogs_container_types-0.4.8-py3-none-any.whl"}
pandas = "1.3.5"
whylogs = {version = "1.3.20", extras = ["proc"]}
whylogs-container-client = "^1.0.1"

pyright = "^1.1.347"
ruff = "^0.1.13"
pytest = "^7.4.4"

[[tool.poetry.source]]
name = "torch"
url = "https://download.pytorch.org/whl/cpu"
priority = "explicit"

whylogs-container-client = "*"

[build-system]
requires = ["poetry-core"]
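The test file below expects a pytest `client` fixture, and the deleted conftest cache above suggests that fixture lives in test/conftest.py. Here is a minimal sketch of such a fixture; the port, password, and environment variable names are placeholder assumptions, not values taken from this commit:

    import os

    import pytest
    from whylogs_container_client import AuthenticatedClient


    @pytest.fixture(scope="session")
    def client() -> AuthenticatedClient:
        # Placeholder address and API key; the real conftest.py may wire these up differently.
        port = os.environ.get("CONTAINER_PORT", "8000")
        password = os.environ.get("CONTAINER_PASSWORD", "password")
        return AuthenticatedClient(base_url=f"http://localhost:{port}", token=password)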
examples/configure_container_python/test/test_container.py (185 changes: 158 additions & 27 deletions)
@@ -1,44 +1,175 @@
 import whylogs_container_client.api.llm.validate_llm as ValidateLLM
 from whylogs_container_client import AuthenticatedClient
 from whylogs_container_client.models.llm_validate_request import LLMValidateRequest
-from whylogs_container_client.models.validation_metadata import ValidationMetadata
-from whylogs_container_client.models.validation_report import ValidationReport
+from whylogs_container_client.models.validation_failure import ValidationFailure
+from whylogs_container_client.models.validation_result import ValidationResult
 
 
-def test_toxic_prompt(client: AuthenticatedClient):
+# Validate a prompt and response pair for LLM validations
+def test_toxic_response_131(client: AuthenticatedClient):
     request = LLMValidateRequest(
-        prompt="This llm sucks and everyone who made is sucks.",
+        prompt="How do I see my settings",
+        response="You're a stupid user, it's obvious.",
+        dataset_id="model-131",
+    )
+
+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="response.toxicity",
+                details="Value 0.9642418622970581 is above threshold 0.4",
+                value=0.9642418622970581,
+                upper_threshold=0.4,
+                lower_threshold=None,
+            )
+        ],
+    )
+
+    assert actual == expected
+
+
+def test_upper_case_letters_prompt_131(client: AuthenticatedClient):
+    request = LLMValidateRequest(
+        prompt="...",  # <1 upper case letters
         response="I'm sorry you feel that way.",
-        dataset_id="model-62",
+        dataset_id="model-131",
     )
 
-    response = ValidateLLM.sync_detailed(client=client, json_body=request)
+    response = ValidateLLM.sync_detailed(client=client, body=request)
 
-    if not isinstance(response.parsed, ValidationReport):
+    if not isinstance(response.parsed, ValidationResult):
         raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
 
-    report: ValidationReport = response.parsed
-
-    expected = ValidationReport(
-        failures=[
-            ValidationMetadata(
-                prompt_id="---",
-                validator_name="toxicity_validator",
-                failed_metric="toxicity_prompt",
-                value=0.9417606592178345,
-                timestamp=None,
-                is_valid=False,
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="prompt.upper_case_char_count",
+                details="Value 0 is below threshold 1",
+                value=0,
+                upper_threshold=None,
+                lower_threshold=1.0,
             )
-        ]
+        ],
     )
 
-    assert len(report.failures) == 1
+    assert actual == expected
+
+
+def test_upper_case_letters_prompt_reading_ease_response_131(client: AuthenticatedClient):
+    response = (
+        "Playing games has always been thought to be important to "
+        "the development of well-balanced and creative children; "
+        "however, what part, if any, they should play in the lives "
+        "of adults has never been researched that deeply. I believe "
+        "that playing games is every bit as important for adults "
+        "as for children. Not only is taking time out to play games "
+        "with our children and other adults valuable to building "
+        "interpersonal relationships but is also a wonderful way "
+        "to release built up tension."
+    )
+
+    request = LLMValidateRequest(
+        prompt="...",  # <1 upper case letters
+        response=response,
+        dataset_id="model-131",
+    )
+
+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="prompt.upper_case_char_count",
+                details="Value 0 is below threshold 1",
+                value=0,
+                upper_threshold=None,
+                lower_threshold=1.0,
+            ),
+            ValidationFailure(
+                id=0,
+                metric="response.flesch_reading_ease",
+                details="Value 52.23 is below threshold 70.0",
+                value=52.23,
+                upper_threshold=None,
+                lower_threshold=70.0,
+            ),
+        ],
+    )
 
-    actual = report.failures[0]
+    assert actual == expected
+
+
+def test_prompt_sentiment_133(client: AuthenticatedClient):
+    request = LLMValidateRequest(
+        prompt="Ugh, this is way too hard...",
+        response="I'm sorry you feel that way.",
+        dataset_id="model-133",
+    )
+
+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="prompt.sentiment_polarity",
+                details="Value -0.4215 is below threshold 0",
+                value=-0.4215,
+                upper_threshold=None,
+                lower_threshold=0.0,
+            )
+        ],
+    )
+
+    assert actual == expected
+
+
+def test_response_lower_case_133(client: AuthenticatedClient):
+    request = LLMValidateRequest(
+        prompt="Hello!",
+        response="I'M SORRY YOU FEEL THAT WAY.",
+        dataset_id="model-133",
+    )
+
+    response = ValidateLLM.sync_detailed(client=client, body=request)
+
+    if not isinstance(response.parsed, ValidationResult):
+        raise Exception(f"Failed to validate data. Status code: {response.status_code}. {response.parsed}")
+
+    actual: ValidationResult = response.parsed
+
+    expected = ValidationResult(
+        report=[
+            ValidationFailure(
+                id=0,
+                metric="response.lower_case_char_count",
+                details="Value 0 is below threshold 10",
+                value=0,
+                upper_threshold=None,
+                lower_threshold=10.0,
+            )
+        ],
+    )
 
-    assert actual.validator_name == expected.failures[0].validator_name
-    assert actual.failed_metric == expected.failures[0].failed_metric
-    assert actual.value == expected.failures[0].value
-    assert actual.timestamp is None
-    assert actual.is_valid == expected.failures[0].is_valid
+    assert actual == expected
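To try the updated client API outside pytest, here is a minimal standalone sketch built from the same calls the tests use. The base URL, token, prompt, and response strings are placeholder assumptions; only the `sync_detailed(client=..., body=...)` call shape, the model names, and the `ValidationResult`/`ValidationFailure` types come from this commit:

    import whylogs_container_client.api.llm.validate_llm as ValidateLLM
    from whylogs_container_client import AuthenticatedClient
    from whylogs_container_client.models.llm_validate_request import LLMValidateRequest
    from whylogs_container_client.models.validation_result import ValidationResult

    # Placeholder address and API key; substitute your running container's values.
    client = AuthenticatedClient(base_url="http://localhost:8000", token="password")

    request = LLMValidateRequest(
        prompt="How do I reset my password?",
        response="Click 'Forgot password' on the sign-in page.",
        dataset_id="model-131",  # must match a dataset configured in the container
    )

    response = ValidateLLM.sync_detailed(client=client, body=request)

    if isinstance(response.parsed, ValidationResult):
        # An empty report means every configured metric passed.
        for failure in response.parsed.report:
            print(failure.metric, failure.details)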