Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
alexeichhorn committed Oct 25, 2023
0 parents commit c859f31
Show file tree
Hide file tree
Showing 26 changed files with 1,790 additions and 0 deletions.
164 changes: 164 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
*.ipynb
temp.py


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
36 changes: 36 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"editor.formatOnSave": true,
"search.followSymlinks": false,
"files.trimTrailingWhitespace": true,
"files.trimFinalNewlines": true,
"files.insertFinalNewline": true,
"python.analysis.typeCheckingMode": "basic",
"python.linting.pylintEnabled": true,
"python.linting.pylintArgs": [
"--disable=C0111",
"--disable=no-name-in-module",
"--max-line-length=120",
"--rcfile",
"${workspaceFolder}/setup.cfg"
],
"isort.args": ["-l", "140"],
"editor.defaultFormatter": "esbenp.prettier-vscode",
"python.formatting.provider": "black",
"python.formatting.blackArgs": ["--line-length", "140"],
"python.linting.enabled": true,
"python.testing.pytestEnabled": true,
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.codeActionsOnSave": {
"source.organizeImports": false
}
},
"python.analysis.extraPaths": [],
"search.exclude": {
"**/.vscode": true,
"**/.git": true
},
"editor.codeActionsOnSave": {
"source.fixAll": true
},
}
3 changes: 3 additions & 0 deletions llm_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .base import BaseLLMResponse
from .fields import LLMArrayOutput, LLMOutput
from .prompt_definition.prompt_template import PromptTemplate
127 changes: 127 additions & 0 deletions llm_lib/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from typing import TYPE_CHECKING, Any, ClassVar, TypeVar

from .exceptions import LLMOutputFieldMissing, LLMOutputFieldWrongType
from .fields import ClassPlaceholder, LLMArrayOutputInfo, LLMFieldInfo, LLMOutputInfo
from .meta import LLMMeta
from .parser import Parser
from .utils.type_checker import SupportedBaseTypes, array_item_type

if TYPE_CHECKING:
from inspect import Signature


class BaseLLMResponse(metaclass=LLMMeta):
    """Base class for response objects whose fields are validated against `__fields__`.

    Values passed to the constructor, and values assigned to attributes afterwards, are
    checked against the field definitions stored in `__fields__`. String values are
    converted to `int`, `float` or `bool` where the field type asks for it.
    """

    if TYPE_CHECKING:
        # populated by the metaclass (ClassPlaceholder used to prevent showing up as type suggestion)
        __fields__: ClassVar[dict[str, LLMFieldInfo]] = ClassPlaceholder(init=False, value={})
        __signature__: ClassVar["Signature"] = ClassPlaceholder(init=False)

    def __init__(self, **data: Any):
        """Validate `data` against the declared fields and store the result on the instance.

        Raises:
            ValueError: for unknown field names, missing required fields or list length violations.
            TypeError: when a required field is `None` or an array field is not a list.
            LLMOutputFieldWrongType: when a value cannot be converted to the field's type.
        """
        data = self._prepare_and_validate_dict(data)
        # Write straight into __dict__: the values are already validated, so going through
        # __setattr__ would only validate them a second time.
        self.__dict__.update(data)

    # don't allow setting of fields that aren't defined in IDE
    if not TYPE_CHECKING:

        def __setattr__(self, __name: str, __value: Any) -> None:
            """Validate (and, if needed, convert) the value before storing it."""
            __value = self._prepare_and_validate_field(__name, __value)  # throws error if invalid

            super().__setattr__(__name, __value)

    def __repr__(self) -> str:
        """Return `ClassName(field=value, ...)` built from the instance `__dict__`."""
        attrs = ", ".join(f"{k}={v}" for k, v in self.__dict__.items())
        return f"{self.__class__.__name__}({attrs})"

    def _prepare_field_value(self, value: Any, _type: type) -> Any:
        """Convert a single string value to `_type`; leave every other value as is.

        Only `int`, `float` and `bool` targets are converted. For `bool`, the accepted
        spellings (case-insensitive) are "true"/"yes"/"1" and "false"/"no"/"0".

        Raises:
            LLMOutputFieldWrongType: when a string cannot be converted to the requested type.
        """
        if _type == str:
            return value

        if isinstance(value, _type):
            return value

        if isinstance(value, str):
            value = value.strip()
            if _type == int:
                try:
                    return int(value)
                except ValueError as err:
                    raise LLMOutputFieldWrongType(f'"{value}" is not a valid integer value') from err
            elif _type == float:
                try:
                    return float(value)
                except ValueError as err:
                    raise LLMOutputFieldWrongType(f'"{value}" is not a valid float value') from err
            elif _type == bool:
                if value.lower() in ("true", "yes", "1"):
                    return True
                elif value.lower() in ("false", "no", "0"):
                    return False
                else:
                    raise LLMOutputFieldWrongType(f'"{value}" is not a valid boolean value')

        # Unsupported target type or non-string value: returned unchanged, the caller type-checks it.
        return value

    def _prepare_and_validate_field(self, __name: str, __value: Any) -> Any:
        """Validate one field value against its definition and return the converted value.

        Raises:
            ValueError: when `__name` is not a declared field, or a list violates min/max count.
            TypeError: when a required field is `None`, or an array field is not a list.
            LLMOutputFieldWrongType: when the value (or a list item) has the wrong type.
        """
        if __name not in self.__fields__:
            raise ValueError(f'"{self.__class__.__name__}" object has no field "{__name}"')

        field_info = self.__fields__[__name]
        if isinstance(field_info.info, LLMOutputInfo):
            __value = self._prepare_field_value(__value, field_info.type_)

            if __value is None and field_info.info.required:
                raise TypeError(f'"{self.__class__.__name__}" field "{__name}" is required')
            if not isinstance(__value, field_info.type_):
                raise LLMOutputFieldWrongType(f'"{self.__class__.__name__}" field "{__name}" must be of type {field_info.type_}')

        elif isinstance(field_info.info, LLMArrayOutputInfo):
            item_type = array_item_type(field_info.type_)

            if not isinstance(__value, list):
                raise TypeError(f'"{self.__class__.__name__}" field "{__name}" must be a list')
            if field_info.info.min_count is not None and len(__value) < field_info.info.min_count:
                raise ValueError(f'"{self.__class__.__name__}" field "{__name}" must have at least {field_info.info.min_count} items')
            if field_info.info.max_count is not None and len(__value) > field_info.info.max_count:
                raise ValueError(f'"{self.__class__.__name__}" field "{__name}" must have at most {field_info.info.max_count} items')

            __value = [self._prepare_field_value(v, item_type) for v in __value]
            if not all(isinstance(v, item_type) for v in __value):
                raise LLMOutputFieldWrongType(f'"{self.__class__.__name__}" field "{__name}" must be a list of type {field_info.type_}')

        return __value

    def _prepare_and_validate_dict(self, __values: dict[str, Any]) -> dict[str, Any]:
        """Validate every entry of `__values` and fill in the fields that are absent.

        The dict is updated in place and also returned. Absent optional fields get their
        declared default; absent array fields get an empty list.

        Raises:
            ValueError: when a required field, or an array field with a positive minimum
                count, is absent. Also anything raised by `_prepare_and_validate_field`.
        """
        # check every field if it's valid (only existing keys are reassigned, so the dict size is stable)
        for k, v in __values.items():
            __values[k] = self._prepare_and_validate_field(k, v)

        # check every field not in dict, if it's required
        for field in self.__fields__.values():
            if field.key in __values:
                continue

            if isinstance(field.info, LLMOutputInfo):
                if field.info.required:
                    raise ValueError(f'"{self.__class__.__name__}" field "{field.key}" is required')
                else:
                    __values[field.key] = field.info.default

            elif isinstance(field.info, LLMArrayOutputInfo):
                # min_count may be None (no lower bound), as in _prepare_and_validate_field;
                # comparing None with 0 would raise an unrelated TypeError.
                if field.info.min_count is not None and field.info.min_count > 0:
                    raise ValueError(f'"{self.__class__.__name__}" field "{field.key}" requires at least {field.info.min_count} items')
                else:
                    __values[field.key] = []

        return __values

    # - Parsing

    _Self = TypeVar("_Self", bound="BaseLLMResponse")  # backward compatibility for pre-Python 3.12

    @classmethod
    def parse_response(cls: type[_Self], response: str) -> _Self:
        """Parse the raw `response` string into an instance of `cls` via `Parser`."""
        return Parser(cls).parse(response)
39 changes: 39 additions & 0 deletions llm_lib/example_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from typing import Any, Iterable


class LimitedExampleListFormatter:
    """
    The LimitedExampleListFormatter class converts an iterable of objects into a string representation,
    limiting the number of displayed elements. If the number of elements exceeds the limit,
    it includes a skip separator between the displayed and omitted elements.

    Attributes:
        max_examples (int): The maximum number of elements to display in the string.
        separator (str): The separator used between the displayed elements.
        skip_separator (str, optional): The placeholder shown in place of the omitted elements. Defaults to "...".
        examples_after_separtor (int, optional): How many of the last elements are shown after the
            skip separator. Must be non-negative and smaller than max_examples. Defaults to 1.

    Examples:
        formatter = LimitedExampleListFormatter(3, ', ')
        print(formatter.format(['apple', 'banana', 'cherry', 'date', 'elderberry']))
        # Output: "apple, banana, ..., elderberry"
    """

    def __init__(self, max_examples: int, separator: str, skip_separator: str = "...", examples_after_separtor: int = 1):
        """Store the configuration.

        Raises:
            ValueError: if examples_after_separtor is negative.
            AssertionError: if max_examples is not greater than examples_after_separtor.
        """
        if examples_after_separtor < 0:
            raise ValueError("examples_after_separtor must not be negative")
        # Raised explicitly instead of via `assert` so the check also runs under `python -O`.
        if not max_examples > examples_after_separtor:
            raise AssertionError("max_examples must be greater than examples_after_separtor")

        self.max_examples = max_examples
        self.separator = separator
        self.skip_separator = skip_separator
        self.examples_after_separtor = examples_after_separtor

    def format(self, items: Iterable[Any]) -> str:
        """Join the string form of `items`, replacing the middle with the skip separator if there are too many."""
        str_items = [str(item) for item in items]

        if len(str_items) <= self.max_examples:
            return self.separator.join(str_items)

        tail_count = self.examples_after_separtor
        # Show the first (max_examples - tail_count) items, then the skip separator, then the last tail_count items.
        head = str_items[: self.max_examples - tail_count]
        # `str_items[-0:]` is the whole list, so an empty tail has to be handled explicitly.
        tail = str_items[-tail_count:] if tail_count > 0 else []
        return self.separator.join([*head, self.skip_separator, *tail])
14 changes: 14 additions & 0 deletions llm_lib/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class LLMException(Exception):
    """Common base class of the exceptions defined in this module."""


class LLMTokenLimitExceeded(LLMException):
    """Presumably raised when a token limit is exceeded (raise sites are not in this module; confirm)."""


class LLMOutputFieldMissing(LLMException):
    """Presumably raised when an expected output field is absent (raise sites are not in this module; confirm)."""


class LLMOutputFieldWrongType(LLMException):
    """Signals that an output field value does not have, or cannot be converted to, the expected type."""
Loading

0 comments on commit c859f31

Please sign in to comment.