Skip to content
This repository has been archived by the owner on Sep 24, 2024. It is now read-only.

Fixes flaky integration test / syncs deps #114

Merged
merged 8 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/merge_checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
id: ruff
run: |
. .venv/bin/activate
ruff --output-format=github .
ruff check --output-format=github .
continue-on-error: false

tests:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ repos:
- id: requirements-txt-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.5.6
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.5.6
hooks:
- id: ruff-format
21 changes: 19 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,23 @@ development dependencies.
You can execute Ruff by calling `ruff --fix .` or `ruff format .` from the workspace root.
Ruff will pick up the configuration defined in the `pyproject.toml` file automatically.


## Updating deps
Update deps in `pyproject.toml` and ensure you can `pip-compile` them as follows:

pip-tools:
```
pip install pip-tools
pip-compile -o requirements.lock --all-extras pyproject.toml
```

uv:
```
pip install uv
uv pip compile --python-version=3.11.9 --all-extras --universal -o requirements.lock pyproject.toml
```


## Testing a development branch on Ray

LM Buddy is intended to be installed as a pip requirement in the runtime environment of a Ray job.
Expand All @@ -40,11 +57,11 @@ as shown below:
```
# pip-tools
pip install pip-tools
pip-compile -o requirements.txt pyproject.toml
pip-compile -o requirements.txt --all-extras pyproject.toml

# uv
pip install uv
uv pip compile -o requirements.txt pyproject.toml
uv pip compile --python-version=3.11.9 --all-extras --universal -o requirements.txt pyproject.toml
```

2. When submitting a job to a Ray cluster, specify in the Ray runtime environment the following:
Expand Down
48 changes: 23 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lm-buddy"
version = "0.12.1"
version = "0.13.0"
authors = [
{ name = "Sean Friedowitz", email = "[email protected]" },
{ name = "Aaron Gonzales", email = "[email protected]" },
Expand All @@ -28,45 +28,43 @@ dependencies = [
"urllib3>=1.26.18,<2",
"pydantic>=2.6.4",
"pydantic-yaml>=1.2.0",
binaryaaron marked this conversation as resolved.
Show resolved Hide resolved
"pydantic==2.6.4",
"pydantic-yaml==1.2.0",
"ray[default]==2.30.0",
"loguru==0.7.2",
"s3fs",
"s3fs==2024.5.0",
binaryaaron marked this conversation as resolved.
Show resolved Hide resolved
]

[project.optional-dependencies]
ruff = ["ruff==0.2.1"]
ruff = ["ruff==0.5.5"]
jobs = [
# HuggingFace / pytorch
"torch==2.3.1",
"datasets>=2.17.1",
"transformers==4.38.0",
"accelerate==0.26.1",
"peft==0.7.1",
"trl==0.7.10",
"torch==2.4.0",
"datasets>=2.20.0",
"transformers==4.43.4",
"accelerate==0.33.0",
"peft==0.12.0",
"trl==0.9.6",
"bitsandbytes==0.42.0",
"bert_score==0.3.13",
# Evaluation frameworks
"lm-eval==0.4.2",
"einops==0.7.0",
"openai==1.14.3",
"ragas==0.1.5",
"langchain-community==0.0.29",
"langchain_openai==0.1.1",
"lm-eval==0.4.3",
"einops==0.8.0",
"openai==1.38.0",
"ragas==0.1.12",
"langchain-community==0.2.11",
"langchain_openai==0.1.20",
"sentencepiece==0.2.0",
"evaluate==0.4.2",
"mistralai==0.4.2",
]
test = ["pytest==7.4.3", "pytest-cov==4.1.0"]
test = ["pytest==8.3.2", "pytest-cov==5.0.0"]
docs = [
"Sphinx==7.2.6",
"nbsphinx==0.9.3",
"sphinx-codeautolink==0.15.0",
"myst-parser==2.0.0",
"furo==2024.1.29",
"Sphinx==7.4.7",
"nbsphinx==0.9.4",
"sphinx-codeautolink==0.15.2",
"myst-parser==3.0.1",
"furo==2024.7.18",
]
dev = ["lm-buddy[jobs,ruff,test,docs]", "pre-commit==3.6.0", "jupyter==1.0.0"]
dev = ["lm-buddy[jobs,ruff,test,docs]", "pre-commit==3.8.0", "jupyter>=1.0.0"]

[project.scripts]
lm-buddy = "lm_buddy.cli:cli"
Expand All @@ -82,7 +80,7 @@ addopts = "-v --cov src --no-cov-on-fail --disable-warnings"
testpaths = ["tests"]

[tool.ruff]
target-version = "py310"
target-version = "py311"

exclude = [
".bzr",
Expand Down
2 changes: 1 addition & 1 deletion src/lm_buddy/configs/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def validate_adapter_args(cls, config: "AdapterConfig"):
# Filter fields to those found on the PeftConfig
extra_fields = config.model_fields_set.difference(allowed_fields)
if extra_fields:
raise ValueError(f"Unknowon arguments for {peft_type} adapter: {extra_fields}")
raise ValueError(f"Unknown arguments for {peft_type} adapter: {extra_fields}")

return config

Expand Down
2 changes: 1 addition & 1 deletion src/lm_buddy/jobs/evaluation/hf_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def run_eval(config: HuggingFaceEvalJobConfig) -> Path:
dataset_iterable = tqdm(input_samples) if config.evaluation.enable_tqdm else input_samples

# Choose which model client to use
if type(config.model) == VLLMCompletionsConfig:
if isinstance(config.model, VLLMCompletionsConfig):
model_name = config.model.inference.base_url
output_model_name = config.model.inference.engine
if "mistral" in model_name:
Expand Down
2 changes: 1 addition & 1 deletion src/lm_buddy/jobs/evaluation/lm_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def load_harness_model(config: LMHarnessJobConfig) -> HFLM | OpenaiCompletionsLM
tokenizer=model_path,
peft=peft_path,
device="cuda" if torch.cuda.device_count() > 0 else "cpu",
trust_remote_code=config.model.trust_remote_code,
trust_remote_code=model_config.trust_remote_code,
dtype=config.model.torch_dtype if config.model.torch_dtype else "auto",
**quantization_kwargs,
)
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_lm_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def job_config(llm_model_path) -> LMHarnessJobConfig:
model_config = AutoModelConfig(path=format_file_path(llm_model_path))
tracking_config = WandbRunConfig(project="test-project")
evaluation_config = LMHarnessEvaluationConfig(tasks=["hellaswag"], limit=5)
evaluation_config = LMHarnessEvaluationConfig(tasks=["glue"], limit=5)
return LMHarnessJobConfig(
name="test-job",
model=model_config,
Expand All @@ -23,5 +23,5 @@ def job_config(llm_model_path) -> LMHarnessJobConfig:
def test_lm_harness_job(job_config):
buddy = LMBuddy()
result = buddy.evaluate(job_config)
assert len(result.tables) == 1 # One table for hellaswag
assert len(result.tables) == 10
assert len(result.artifacts) == 1 # One table artifact
Loading