Skip to content
This repository has been archived by the owner on Sep 24, 2024. It is now read-only.

Fixes flaky integration test / syncs deps #114

Merged
merged 8 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/merge_checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
id: ruff
run: |
. .venv/bin/activate
ruff --output-format=github .
ruff check --output-format=github .
continue-on-error: false

tests:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ repos:
- id: requirements-txt-fixer
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.5.6
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.2.1
rev: v0.5.6
hooks:
- id: ruff-format
21 changes: 19 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,23 @@ development dependencies.
You can execute Ruff by calling `ruff --fix .` or `ruff format .` from the workspace root.
Ruff will pick up the configuration defined in the `pyproject.toml` file automatically.


## Updating deps
Update deps in `pyproject.toml` and ensure you can `pip-compile` them as follows:

pip-tools:
```
pip install pip-tools
pip-compile -o requirements.lock --all-extras pyproject.toml
```

uv:
```
pip install uv
uv pip compile --python-version=3.11.9 --all-extras --universal -o requirements.lock pyproject.toml
```


## Testing a development branch on Ray

LM Buddy is intended to be installed as a pip requirement in the runtime environment of a Ray job.
Expand All @@ -40,11 +57,11 @@ as shown below:
```
# pip-tools
pip install pip-tools
pip-compile -o requirements.txt pyproject.toml
pip-compile -o requirements.txt --all-extras pyproject.toml

# uv
pip install uv
uv pip compile -o requirements.txt pyproject.toml
uv pip compile --python-version=3.11.9 --all-extras --universal -o requirements.txt pyproject.toml
```

2. When submitting a job to a Ray cluster, specify in the Ray runtime environment the following:
Expand Down
48 changes: 23 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lm-buddy"
version = "0.12.1"
version = "0.13.0"
authors = [
{ name = "Sean Friedowitz", email = "[email protected]" },
{ name = "Aaron Gonzales", email = "[email protected]" },
Expand All @@ -28,45 +28,43 @@ dependencies = [
"urllib3>=1.26.18,<2",
"pydantic>=2.6.4",
"pydantic-yaml>=1.2.0",
binaryaaron marked this conversation as resolved.
Show resolved Hide resolved
"pydantic==2.6.4",
"pydantic-yaml==1.2.0",
"ray[default]==2.30.0",
"loguru==0.7.2",
"s3fs",
"s3fs==2024.5.0",
binaryaaron marked this conversation as resolved.
Show resolved Hide resolved
]

[project.optional-dependencies]
ruff = ["ruff==0.2.1"]
ruff = ["ruff==0.5.5"]
jobs = [
# HuggingFace / pytorch
"torch==2.3.1",
"datasets>=2.17.1",
"transformers==4.38.0",
"accelerate==0.26.1",
"peft==0.7.1",
"trl==0.7.10",
"torch==2.4.0",
"datasets>=2.20.0",
"transformers==4.43.4",
"accelerate==0.33.0",
"peft==0.12.0",
"trl==0.9.6",
"bitsandbytes==0.42.0",
"bert_score==0.3.13",
# Evaluation frameworks
"lm-eval==0.4.2",
"einops==0.7.0",
"openai==1.14.3",
"ragas==0.1.5",
"langchain-community==0.0.29",
"langchain_openai==0.1.1",
"lm-eval==0.4.3",
"einops==0.8.0",
"openai==1.38.0",
"ragas==0.1.12",
"langchain-community==0.2.11",
"langchain_openai==0.1.20",
"sentencepiece==0.2.0",
"evaluate==0.4.2",
"mistralai==0.4.2",
]
test = ["pytest==7.4.3", "pytest-cov==4.1.0"]
test = ["pytest==8.3.2", "pytest-cov==5.0.0"]
docs = [
"Sphinx==7.2.6",
"nbsphinx==0.9.3",
"sphinx-codeautolink==0.15.0",
"myst-parser==2.0.0",
"furo==2024.1.29",
"Sphinx==7.4.7",
"nbsphinx==0.9.4",
"sphinx-codeautolink==0.15.2",
"myst-parser==3.0.1",
"furo==2024.7.18",
]
dev = ["lm-buddy[jobs,ruff,test,docs]", "pre-commit==3.6.0", "jupyter==1.0.0"]
dev = ["lm-buddy[jobs,ruff,test,docs]", "pre-commit==3.8.0", "jupyter>=1.0.0"]

[project.scripts]
lm-buddy = "lm_buddy.cli:cli"
Expand All @@ -82,7 +80,7 @@ addopts = "-v --cov src --no-cov-on-fail --disable-warnings"
testpaths = ["tests"]

[tool.ruff]
target-version = "py310"
target-version = "py311"

exclude = [
".bzr",
Expand Down
2 changes: 1 addition & 1 deletion src/lm_buddy/configs/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def validate_adapter_args(cls, config: "AdapterConfig"):
# Filter fields to those found on the PeftConfig
extra_fields = config.model_fields_set.difference(allowed_fields)
if extra_fields:
raise ValueError(f"Unknowon arguments for {peft_type} adapter: {extra_fields}")
raise ValueError(f"Unknown arguments for {peft_type} adapter: {extra_fields}")

return config

Expand Down
2 changes: 1 addition & 1 deletion src/lm_buddy/jobs/evaluation/hf_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def run_eval(config: HuggingFaceEvalJobConfig) -> Path:
dataset_iterable = tqdm(input_samples) if config.evaluation.enable_tqdm else input_samples

# Choose which model client to use
if type(config.model) == VLLMCompletionsConfig:
if isinstance(config.model, VLLMCompletionsConfig):
model_name = config.model.inference.base_url
output_model_name = config.model.inference.engine
if "mistral" in model_name:
Expand Down
2 changes: 1 addition & 1 deletion src/lm_buddy/jobs/evaluation/lm_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def load_harness_model(config: LMHarnessJobConfig) -> HFLM | OpenaiCompletionsLM
tokenizer=model_path,
peft=peft_path,
device="cuda" if torch.cuda.device_count() > 0 else "cpu",
trust_remote_code=config.model.trust_remote_code,
trust_remote_code=model_config.trust_remote_code,
dtype=config.model.torch_dtype if config.model.torch_dtype else "auto",
**quantization_kwargs,
)
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_lm_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def job_config(llm_model_path) -> LMHarnessJobConfig:
model_config = AutoModelConfig(path=format_file_path(llm_model_path))
tracking_config = WandbRunConfig(project="test-project")
evaluation_config = LMHarnessEvaluationConfig(tasks=["hellaswag"], limit=5)
evaluation_config = LMHarnessEvaluationConfig(tasks=["glue"], limit=5)
return LMHarnessJobConfig(
name="test-job",
model=model_config,
Expand All @@ -23,5 +23,5 @@ def job_config(llm_model_path) -> LMHarnessJobConfig:
def test_lm_harness_job(job_config):
buddy = LMBuddy()
result = buddy.evaluate(job_config)
assert len(result.tables) == 1 # One table for hellaswag
assert len(result.tables) == 10
assert len(result.artifacts) == 1 # One table artifact
Loading