diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index df6f360c..6371d585 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,7 +22,7 @@ If you have an active Conda environment, Poetry should recognize it during insta and install the package dependencies there. This hasn't been explicitly tested with other virtual python environments, but will likely work. -Alternatively, you can use poetry's own environment by running +Alternatively, you can use Poetry's own environment by running ``` poetry lock poetry env use python3.10 @@ -30,10 +30,6 @@ poetry install ``` where `python3.10` is your python interpreter. -The `pyproject.toml` file defines dependency groups for the logical job types in the package. -Individual dependency groups can be installed by running -`poetry install --with ,` or `poetry install --only `. - ## Code style This repository uses [Ruff](https://docs.astral.sh/ruff/) for Python formatting and linting. @@ -53,10 +49,11 @@ local development branch of the `lm-buddy` repo. To do so, follow the steps: -1. Export a copy of the package dependencies by running the following command, which will create a `requirements.txt` file in the `lm-buddy` repository. This will contain the dependencies for the `finetuning` and `evaluation` job groups: +1. Export a copy of the package dependencies by running the following command, which will create a `requirements.txt` file in the `lm-buddy` repository. +This will contain all non-development dependencies for the package: ``` - poetry export --without-hashes --with finetuning,evaluation -o requirements.txt + poetry export --without-hashes -o requirements.txt ``` 2. 
When submitting a job to a Ray cluster, specify in the Ray runtime environment the following: @@ -109,7 +106,7 @@ poetry publish --repository testpypi --dry-run --build poetry publish --repository testpypi --build ``` -### Publish to PyPi +### Publish to PyPI When you're ready, run: diff --git a/examples/notebooks/dev_workflow.ipynb b/examples/notebooks/dev_ray_submission.ipynb similarity index 88% rename from examples/notebooks/dev_workflow.ipynb rename to examples/notebooks/dev_ray_submission.ipynb index 183b491f..6f6e2fac 100644 --- a/examples/notebooks/dev_workflow.ipynb +++ b/examples/notebooks/dev_ray_submission.ipynb @@ -5,12 +5,33 @@ "id": "123e34e9-70f8-42ab-b790-b59ddc01b1f3", "metadata": {}, "source": [ - "# Notebook Development Workflow" + "# Development Ray submission" ] }, { "cell_type": "markdown", - "id": "fcd5240e", + "id": "8a4fc01e", + "metadata": {}, + "source": [ + "Generally, `lm-buddy` is installed as a pip requirement in the runtime environment of the Ray job.\n", + "During development, however, it can be helpful to execute a job from a local branch \n", + "that has not been published to PyPI.\n", + "\n", + "This example notebook shows how to bypass the pip requirements section of the Ray runtime environment\n", + "and instead upload a local copy of the `lm_buddy` Python module directly to Ray." 
+ ] + }, + { + "cell_type": "markdown", + "id": "5518ab35", + "metadata": {}, + "source": [ + "## File-based submission" + ] + }, + { + "cell_type": "markdown", + "id": "ae7c26d9", "metadata": {}, "source": [ "This demonstrates the basic workflow for submitting an LM Buddy job to Ray\n", @@ -22,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "fd3e4db3-829b-495f-9864-7567bd2ac0ce", "metadata": {}, "outputs": [], @@ -91,17 +112,15 @@ "client.submit_job(\n", " entrypoint=f\"python -m lm_buddy run simple --config simple_config.yaml\",\n", " runtime_env=runtime_env,\n", - ")\n" + ")" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "2f7ccdfd-0b09-47e5-b670-45c614dd4bd8", + "cell_type": "markdown", + "id": "ff88c2f6", "metadata": {}, - "outputs": [], "source": [ - "# Iterative Submission with Temp Config File" + "## Iterative submission with temporary config files" ] }, { @@ -213,7 +232,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/examples/notebooks/direct_job_execution.ipynb b/examples/notebooks/direct_job_execution.ipynb new file mode 100644 index 00000000..077cda2a --- /dev/null +++ b/examples/notebooks/direct_job_execution.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Direct job execution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook illustrates how to use LM Buddy as a library to run jobs directly on the host machine.\n", + "\n", + "Jobs are fully specified by a `lm_buddy.jobs.configs.LMBuddyJobConfig` \n", + "and are executed with the `lm_buddy.run_job` method.\n", + "\n", + "**Warning**: This workflow is still considered experimental.\n", + "Some jobs depend on external services (e.g., W&B, Ray cluster) and host-machine GPU resources,\n", + "and may not work without a properly 
configured local environment." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import lm_buddy\n", + "from lm_buddy.jobs.configs import (\n", + " FinetuningJobConfig,\n", + " FinetuningRayConfig,\n", + " LMHarnessJobConfig,\n", + " LMHarnessEvaluatorConfig,\n", + ")\n", + "from lm_buddy.integrations.huggingface import (\n", + " HuggingFaceRepoConfig,\n", + " AutoModelConfig,\n", + " TextDatasetConfig,\n", + " TrainerConfig,\n", + " AdapterConfig,\n", + ")\n", + "from lm_buddy.integrations.wandb import WandbRunConfig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finetuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Base model to finetune from HuggingFace\n", + "model_config = AutoModelConfig(\n", + " load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n", + ")\n", + "\n", + "# Text dataset for finetuning\n", + "dataset_config = TextDatasetConfig(\n", + " load_from=HuggingFaceRepoConfig(repo_id=\"imdb\"),\n", + " split=\"train[:100]\",\n", + " text_field=\"text\",\n", + ")\n", + "\n", + "# HuggingFace trainer arguments\n", + "trainer_config = TrainerConfig(\n", + " max_seq_length=256,\n", + " per_device_train_batch_size=8,\n", + " learning_rate=1e-4,\n", + " num_train_epochs=1,\n", + " logging_strategy=\"steps\",\n", + " logging_steps=1,\n", + " save_strategy=\"epoch\",\n", + " save_steps=1,\n", + ")\n", + "\n", + "# LORA adapter settings\n", + "adapter_config = AdapterConfig(\n", + " peft_type=\"LORA\",\n", + " task_type=\"CAUSAL_LM\",\n", + " r=8,\n", + " lora_alpha=16,\n", + " lora_dropout=0.2,\n", + ")\n", + "\n", + "# Define tracking for finetuning run\n", + "tracking_config = WandbRunConfig(\n", + " name=\"example-finetuning\",\n", + " project=\"lm-buddy-examples\", # Update to your project name\n", 
+ " entity=\"mozilla-ai\", # Update to your entity name\n", + ")\n", + "\n", + "# Ray train settings\n", + "ray_config = FinetuningRayConfig(\n", + " use_gpu=False, # Change to True if GPUs are available on your machine\n", + " num_workers=2,\n", + ")\n", + "\n", + "# Full finetuning config\n", + "finetuning_config = FinetuningJobConfig(\n", + " model=model_config,\n", + " dataset=dataset_config,\n", + " trainer=trainer_config,\n", + " adapter=adapter_config,\n", + " tracking=tracking_config,\n", + " ray=ray_config,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run the job\n", + "lm_buddy.run_job(finetuning_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the model to be evaluated\n", + "# In this case, directly loading a pretrained model from HuggingFace\n", + "model_config = AutoModelConfig(\n", + " load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n", + ")\n", + "\n", + "# Define evaluation tasks and settings\n", + "evaluator_config = LMHarnessEvaluatorConfig(\n", + " tasks=[\"hellaswag\"],\n", + " limit=10, # Only run 10 samples per task. 
Remove for a real run.\n", + " num_fewshot=5,\n", + ")\n", + "\n", + "# Define tracking for eval run\n", + "tracking_config = WandbRunConfig(\n", + " name=\"example-lm-harness\",\n", + " project=\"lm-buddy-examples\", # Update to your project name\n", + " entity=\"mozilla-ai\", # Update to your entity name\n", + ")\n", + "\n", + "# Full lm-harness job config\n", + "lm_harness_config = LMHarnessJobConfig(\n", + " model=model_config,\n", + " evaluator=evaluator_config,\n", + " tracking=tracking_config,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run the job\n", + "lm_buddy.run_job(lm_harness_config)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lm-buddy", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index c14d4676..b3922c57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "lm-buddy" -version = "0.1.0rc12" +version = "0.2.0" description = "Ray-centric library for finetuning and evaluation of (large) language models." 
repository = "https://github.com/mozilla-ai/lm-buddy" readme = "README.md" @@ -17,6 +17,7 @@ license = "Apache-2.0" packages = [{ include = "lm_buddy", from = "src" }] [tool.poetry.dependencies] +# Core python = ">=3.10,<3.11" click = "8.1.7" torch = "2.1.2" @@ -28,6 +29,18 @@ pydantic = "2.6.0" pydantic-yaml = "1.2.0" ray = { version = "2.9.1", extras = ["default"] } +# HuggingFace +datasets = "2.16.1" +transformers = "4.36.2" +accelerate = "0.26.1" +peft = "0.7.1" +trl = "0.7.10" +bitsandbytes = "0.42.0" + +# Evaluation frameworks +lm-eval = { version = "0.4.1", extras = ["openai"] } +einops = "0.7.0" + [tool.poetry.dev-dependencies] ruff = "0.2.1" pytest = "7.4.3" @@ -39,18 +52,6 @@ nbsphinx = "0.9.3" myst-parser = "2.0.0" recommonmark = "^0.7.1" -[tool.poetry.group.finetuning.dependencies] -datasets = "2.16.1" -transformers = "4.36.2" -accelerate = "0.26.1" -peft = "0.7.1" -trl = "0.7.10" -bitsandbytes = "0.42.0" - -[tool.poetry.group.evaluation.dependencies] -lm-eval = { version = "0.4.1", extras = ["openai"] } -einops = "0.7.0" - [tool.poetry.scripts] lm_buddy = "lm_buddy.cli:cli" diff --git a/src/lm_buddy/__init__.py b/src/lm_buddy/__init__.py index e69de29b..0f30f312 100644 --- a/src/lm_buddy/__init__.py +++ b/src/lm_buddy/__init__.py @@ -0,0 +1,3 @@ +from lm_buddy.jobs import run_job + +__all__ = ["run_job"] diff --git a/src/lm_buddy/cli/run.py b/src/lm_buddy/cli/run.py index 4ce6f70f..64d8ff90 100644 --- a/src/lm_buddy/cli/run.py +++ b/src/lm_buddy/cli/run.py @@ -1,6 +1,11 @@ import click -from lm_buddy.integrations.wandb import WandbArtifactLoader +import lm_buddy +from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, SimpleJobConfig + +# TODO(RD2024-125): We should probably collapse all these commands into a single CLI command +# - Need to figure out best way to polymorphically deserialize the job config classes +# - Do we just add type discriminators at the job config level? 
@click.group(name="run", help="Run an LM Buddy job.") @@ -11,27 +16,19 @@ def group(): @group.command("simple", help="Run the simple test job.") @click.option("--config", type=str) def run_simple(config: str) -> None: - from lm_buddy.jobs.simple import SimpleJobConfig, run_simple - config = SimpleJobConfig.from_yaml_file(config) - run_simple(config) + lm_buddy.run_job(config) @group.command("finetuning", help="Run the HuggingFace LLM finetuning job.") @click.option("--config", type=str) def run_finetuning(config: str) -> None: - from lm_buddy.jobs.finetuning import FinetuningJobConfig, run_finetuning - config = FinetuningJobConfig.from_yaml_file(config) - artifact_loader = WandbArtifactLoader() - run_finetuning(config, artifact_loader) + lm_buddy.run_job(config) @group.command("lm-harness", help="Run the lm-harness evaluation job.") @click.option("--config", type=str) def run_lm_harness(config: str) -> None: - from lm_buddy.jobs.lm_harness import LMHarnessJobConfig, run_lm_harness - config = LMHarnessJobConfig.from_yaml_file(config) - artifact_loader = WandbArtifactLoader() - run_lm_harness(config, artifact_loader) + lm_buddy.run_job(config) diff --git a/src/lm_buddy/cli/schema.py b/src/lm_buddy/cli/schema.py index 06bb5cd3..d33ad25c 100644 --- a/src/lm_buddy/cli/schema.py +++ b/src/lm_buddy/cli/schema.py @@ -1,5 +1,9 @@ +import json + import click +from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, SimpleJobConfig + @click.group(name="schema", help="Get a job configuration schema.") def group(): @@ -8,23 +12,17 @@ def group(): @group.command("simple", help="Schema for the simple test job configuration.") def schema_simple() -> None: - from lm_buddy.jobs.simple import SimpleJobConfig - - schema_json = SimpleJobConfig.schema_json(indent=2) - click.secho(schema_json) + schema = SimpleJobConfig.model_json_schema() + click.secho(json.dumps(schema, indent=2)) @group.command("finetuning", help="Schema for the finetuning job configuration.") def 
schema_finetuning() -> None: - from lm_buddy.jobs.finetuning import FinetuningJobConfig - - schema_json = FinetuningJobConfig.schema_json(indent=2) - click.secho(schema_json) + schema = FinetuningJobConfig.model_json_schema() + click.secho(json.dumps(schema, indent=2)) @group.command("lm-harness", help="Schema for the lm-harness job configuration.") def schema_lm_harness() -> None: - from lm_buddy.jobs.lm_harness import LMHarnessJobConfig - - schema_json = LMHarnessJobConfig.schema_json(indent=2) - click.secho(schema_json) + schema = LMHarnessJobConfig.model_json_schema() + click.secho(json.dumps(schema, indent=2)) diff --git a/src/lm_buddy/jobs/__init__.py b/src/lm_buddy/jobs/__init__.py index e69de29b..68aeec2d 100644 --- a/src/lm_buddy/jobs/__init__.py +++ b/src/lm_buddy/jobs/__init__.py @@ -0,0 +1,30 @@ +from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader +from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_simple +from lm_buddy.jobs.configs import ( + FinetuningJobConfig, + LMBuddyJobConfig, + LMHarnessJobConfig, + SimpleJobConfig, +) + + +def run_job( + config: LMBuddyJobConfig, + artifact_loader: ArtifactLoader = WandbArtifactLoader(), +) -> None: + """Run an LM Buddy job from the configuration. + + Args: + config (LMBuddyJobConfig): Configuration defining the job to run. + artifact_loader (ArtifactLoader): Implementation of the artifact loader protocol. + Defaults to WandbArtifactLoader(). 
+ """ + match config: + case SimpleJobConfig() as simple_config: + run_simple(simple_config, artifact_loader) + case FinetuningJobConfig() as finetuning_config: + run_finetuning(finetuning_config, artifact_loader) + case LMHarnessJobConfig() as lm_harness_config: + run_lm_harness(lm_harness_config, artifact_loader) + case _: + raise ValueError(f"Received invalid job configuration: {config}") diff --git a/src/lm_buddy/jobs/_entrypoints/__init__.py b/src/lm_buddy/jobs/_entrypoints/__init__.py new file mode 100644 index 00000000..bef03bac --- /dev/null +++ b/src/lm_buddy/jobs/_entrypoints/__init__.py @@ -0,0 +1,5 @@ +from lm_buddy.jobs._entrypoints.finetuning import run_finetuning +from lm_buddy.jobs._entrypoints.lm_harness import run_lm_harness +from lm_buddy.jobs._entrypoints.simple import run_simple + +__all__ = ["run_finetuning", "run_lm_harness", "run_simple"] diff --git a/src/lm_buddy/jobs/finetuning/entrypoint.py b/src/lm_buddy/jobs/_entrypoints/finetuning.py similarity index 97% rename from src/lm_buddy/jobs/finetuning/entrypoint.py rename to src/lm_buddy/jobs/_entrypoints/finetuning.py index 3c7e337f..31092410 100644 --- a/src/lm_buddy/jobs/finetuning/entrypoint.py +++ b/src/lm_buddy/jobs/_entrypoints/finetuning.py @@ -17,8 +17,8 @@ default_artifact_name, wandb_init_from_config, ) -from lm_buddy.jobs.finetuning import FinetuningJobConfig -from lm_buddy.jobs.utils import LMBuddyJobType +from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.configs import FinetuningJobConfig def is_tracking_enabled(config: FinetuningJobConfig): diff --git a/src/lm_buddy/jobs/lm_harness/entrypoint.py b/src/lm_buddy/jobs/_entrypoints/lm_harness.py similarity index 97% rename from src/lm_buddy/jobs/lm_harness/entrypoint.py rename to src/lm_buddy/jobs/_entrypoints/lm_harness.py index 511520fb..79bcd958 100644 --- a/src/lm_buddy/jobs/lm_harness/entrypoint.py +++ b/src/lm_buddy/jobs/_entrypoints/lm_harness.py @@ -19,8 +19,8 @@ default_artifact_name, 
wandb_init_from_config, ) -from lm_buddy.jobs.lm_harness import LMHarnessJobConfig, LocalChatCompletionsConfig -from lm_buddy.jobs.utils import LMBuddyJobType +from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.configs import LMHarnessJobConfig, LocalChatCompletionsConfig def get_numeric_metrics( diff --git a/src/lm_buddy/jobs/simple/entrypoint.py b/src/lm_buddy/jobs/_entrypoints/simple.py similarity index 90% rename from src/lm_buddy/jobs/simple/entrypoint.py rename to src/lm_buddy/jobs/_entrypoints/simple.py index 0cfd0f4c..13cd0456 100644 --- a/src/lm_buddy/jobs/simple/entrypoint.py +++ b/src/lm_buddy/jobs/_entrypoints/simple.py @@ -1,6 +1,6 @@ import ray -from lm_buddy.jobs.simple import SimpleJobConfig +from lm_buddy.jobs.configs import SimpleJobConfig @ray.remote diff --git a/src/lm_buddy/jobs/utils.py b/src/lm_buddy/jobs/common.py similarity index 100% rename from src/lm_buddy/jobs/utils.py rename to src/lm_buddy/jobs/common.py diff --git a/src/lm_buddy/jobs/configs/__init__.py b/src/lm_buddy/jobs/configs/__init__.py new file mode 100644 index 00000000..294f4855 --- /dev/null +++ b/src/lm_buddy/jobs/configs/__init__.py @@ -0,0 +1,19 @@ +from lm_buddy.jobs.configs.base import LMBuddyJobConfig +from lm_buddy.jobs.configs.finetuning import FinetuningJobConfig, FinetuningRayConfig +from lm_buddy.jobs.configs.lm_harness import ( + LMHarnessEvaluatorConfig, + LMHarnessJobConfig, + LocalChatCompletionsConfig, +) +from lm_buddy.jobs.configs.simple import SimpleJobConfig + +__all__ = [ + "LMBuddyJobConfig", + "SimpleJobConfig", + "FinetuningJobConfig", + "FinetuningRayConfig", + "LMHarnessEvaluatorConfig", + "LMHarnessJobConfig", + "LocalChatCompletionsConfig", + "SimpleJobConfig", +] diff --git a/src/lm_buddy/jobs/configs/base.py b/src/lm_buddy/jobs/configs/base.py new file mode 100644 index 00000000..a6639488 --- /dev/null +++ b/src/lm_buddy/jobs/configs/base.py @@ -0,0 +1,40 @@ +import contextlib +import tempfile +from pathlib import Path + +from 
pydantic_yaml import parse_yaml_file_as, to_yaml_file + +from lm_buddy.types import BaseLMBuddyConfig + + +class LMBuddyJobConfig(BaseLMBuddyConfig): + """Configuration that comprises the entire input to an LM Buddy job. + + This class implements helper methods for de/serializing the configuration from file. + + Currently, there is a 1:1 mapping between job entrypoints and job config implementations, + but this is not rigidly constrained by the interface. This may change in the future. + """ + + @classmethod + def from_yaml_file(cls, path: Path | str): + return parse_yaml_file_as(cls, path) + + def to_yaml_file(self, path: Path | str): + to_yaml_file(path, self, exclude_none=True) + + @contextlib.contextmanager + def to_tempfile(self, *, name: str = "config.yaml", dir: str | Path | None = None): + """Enter a context manager with the config written to a temporary YAML file. + + Keyword Args: + name (str): Name of the config file in the tmp directory. Defaults to "config.yaml". + dir (str | Path | None): Root path of the temporary directory. Defaults to None. + + Yields: + Path to the temporary config file. 
+ """ + with tempfile.TemporaryDirectory(dir=dir) as tmpdir: + config_path = Path(tmpdir) / name + self.to_yaml_file(config_path) + yield config_path diff --git a/src/lm_buddy/jobs/finetuning/config.py b/src/lm_buddy/jobs/configs/finetuning.py similarity index 96% rename from src/lm_buddy/jobs/finetuning/config.py rename to src/lm_buddy/jobs/configs/finetuning.py index b649e4c5..c5448872 100644 --- a/src/lm_buddy/jobs/finetuning/config.py +++ b/src/lm_buddy/jobs/configs/finetuning.py @@ -9,6 +9,7 @@ TrainerConfig, ) from lm_buddy.integrations.wandb import WandbRunConfig +from lm_buddy.jobs.configs import LMBuddyJobConfig from lm_buddy.types import BaseLMBuddyConfig @@ -23,7 +24,7 @@ class FinetuningRayConfig(BaseLMBuddyConfig): storage_path: str | None = None # TODO: This should be set globally somehow -class FinetuningJobConfig(BaseLMBuddyConfig): +class FinetuningJobConfig(LMBuddyJobConfig): """Configuration to submit an LLM finetuning job.""" model: AutoModelConfig diff --git a/src/lm_buddy/jobs/lm_harness/config.py b/src/lm_buddy/jobs/configs/lm_harness.py similarity index 94% rename from src/lm_buddy/jobs/lm_harness/config.py rename to src/lm_buddy/jobs/configs/lm_harness.py index 691e8f45..c1710626 100644 --- a/src/lm_buddy/jobs/lm_harness/config.py +++ b/src/lm_buddy/jobs/configs/lm_harness.py @@ -8,6 +8,7 @@ ) from lm_buddy.integrations.vllm import InferenceServerConfig from lm_buddy.integrations.wandb import WandbRunConfig +from lm_buddy.jobs.configs import LMBuddyJobConfig from lm_buddy.types import BaseLMBuddyConfig @@ -43,7 +44,7 @@ class LMHarnessEvaluatorConfig(BaseLMBuddyConfig): limit: int | float | None = None -class LMHarnessJobConfig(BaseLMBuddyConfig): +class LMHarnessJobConfig(LMBuddyJobConfig): """Configuration to run an lm-evaluation-harness evaluation job.""" model: AutoModelConfig | LocalChatCompletionsConfig diff --git a/src/lm_buddy/jobs/configs/simple.py b/src/lm_buddy/jobs/configs/simple.py new file mode 100644 index 00000000..f0270923 
--- /dev/null +++ b/src/lm_buddy/jobs/configs/simple.py @@ -0,0 +1,7 @@ +from lm_buddy.jobs.configs import LMBuddyJobConfig + + +class SimpleJobConfig(LMBuddyJobConfig): + """Simple job submission config.""" + + magic_number: int diff --git a/src/lm_buddy/jobs/finetuning/__init__.py b/src/lm_buddy/jobs/finetuning/__init__.py deleted file mode 100644 index 5b354058..00000000 --- a/src/lm_buddy/jobs/finetuning/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from lm_buddy.jobs.finetuning.config import FinetuningJobConfig, FinetuningRayConfig -from lm_buddy.jobs.finetuning.entrypoint import run_finetuning - -__all__ = ["FinetuningJobConfig", "FinetuningRayConfig", "run_finetuning"] diff --git a/src/lm_buddy/jobs/lm_harness/__init__.py b/src/lm_buddy/jobs/lm_harness/__init__.py deleted file mode 100644 index 9ede24ff..00000000 --- a/src/lm_buddy/jobs/lm_harness/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from lm_buddy.jobs.lm_harness.config import ( - LMHarnessEvaluatorConfig, - LMHarnessJobConfig, - LocalChatCompletionsConfig, -) -from lm_buddy.jobs.lm_harness.entrypoint import run_lm_harness - -__all__ = [ - "LMHarnessEvaluatorConfig", - "LMHarnessJobConfig", - "LocalChatCompletionsConfig", - "run_lm_harness", -] diff --git a/src/lm_buddy/jobs/simple/__init__.py b/src/lm_buddy/jobs/simple/__init__.py deleted file mode 100644 index ef8f4264..00000000 --- a/src/lm_buddy/jobs/simple/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from lm_buddy.jobs.simple.config import SimpleJobConfig -from lm_buddy.jobs.simple.entrypoint import run_simple - -__all__ = ["SimpleJobConfig", "run_simple"] diff --git a/src/lm_buddy/jobs/simple/config.py b/src/lm_buddy/jobs/simple/config.py deleted file mode 100644 index ec3f01d3..00000000 --- a/src/lm_buddy/jobs/simple/config.py +++ /dev/null @@ -1,7 +0,0 @@ -from lm_buddy.types import BaseLMBuddyConfig - - -class SimpleJobConfig(BaseLMBuddyConfig): - """Simple job submission config.""" - - magic_number: int diff --git a/src/lm_buddy/types.py 
b/src/lm_buddy/types.py index 6ccfeb8a..ce42b7de 100644 --- a/src/lm_buddy/types.py +++ b/src/lm_buddy/types.py @@ -1,11 +1,7 @@ -import contextlib -import tempfile -from pathlib import Path from typing import Annotated, Any import torch from pydantic import BaseModel, BeforeValidator, PlainSerializer, WithJsonSchema -from pydantic_yaml import parse_yaml_file_as, to_yaml_file def validate_torch_dtype(x: Any) -> torch.dtype: @@ -42,26 +38,3 @@ class BaseLMBuddyConfig( Defines some common settings used by all subclasses. """ - - @classmethod - def from_yaml_file(cls, path: Path | str): - return parse_yaml_file_as(cls, path) - - def to_yaml_file(self, path: Path | str): - to_yaml_file(path, self, exclude_none=True) - - @contextlib.contextmanager - def to_tempfile(self, *, name: str = "config.yaml", dir: str | Path | None = None): - """Enter a context manager with the config written to a temporary YAML file. - - Args: - name (str): Name of the config file in the tmp directory. Defaults to "config.yaml" - dir (str | Path, optional): Root path of the temporary directory - - Returns: - Path to the temporary config file - """ - with tempfile.TemporaryDirectory(dir=dir) as tmpdir: - config_path = Path(tmpdir) / name - self.to_yaml_file(config_path) - yield config_path diff --git a/tests/integration/test_finetuning.py b/tests/integration/test_finetuning.py index 6b193468..01734410 100644 --- a/tests/integration/test_finetuning.py +++ b/tests/integration/test_finetuning.py @@ -1,8 +1,9 @@ import pytest +import lm_buddy from lm_buddy.integrations.huggingface import AutoModelConfig, TextDatasetConfig, TrainerConfig from lm_buddy.integrations.wandb import ArtifactType, WandbArtifactConfig, WandbRunConfig -from lm_buddy.jobs.finetuning import FinetuningJobConfig, FinetuningRayConfig, run_finetuning +from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig from tests.test_utils import FakeArtifactLoader @@ -40,7 +41,7 @@ def 
test_finetuning_job(llm_model_artifact, text_dataset_artifact, job_config): artifact_loader.log_artifact(text_dataset_artifact) # Run test job - run_finetuning(job_config, artifact_loader) + lm_buddy.run_job(job_config, artifact_loader=artifact_loader) # Two input artifacts, and one output model artifact produced artifacts = artifact_loader.get_artifacts() diff --git a/tests/integration/test_lm_harness.py b/tests/integration/test_lm_harness.py index b6837db6..f8b8dd32 100644 --- a/tests/integration/test_lm_harness.py +++ b/tests/integration/test_lm_harness.py @@ -1,8 +1,9 @@ import pytest +import lm_buddy from lm_buddy.integrations.huggingface import AutoModelConfig from lm_buddy.integrations.wandb import WandbArtifactConfig, WandbRunConfig -from lm_buddy.jobs.lm_harness import LMHarnessEvaluatorConfig, LMHarnessJobConfig, run_lm_harness +from lm_buddy.jobs.configs import LMHarnessEvaluatorConfig, LMHarnessJobConfig from tests.test_utils import FakeArtifactLoader @@ -27,7 +28,7 @@ def test_lm_harness_job_with_tracking(llm_model_artifact, job_config): artifact_loader.log_artifact(llm_model_artifact) # Run test job - run_lm_harness(job_config, artifact_loader) + lm_buddy.run_job(job_config, artifact_loader=artifact_loader) # One input artifact, and one eval artifact produced assert artifact_loader.num_artifacts() == 2 @@ -42,7 +43,7 @@ def test_lm_harness_job_no_tracking(llm_model_artifact, job_config): artifact_loader.log_artifact(llm_model_artifact) # Run test job - run_lm_harness(job_config, artifact_loader) + lm_buddy.run_job(job_config, artifact_loader=artifact_loader) # One input artifact, no additional eval artifacts produced assert artifact_loader.num_artifacts() == 1 diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index ffb9852a..c1e7f7ca 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -1,7 +1,7 @@ import ray -from lm_buddy.jobs.simple import SimpleJobConfig -from 
lm_buddy.jobs.simple.entrypoint import get_magic_number +from lm_buddy.jobs._entrypoints.simple import get_magic_number +from lm_buddy.jobs.configs import SimpleJobConfig def test_simple_remote_task(): diff --git a/tests/unit/jobs/configs/__init__.py b/tests/unit/jobs/configs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/jobs/test_finetuning_config.py b/tests/unit/jobs/configs/test_finetuning_config.py similarity index 97% rename from tests/unit/jobs/test_finetuning_config.py rename to tests/unit/jobs/configs/test_finetuning_config.py index 629b6c3e..cce77868 100644 --- a/tests/unit/jobs/test_finetuning_config.py +++ b/tests/unit/jobs/configs/test_finetuning_config.py @@ -2,7 +2,7 @@ from pydantic import ValidationError from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, TextDatasetConfig -from lm_buddy.jobs.finetuning import FinetuningJobConfig, FinetuningRayConfig +from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig from tests.test_utils import copy_pydantic_json diff --git a/tests/unit/jobs/configs/test_job_config.py b/tests/unit/jobs/configs/test_job_config.py new file mode 100644 index 00000000..73c83dfe --- /dev/null +++ b/tests/unit/jobs/configs/test_job_config.py @@ -0,0 +1,9 @@ +from lm_buddy.jobs.configs import SimpleJobConfig + + +def test_config_as_tempfile(): + config = SimpleJobConfig(magic_number=42) + config_name = "my-job-config.yaml" + with config.to_tempfile(name=config_name) as path: + assert path.name == config_name + assert SimpleJobConfig.from_yaml_file(path) == config diff --git a/tests/unit/jobs/test_lm_harness_config.py b/tests/unit/jobs/configs/test_lm_harness_config.py similarity index 98% rename from tests/unit/jobs/test_lm_harness_config.py rename to tests/unit/jobs/configs/test_lm_harness_config.py index 3d55c023..16135e09 100644 --- a/tests/unit/jobs/test_lm_harness_config.py +++ b/tests/unit/jobs/configs/test_lm_harness_config.py @@ -2,7 +2,7 @@ from 
pydantic import ValidationError from lm_buddy.integrations.vllm import InferenceServerConfig -from lm_buddy.jobs.lm_harness import ( +from lm_buddy.jobs.configs import ( LMHarnessEvaluatorConfig, LMHarnessJobConfig, LocalChatCompletionsConfig, diff --git a/tests/unit/jobs/test_run_job.py b/tests/unit/jobs/test_run_job.py new file mode 100644 index 00000000..47d78a34 --- /dev/null +++ b/tests/unit/jobs/test_run_job.py @@ -0,0 +1,10 @@ +import pytest + +import lm_buddy +from lm_buddy.integrations.huggingface import AutoModelConfig + + +def test_invalid_config_error(): + not_a_job_config = AutoModelConfig(load_from="distilgpt2") + with pytest.raises(ValueError): + lm_buddy.run_job(not_a_job_config) diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index aa163635..47dc0b06 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -2,7 +2,6 @@ import torch from pydantic import ValidationError -from lm_buddy.jobs.simple import SimpleJobConfig from lm_buddy.types import BaseLMBuddyConfig, SerializableTorchDtype @@ -32,11 +31,3 @@ class TestConfig(BaseLMBuddyConfig): TestConfig(torch_dtype=5) with pytest.raises(ValueError): TestConfig(torch_dtype="dogs") - - -def test_config_as_tempfile(): - config = SimpleJobConfig(magic_number=42) - config_name = "my-special-config.yaml" - with config.to_tempfile(name=config_name) as path: - assert path.name == config_name - assert SimpleJobConfig.from_yaml_file(path) == config