diff --git a/README.md b/README.md index 8675f481..9ef78dcf 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,6 @@ To see all available job commands, run `lm_buddy run --help` Once LM Buddy is installed in your local Python environment, usage is as follows: ``` -# Simple test -lm_buddy run simple --config simple_config.yaml - # LLM finetuning lm_buddy run finetuning --config finetuning_config.yaml diff --git a/examples/configs/evaluation/lm_harness_hf_config.yaml b/examples/configs/evaluation/lm_harness_hf_config.yaml index 6f3271bf..75b62dff 100644 --- a/examples/configs/evaluation/lm_harness_hf_config.yaml +++ b/examples/configs/evaluation/lm_harness_hf_config.yaml @@ -4,7 +4,7 @@ model: torch_dtype: "bfloat16" # Settings specific to lm_harness.evaluate -evaluator: +evaluation: tasks: ["hellaswag"] num_fewshot: 5 limit: 10 diff --git a/examples/configs/evaluation/lm_harness_inference_server_config.yaml b/examples/configs/evaluation/lm_harness_inference_server_config.yaml index 7f3ead2d..a0568803 100644 --- a/examples/configs/evaluation/lm_harness_inference_server_config.yaml +++ b/examples/configs/evaluation/lm_harness_inference_server_config.yaml @@ -5,7 +5,6 @@ model: # HuggingFace repo for the engine model being hosted engine: repo_id: "distilgpt2" - revision: "main" # # W&B artifact can also be specified as the engine model to generate a lineage # engine: # name: "wandb-artifact-name" @@ -15,7 +14,7 @@ model: tokenizer_backend: "huggingface" # Settings specific to lm_harness.evaluate -evaluator: +evaluation: tasks: ["gsm8k"] num_fewshot: 5 limit: 10 diff --git a/examples/configs/evaluation/lm_harness_quantized_config.yaml b/examples/configs/evaluation/lm_harness_quantized_config.yaml deleted file mode 100644 index f73387a9..00000000 --- a/examples/configs/evaluation/lm_harness_quantized_config.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# Model to evaluate -model: - load_from: "tiiuae/falcon-7b" - torch_dtype: "bfloat16" - -# Settings specific to lm_harness.evaluate -evaluator: - tasks: ["hellaswag", "mmlu"] - num_fewshot: 5 - limit: 10 - -quantization: - load_in_4bit: True - bnb_4bit_quant_type: "fp4" - bnb_4bit_compute_dtype: "bfloat16" - -# Tracking info for where to log the run results -tracking: - name: "lm-buddy-lm-harness-inference" - project: "lm-buddy-examples" - entity: "mozilla-ai" \ No newline at end of file diff --git a/examples/configs/simple/simple_config.yaml b/examples/configs/simple/simple_config.yaml deleted file mode 100644 index 339b64c3..00000000 --- a/examples/configs/simple/simple_config.yaml +++ /dev/null @@ -1 +0,0 @@ -magic_number: 42 diff --git a/examples/dev_submission/configs/eval_config.yaml b/examples/dev_submission/configs/eval_config.yaml index cbb35cd6..dd3ea018 100644 --- a/examples/dev_submission/configs/eval_config.yaml +++ b/examples/dev_submission/configs/eval_config.yaml @@ -1,12 +1,12 @@ # Model to evaluate, specificed as a vLLM endpoint model: - base_url: "http://your.cluster.ip:8000/v1/" - tokenizer: "mistralai/Mistral-7B-v0.1" - model_name: "mistralai/Mistral-7B-v0.1" - tokenizer_backend: "huggingface" + base_url: "http://your.cluster.ip:8000/v1/" + tokenizer: "mistralai/Mistral-7B-v0.1" + model_name: "mistralai/Mistral-7B-v0.1" + tokenizer_backend: "huggingface" # Settings specific to lm_harness.evaluate -evaluator: +evaluation: tasks: ["gsm8k"] num_fewshot: 5 diff --git a/examples/notebooks/dev_ray_submission.ipynb b/examples/notebooks/dev_ray_submission.ipynb index 9da68b9a..0a9ecce3 100644 --- a/examples/notebooks/dev_ray_submission.ipynb 
+++ b/examples/notebooks/dev_ray_submission.ipynb @@ -95,7 +95,7 @@ "# pip contains an export of the dependencies for the LM Buddy package (see CONTRIBUTING.md for how to generate)\n", "\n", "runtime_env = {\n", - " \"working_dir\": f\"{root_dir}/examples/configs/simple\",\n", + " \"working_dir\": f\"{root_dir}/examples/configs/finetuning\",\n", " \"env_vars\": {\"WANDB_API_KEY\": os.environ[\"WANDB_API_KEY\"]}, # If running a job that uses W&B\n", " \"py_modules\": [str(lm_buddy_module)],\n", " \"pip\": \"requirements.txt\", # See CONTRIBUTING.md for how to generate this\n", @@ -111,8 +111,8 @@ "source": [ "# Submit the job to the Ray cluster\n", "# Note: LM Buddy is invoked by 'python -m lm_buddy run ...' since the CLI is not installed in the environment\n", - "simple_job = client.submit_job(\n", - " entrypoint=f\"python -m lm_buddy run simple --config simple_config.yaml\",\n", + "submission_id = client.submit_job(\n", + " entrypoint=f\"python -m lm_buddy run finetuning --config finetuning_config.yaml\",\n", " runtime_env=runtime_env,\n", ")" ] @@ -120,16 +120,16 @@ { "cell_type": "code", "execution_count": null, + "id": "a373b21d1d57778b", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# The client outputs a string with a job ID\n", "# Jobs can be interacted with and terminated via client methods\n", - "client.stop_job(simple_job)" - ], - "metadata": { - "collapsed": false - }, - "id": "a373b21d1d57778b" + "client.stop_job(submission_id)" + ] }, { "cell_type": "markdown", @@ -199,18 +199,24 @@ "source": [ "import os\n", "\n", - "from lm_buddy.jobs.simple import SimpleJobConfig\n", + "from lm_buddy.jobs.configs import FinetuningJobConfig\n", + "\n", + "# Parameters for a programatic sweep\n", + "learning_rates = [1e-5, 1e-4, 1e-3, 1e-2]\n", + "\n", "\n", - "# Generate job configs programatically for sweeps over parameter ranges\n", - "magic_numbers = [0, 10, 20, 40]\n", + "# Load a \"base\" config from file with some suitable defaults\n", + "base_config = FinetuningJobConfig.from_yaml_file(\n", + " f\"{root_dir}/examples/configs/finetuning/finetuning_config.yaml\"\n", + ")\n", "\n", - "for number in magic_numbers:\n", - " # Instantitate config in your workflow script\n", - " # You may also want to read a \"base\" config from file with some suitable defaults\n", - " config = SimpleJobConfig(magic_number=number)\n", + "for lr in learning_rates:\n", + " # Modify based on current iteration lr\n", + " job_config = base_config.model_copy(deep=True)\n", + " job_config.trainer.learning_rate = lr\n", "\n", " # `config_path` is the fully qualified path to the config file on your local filesystem\n", - " with config.to_tempfile(name=\"config.yaml\") as config_path:\n", + " with job_config.to_tempfile(name=\"config.yaml\") as config_path:\n", " # `config_path.parent` is the working directory\n", " runtime_env = {\n", " \"working_dir\": str(config_path.parent),\n", @@ -221,7 +227,7 @@ "\n", " # `config_path.name` is the file name within the working directory, i.e., \"config.yaml\"\n", " client.submit_job(\n", - " entrypoint=f\"python -m lm_buddy run simple --config {config_path.name}\",\n", + " entrypoint=f\"python -m lm_buddy run finetuning --config {config_path.name}\",\n", " runtime_env=runtime_env,\n", " )" ] diff --git a/examples/notebooks/direct_job_execution.ipynb b/examples/notebooks/direct_job_execution.ipynb index 077cda2a..b9b34487 100644 --- a/examples/notebooks/direct_job_execution.ipynb +++ b/examples/notebooks/direct_job_execution.ipynb @@ -13,8 +13,10 @@ 
"source": [ "This notebook illustrates how to use LM Buddy as a library to run jobs directly on the host machine.\n", "\n", - "Jobs are fully specified by a `lm_buddy.jobs.configs.LMBuddyJobConfig` \n", - "and are executed with the `lm_buddy.run_job` method.\n", + "Jobs are executed in the following manner:\n", + "- Construct an instance of the `lm_buddy.LMBuddy` class\n", + "- Construct an instance of your desired job configuration\n", + "- Execute a job via the `LMBuddy.finetune` or `LMBuddy.evaluate` methods\n", "\n", "**Warning**: This workflow is still considered experimental.\n", "Some jobs depend on external services (e.g., W&B, Ray cluster) and host-machine GPU resources,\n", @@ -34,15 +36,14 @@ "metadata": {}, "outputs": [], "source": [ - "import lm_buddy\n", + "from lm_buddy import LMBuddy\n", "from lm_buddy.jobs.configs import (\n", " FinetuningJobConfig,\n", " FinetuningRayConfig,\n", " LMHarnessJobConfig,\n", - " LMHarnessEvaluatorConfig,\n", + " LMHarnessEvaluationConfig,\n", ")\n", "from lm_buddy.integrations.huggingface import (\n", - " HuggingFaceRepoConfig,\n", " AutoModelConfig,\n", " TextDatasetConfig,\n", " TrainerConfig,\n", @@ -65,13 +66,11 @@ "outputs": [], "source": [ "# Base model to finetune from HuggingFace\n", - "model_config = AutoModelConfig(\n", - " load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n", - ")\n", + "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n", "\n", "# Text dataset for finetuning\n", "dataset_config = TextDatasetConfig(\n", - " load_from=HuggingFaceRepoConfig(repo_id=\"imdb\"),\n", + " load_from=\"imdb\",\n", " split=\"train[:100]\",\n", " text_field=\"text\",\n", ")\n", @@ -128,7 +127,8 @@ "outputs": [], "source": [ "# Run the job\n", - "lm_buddy.run_job(finetuning_config)" + "buddy = LMBuddy()\n", + "buddy.finetune(finetuning_config)" ] }, { @@ -146,12 +146,10 @@ "source": [ "# Define the model to be evaluated\n", "# In this case, loading directly a pretrained model from HuggingFace\n", - "model_config = AutoModelConfig(\n", - " load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n", - ")\n", + "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n", "\n", "# Define evaluation tasks and settings\n", - "evaluator_config = LMHarnessEvaluatorConfig(\n", + "evaluation_config = LMHarnessEvaluationConfig(\n", " tasks=[\"hellaswag\"],\n", " limit=10, # Only run 10 samples per task. 
Remove for a real run.\n", " num_fewshot=5,\n", @@ -167,7 +165,7 @@ "# Full lm-harness job config\n", "lm_harness_config = LMHarnessJobConfig(\n", " model=model_config,\n", - " evaluator=evaluator_config,\n", + " evaluation=evaluation_config,\n", " tracking=tracking_config,\n", ")" ] @@ -179,7 +177,8 @@ "outputs": [], "source": [ "# Run the job\n", - "lm_buddy.run_job(lm_harness_config)" + "buddy = LMBuddy()\n", + "eval_results = buddy.evaluate(lm_harness_config)" ] } ], diff --git a/src/lm_buddy/__init__.py b/src/lm_buddy/__init__.py index 0f30f312..d6da68b4 100644 --- a/src/lm_buddy/__init__.py +++ b/src/lm_buddy/__init__.py @@ -1,3 +1,3 @@ -from lm_buddy.jobs import run_job +from lm_buddy.buddy import LMBuddy -__all__ = ["run_job"] +__all__ = ["LMBuddy"] diff --git a/src/lm_buddy/buddy.py b/src/lm_buddy/buddy.py new file mode 100644 index 00000000..97d688b9 --- /dev/null +++ b/src/lm_buddy/buddy.py @@ -0,0 +1,37 @@ +from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader +from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_prometheus +from lm_buddy.jobs.common import EvaluationResult, FinetuningResult +from lm_buddy.jobs.configs import ( + EvaluationJobConfig, + FinetuningJobConfig, + LMHarnessJobConfig, + PrometheusJobConfig, +) + + +class LMBuddy: + """Your buddy in the (L)LM space. + + Simple wrapper around executable functions for tasks available in the library. + """ + + def __init__(self, artifact_loader: ArtifactLoader = WandbArtifactLoader()): + self._artifact_loader = artifact_loader + + def finetune(self, config: FinetuningJobConfig) -> FinetuningResult: + """Run a supervised finetuning task with the provided configuration.""" + finetuning_result = run_finetuning(config, self._artifact_loader) + return finetuning_result + + def evaluate(self, config: EvaluationJobConfig) -> EvaluationResult: + """Run an evaluation task with the provided configuration. + + The underlying evaluation framework is determined by the configuration type. + """ + match config: + case LMHarnessJobConfig() as lm_harness_config: + return run_lm_harness(lm_harness_config, self._artifact_loader) + case PrometheusJobConfig() as prometheus_config: + return run_prometheus(prometheus_config, self._artifact_loader) + case _: + raise ValueError(f"Invalid configuration for evaluation: {type(config)}") diff --git a/src/lm_buddy/cli/run.py b/src/lm_buddy/cli/run.py index 1f0432c7..b0d2f46b 100644 --- a/src/lm_buddy/cli/run.py +++ b/src/lm_buddy/cli/run.py @@ -1,16 +1,12 @@ import click -import lm_buddy -from lm_buddy.jobs.configs import ( - FinetuningJobConfig, - LMHarnessJobConfig, - PrometheusJobConfig, - SimpleJobConfig, -) +from lm_buddy import LMBuddy +from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, PrometheusJobConfig -# TODO(RD2024-125): We should probably collapse all these commands into a single CLI command -# - Need to figure out best way to polymorphically deserialize the job config classes -# - Do we just add type discriminators at the job config level?
+# TODO(RD2024-125): Collapse the run commands into `lm-buddy finetune` and `lm-buddy evaluate` +# to match the methods on the `LMBuddy` class + +buddy = LMBuddy() @click.group(name="run", help="Run an LM Buddy job.") @@ -18,29 +14,22 @@ def group(): pass -@group.command("simple", help="Run the simple test job.") -@click.option("--config", type=str) -def run_simple(config: str) -> None: - config = SimpleJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) - - @group.command("finetuning", help="Run the HuggingFace LLM finetuning job.") @click.option("--config", type=str) def run_finetuning(config: str) -> None: config = FinetuningJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) + buddy.finetune(config) @group.command("lm-harness", help="Run the lm-harness evaluation job.") @click.option("--config", type=str) def run_lm_harness(config: str) -> None: config = LMHarnessJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) + buddy.evaluate(config) @group.command("prometheus", help="Run the prometheus evaluation job.") @click.option("--config", type=str) def run_prometheus(config: str) -> None: config = PrometheusJobConfig.from_yaml_file(config) - lm_buddy.run_job(config) + buddy.evaluate(config) diff --git a/src/lm_buddy/cli/schema.py b/src/lm_buddy/cli/schema.py index 4577ee84..8ff1ecab 100644 --- a/src/lm_buddy/cli/schema.py +++ b/src/lm_buddy/cli/schema.py @@ -2,12 +2,7 @@ import click -from lm_buddy.jobs.configs import ( - FinetuningJobConfig, - LMHarnessJobConfig, - PrometheusJobConfig, - SimpleJobConfig, -) +from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, PrometheusJobConfig @click.group(name="schema", help="Get a job configuration schema.") @@ -15,12 +10,6 @@ def group(): pass -@group.command("simple", help="Schema for the simple test job configuration.") -def schema_simple() -> None: - schema = SimpleJobConfig.model_json_schema() - click.secho(json.dumps(schema, indent=2)) - - @group.command("finetuning", help="Schema for the finetuning job configuration.") def schema_finetuning() -> None: schema = FinetuningJobConfig.model_json_schema() diff --git a/src/lm_buddy/integrations/huggingface/__init__.py b/src/lm_buddy/integrations/huggingface/__init__.py index c42646b2..aacabf25 100644 --- a/src/lm_buddy/integrations/huggingface/__init__.py +++ b/src/lm_buddy/integrations/huggingface/__init__.py @@ -1,5 +1,4 @@ # ruff: noqa: I001 -from lm_buddy.integrations.huggingface.repo_config import * from lm_buddy.integrations.huggingface.adapter_config import * from lm_buddy.integrations.huggingface.dataset_config import * from lm_buddy.integrations.huggingface.model_config import * diff --git a/src/lm_buddy/integrations/huggingface/asset_loader.py b/src/lm_buddy/integrations/huggingface/asset_loader.py index 9635655c..ce5a61ed 100644 --- a/src/lm_buddy/integrations/huggingface/asset_loader.py +++ b/src/lm_buddy/integrations/huggingface/asset_loader.py @@ -17,7 +17,6 @@ AutoModelConfig, AutoTokenizerConfig, DatasetConfig, - HuggingFaceRepoConfig, QuantizationConfig, ) from lm_buddy.integrations.wandb import ( @@ -25,9 +24,7 @@ WandbArtifactConfig, get_artifact_filesystem_path, ) - -HuggingFaceAssetPath = HuggingFaceRepoConfig | WandbArtifactConfig -"""Config that can be resolved to a HuggingFace name/path.""" +from lm_buddy.paths import AssetPath, FilePath, HuggingFaceRepoID def resolve_peft_and_pretrained(path: str) -> tuple[str, str | None]: @@ -66,23 +63,18 @@ class HuggingFaceAssetLoader: def __init__(self, artifact_loader: ArtifactLoader): 
self._artifact_loader = artifact_loader - def resolve_asset_path(self, path: HuggingFaceAssetPath) -> tuple[str, str | None]: - """Resolve the actual HuggingFace name/path from a config. - - Currently, two config types contain references to a loadable HuggingFace path: - (1) A `HuggingFaceRepoConfig` that contains the repo path directly - (2) A `WandbArtifactConfig` where the filesystem path is resolved from the artifact - """ + def resolve_asset_path(self, path: AssetPath) -> str: + """Resolve the actual HuggingFace name/path from an `AssetPath`.""" match path: - case HuggingFaceRepoConfig(repo_id, revision): - load_path, revision = repo_id, revision + case FilePath(value): + return str(value) + case HuggingFaceRepoID(repo_id): + return repo_id case WandbArtifactConfig() as artifact_config: artifact = self._artifact_loader.use_artifact(artifact_config) - load_path = get_artifact_filesystem_path(artifact) - revision = None + return str(get_artifact_filesystem_path(artifact)) case unknown_path: raise ValueError(f"Unable to resolve asset path from {unknown_path}.") - return str(load_path), revision def load_pretrained_config( self, @@ -92,10 +84,8 @@ def load_pretrained_config( An exception is raised if the HuggingFace repo does not contain a `config.json` file. """ - model_path, revision = self.resolve_asset_path(config.load_from) - return AutoConfig.from_pretrained( - pretrained_model_name_or_path=model_path, revision=revision - ) + config_path = self.resolve_asset_path(config.load_from) + return AutoConfig.from_pretrained(pretrained_model_name_or_path=config_path) def load_pretrained_model( self, @@ -122,10 +112,9 @@ def load_pretrained_model( # TODO: HuggingFace has many AutoModel classes with different "language model heads" # Can we abstract this to load with any type of AutoModel class? - model_path, revision = self.resolve_asset_path(config.load_from) + model_path = self.resolve_asset_path(config.load_from) return AutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path=model_path, - revision=revision, trust_remote_code=config.trust_remote_code, torch_dtype=config.torch_dtype, quantization_config=bnb_config, @@ -137,10 +126,9 @@ def load_pretrained_tokenizer(self, config: AutoTokenizerConfig) -> PreTrainedTo An exception is raised if the HuggingFace repo does not contain a `tokenizer.json` file. """ - tokenizer_path, revision = self.resolve_asset_path(config.load_from) + tokenizer_path = self.resolve_asset_path(config.load_from) tokenizer = AutoTokenizer.from_pretrained( pretrained_model_name_or_path=tokenizer_path, - revision=revision, trust_remote_code=config.trust_remote_code, use_fast=config.use_fast, ) @@ -156,10 +144,10 @@ def load_dataset(self, config: DatasetConfig) -> Dataset: When loading from HuggingFace directly, the `Dataset` is for the provided split. When loading from disk, the saved files must be for a dataset else an exception is raised. """ - dataset_path, revision = self.resolve_asset_path(config.load_from) + dataset_path = self.resolve_asset_path(config.load_from) # Dataset loading requires a different method if from a HF vs.
disk - if isinstance(config.load_from, HuggingFaceRepoConfig): - return load_dataset(dataset_path, revision=revision, split=config.split) + if isinstance(config.load_from, HuggingFaceRepoID): + return load_dataset(dataset_path, split=config.split) else: match load_from_disk(dataset_path): case Dataset() as dataset: diff --git a/src/lm_buddy/integrations/huggingface/dataset_config.py b/src/lm_buddy/integrations/huggingface/dataset_config.py index 5ff398c7..b74682b8 100644 --- a/src/lm_buddy/integrations/huggingface/dataset_config.py +++ b/src/lm_buddy/integrations/huggingface/dataset_config.py @@ -1,7 +1,6 @@ -from pydantic import field_validator, model_validator +from pydantic import model_validator -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, convert_string_to_repo_config -from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath, HuggingFaceRepoID from lm_buddy.types import BaseLMBuddyConfig DEFAULT_TEXT_FIELD: str = "text" @@ -10,15 +9,11 @@ class DatasetConfig(BaseLMBuddyConfig): """Base configuration to load a HuggingFace dataset.""" - load_from: HuggingFaceRepoConfig | WandbArtifactConfig + load_from: AssetPath split: str | None = None test_size: float | None = None seed: int | None = None - _validate_load_from_string = field_validator("load_from", mode="before")( - convert_string_to_repo_config - ) - @model_validator(mode="after") def validate_split_if_huggingface_repo(cls, config: "DatasetConfig"): """ @@ -26,9 +21,7 @@ def validate_split_if_huggingface_repo(cls, config: "DatasetConfig"): This makes it such that the `load_dataset` function returns the type `Dataset` instead of `DatasetDict`, which makes some of the downstream logic easier. """ - load_from = config.load_from - split = config.split - if split is None and isinstance(load_from, HuggingFaceRepoConfig): + if config.split is None and isinstance(config.load_from, HuggingFaceRepoID): raise ValueError( "A `split` must be specified when loading a dataset directly from HuggingFace." ) diff --git a/src/lm_buddy/integrations/huggingface/model_config.py b/src/lm_buddy/integrations/huggingface/model_config.py index 663d70de..41c8cc5d 100644 --- a/src/lm_buddy/integrations/huggingface/model_config.py +++ b/src/lm_buddy/integrations/huggingface/model_config.py @@ -1,7 +1,4 @@ -from pydantic import field_validator - -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, convert_string_to_repo_config -from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig, SerializableTorchDtype @@ -11,10 +8,6 @@ class AutoModelConfig(BaseLMBuddyConfig): The model to load can either be a HuggingFace repo or an artifact reference on W&B. 
""" - load_from: HuggingFaceRepoConfig | WandbArtifactConfig + load_from: AssetPath trust_remote_code: bool = False torch_dtype: SerializableTorchDtype | None = None - - _validate_load_from_string = field_validator("load_from", mode="before")( - convert_string_to_repo_config - ) diff --git a/src/lm_buddy/integrations/huggingface/repo_config.py b/src/lm_buddy/integrations/huggingface/repo_config.py deleted file mode 100644 index 81549c70..00000000 --- a/src/lm_buddy/integrations/huggingface/repo_config.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Any - -from huggingface_hub.utils import HFValidationError, validate_repo_id -from pydantic import field_validator - -from lm_buddy.types import BaseLMBuddyConfig - - -def convert_string_to_repo_config(x: Any): - if isinstance(x, str): - return HuggingFaceRepoConfig(repo_id=x) - return x - - -def is_valid_huggingface_repo_id(s: str): - """ - Simple test to check if an HF model is valid using HuggingFace's tools. - Sadly, theirs throws an exception and has no return. - - Args: - s: string to test. - """ - try: - validate_repo_id(s) - return True - except HFValidationError: - return False - - -class HuggingFaceRepoConfig(BaseLMBuddyConfig): - """Configuration for a HuggingFace Hub repository.""" - - __match_args__ = ("repo_id", "revision") - - repo_id: str - revision: str | None = None - - @field_validator("repo_id", mode="after") - def validate_repo_id(cls, x: str): - if not is_valid_huggingface_repo_id(x): - raise ValueError(f"{x} is not a valid HuggingFace repo ID.") - return x diff --git a/src/lm_buddy/integrations/huggingface/tokenizer_config.py b/src/lm_buddy/integrations/huggingface/tokenizer_config.py index 157f44f5..8fa7d975 100644 --- a/src/lm_buddy/integrations/huggingface/tokenizer_config.py +++ b/src/lm_buddy/integrations/huggingface/tokenizer_config.py @@ -1,17 +1,10 @@ -from pydantic import field_validator - -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, convert_string_to_repo_config -from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig class AutoTokenizerConfig(BaseLMBuddyConfig): """Settings passed to a HuggingFace AutoTokenizer instantiation.""" - load_from: HuggingFaceRepoConfig | WandbArtifactConfig + load_from: AssetPath trust_remote_code: bool | None = None use_fast: bool | None = None - - _validate_load_from_string = field_validator("load_from", mode="before")( - convert_string_to_repo_config - ) diff --git a/src/lm_buddy/integrations/vllm.py b/src/lm_buddy/integrations/vllm.py index af99ccf6..74621ebc 100644 --- a/src/lm_buddy/integrations/vllm.py +++ b/src/lm_buddy/integrations/vllm.py @@ -1,4 +1,4 @@ -from lm_buddy.integrations.huggingface import HuggingFaceAssetPath +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig @@ -16,7 +16,7 @@ class InferenceServerConfig(BaseLMBuddyConfig): """ base_url: str - engine: str | HuggingFaceAssetPath | None = None + engine: str | AssetPath | None = None class VLLMCompletionsConfig(BaseLMBuddyConfig): diff --git a/src/lm_buddy/integrations/wandb/artifact_config.py b/src/lm_buddy/integrations/wandb/artifact_config.py index 50106330..c5dad098 100644 --- a/src/lm_buddy/integrations/wandb/artifact_config.py +++ b/src/lm_buddy/integrations/wandb/artifact_config.py @@ -8,8 +8,8 @@ class WandbArtifactConfig(BaseLMBuddyConfig): name: str project: str - version: str = "latest" entity: str | None = None + version: str = "latest" @classmethod def 
from_wandb_path(cls, path: str) -> "WandbArtifactConfig": diff --git a/src/lm_buddy/integrations/wandb/artifact_utils.py b/src/lm_buddy/integrations/wandb/artifact_utils.py index 995ba869..0def0eb1 100644 --- a/src/lm_buddy/integrations/wandb/artifact_utils.py +++ b/src/lm_buddy/integrations/wandb/artifact_utils.py @@ -1,8 +1,8 @@ from enum import Enum from pathlib import Path -from typing import Any from urllib.parse import ParseResult, urlparse +import pandas as pd import wandb @@ -91,57 +91,21 @@ def build_directory_artifact( def build_table_artifact( artifact_name: str, artifact_type: ArtifactType, - columns: list[str], - tables: dict[str, list[list[Any]]], + tables: dict[str, pd.DataFrame], ) -> wandb.Artifact: """Build an artifact containing one or more table entries. Args: artifact_name (str): Name of the artifact. artifact_type (ArtifactType): Type of artifact. - columns (list[str]): Column names for the tables. - tables (dict[str, list[list[Any]]]): Mapping from table name to table rows. + tables (dict[str, pd.DataFrame]): Mapping from table name to table data + in the form of a `pd.DataFrame` object. Returns: wandb.Artifact: The artifact containing the table(s). """ artifact = wandb.Artifact(artifact_name, type=artifact_type) for table_name, table_data in tables.items(): - table = wandb.Table(data=table_data, columns=columns) + table = wandb.Table(data=table_data) artifact.add(table, name=table_name) return artifact - - -def build_file_artifact( - artifact_name: str, - artifact_type: ArtifactType, - file_path: str | Path, - *, - reference: bool = False, - entry_name: str | None = None, -) -> wandb.Artifact: - """Build an artifact containing a single file - - Args: - artifact_name (str): Name of the artifact - artifact_type (ArtifactType): Type of artifact - file_path (str | Path): The full path (including filename) of the file - - Keyword Args: - reference (bool): Only reference the file, do not copy contents. Defaults to False. - entry_name (str | None): Name for the file within the artifact. If None, defaults - to the original filename. - - Returns: - wandb.Artifact: The generated artifact. - """ - artifact = wandb.Artifact(name=artifact_name, type=artifact_type) - - if reference: - artifact.add_reference( - uri=f"{ArtifactURIScheme.FILE}://{file_path}", - name=entry_name, - ) - else: - artifact.add_file(str(file_path), name=entry_name) - return artifact diff --git a/src/lm_buddy/jobs/__init__.py b/src/lm_buddy/jobs/__init__.py index 41e57911..e69de29b 100644 --- a/src/lm_buddy/jobs/__init__.py +++ b/src/lm_buddy/jobs/__init__.py @@ -1,38 +0,0 @@ -from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader -from lm_buddy.jobs._entrypoints import ( - run_finetuning, - run_lm_harness, - run_prometheus, - run_simple, -) -from lm_buddy.jobs.configs import ( - FinetuningJobConfig, - LMBuddyJobConfig, - LMHarnessJobConfig, - PrometheusJobConfig, - SimpleJobConfig, -) - - -def run_job( - config: LMBuddyJobConfig, - artifact_loader: ArtifactLoader = WandbArtifactLoader(), -) -> None: - """Run an LM Buddy job from the configuration. - - Args: - config (LMBuddyJobConfig): Configuration defining the job to run. - artifact_loader (ArtifactLoader): Implementation of the artifact loader protocol. - Defaults to WandbArtifactLoader(). 
- """ - match config: - case SimpleJobConfig() as simple_config: - run_simple(simple_config) - case FinetuningJobConfig() as finetuning_config: - run_finetuning(finetuning_config, artifact_loader) - case LMHarnessJobConfig() as lm_harness_config: - run_lm_harness(lm_harness_config, artifact_loader) - case PrometheusJobConfig() as prometheus_config: - run_prometheus(prometheus_config, artifact_loader) - case _: - raise ValueError(f"Received invalid job configuration: {config}") diff --git a/src/lm_buddy/jobs/_entrypoints/__init__.py b/src/lm_buddy/jobs/_entrypoints/__init__.py index 26de4304..a5f3145a 100644 --- a/src/lm_buddy/jobs/_entrypoints/__init__.py +++ b/src/lm_buddy/jobs/_entrypoints/__init__.py @@ -1,6 +1,5 @@ from lm_buddy.jobs._entrypoints.finetuning import run_finetuning from lm_buddy.jobs._entrypoints.lm_harness import run_lm_harness from lm_buddy.jobs._entrypoints.prometheus import run_prometheus -from lm_buddy.jobs._entrypoints.simple import run_simple -__all__ = ["run_finetuning", "run_lm_harness", "run_prometheus", "run_simple"] +__all__ = ["run_finetuning", "run_lm_harness", "run_prometheus"] diff --git a/src/lm_buddy/jobs/_entrypoints/finetuning.py b/src/lm_buddy/jobs/_entrypoints/finetuning.py index 31092410..39d93438 100644 --- a/src/lm_buddy/jobs/_entrypoints/finetuning.py +++ b/src/lm_buddy/jobs/_entrypoints/finetuning.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Any import ray @@ -12,12 +13,13 @@ from lm_buddy.integrations.wandb import ( ArtifactLoader, ArtifactType, + WandbArtifactConfig, WandbResumeMode, build_directory_artifact, default_artifact_name, wandb_init_from_config, ) -from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.common import FinetuningResult, LMBuddyJobType from lm_buddy.jobs.configs import FinetuningJobConfig @@ -62,7 +64,10 @@ def load_and_train(config: FinetuningJobConfig, artifact_loader: ArtifactLoader) trainer.train() -def run_finetuning(config: FinetuningJobConfig, artifact_loader: ArtifactLoader): +def run_finetuning( + config: FinetuningJobConfig, + artifact_loader: ArtifactLoader, +) -> FinetuningResult: # Place the artifact loader in Ray object store artifact_loader_ref = ray.put(artifact_loader) @@ -101,14 +106,31 @@ def training_function(config_data: dict[str, Any]): print(f"Training result: {result}") # Register a model artifact if tracking is enabled and Ray saved a checkpoint - if config.tracking and result.checkpoint: - # Must resume from the just-completed training run - with wandb_init_from_config(config.tracking, resume=WandbResumeMode.MUST) as run: - model_artifact = build_directory_artifact( - artifact_name=default_artifact_name(run.name, ArtifactType.MODEL), - artifact_type=ArtifactType.MODEL, - dir_path=f"{result.checkpoint.path}/{RayTrainReportCallback.CHECKPOINT_NAME}", - reference=True, - ) - print("Logging artifact for model checkpoint...") - artifact_loader.log_artifact(model_artifact) + ckpt_path, artifact_config = None, None + if result.checkpoint: + ckpt_path = Path(f"{result.checkpoint.path}/{RayTrainReportCallback.CHECKPOINT_NAME}") + if config.tracking: + # Must resume from the just-completed training run + with wandb_init_from_config(config.tracking, resume=WandbResumeMode.MUST) as run: + model_artifact = build_directory_artifact( + artifact_name=default_artifact_name(run.name, ArtifactType.MODEL), + artifact_type=ArtifactType.MODEL, + dir_path=ckpt_path, + reference=True, + ) + print("Logging artifact for model checkpoint...") + artifact_loader.log_artifact(model_artifact) + 
# Create an artifact config referencing the new artifact + artifact_config = WandbArtifactConfig( + name=model_artifact.name, + project=run.project, + entity=run.entity, + ) + + # Return finetuning result object + return FinetuningResult( + checkpoint_path=ckpt_path, + checkpoint_artifact=artifact_config, + metrics=result.metrics or {}, + is_adapter=config.adapter is not None, + ) diff --git a/src/lm_buddy/jobs/_entrypoints/lm_harness.py b/src/lm_buddy/jobs/_entrypoints/lm_harness.py index d5af1ace..87034284 100644 --- a/src/lm_buddy/jobs/_entrypoints/lm_harness.py +++ b/src/lm_buddy/jobs/_entrypoints/lm_harness.py @@ -1,6 +1,7 @@ from typing import Any import lm_eval +import pandas as pd import torch from lm_eval.models.huggingface import HFLM from lm_eval.models.openai_completions import OpenaiCompletionsLM @@ -8,36 +9,37 @@ from lm_buddy.integrations.huggingface import ( AutoModelConfig, HuggingFaceAssetLoader, - HuggingFaceAssetPath, resolve_peft_and_pretrained, ) from lm_buddy.integrations.wandb import ( ArtifactLoader, ArtifactType, + WandbArtifactConfig, WandbResumeMode, build_table_artifact, default_artifact_name, wandb_init_from_config, ) -from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.common import EvaluationResult, LMBuddyJobType from lm_buddy.jobs.configs import LMHarnessJobConfig, LocalChatCompletionsConfig +from lm_buddy.paths import AssetPath -def get_numeric_metrics( +def get_per_task_dataframes( results: dict[str, dict[str, Any]], -) -> dict[str, list[tuple[str, float]]]: - """Filter non-numeric values from the evaluation results. +) -> dict[str, pd.DataFrame]: + """Create a `pd.DataFrame` of numeric metrics for each evaluation task. This is necessary because artifact tables must have a single datatype for each column. lm-harness returns mostly numeric values, but there are also some misc string-valued metrics. Filtering down to only numeric values allows us to produce a valid table artifact. 
""" - numeric_results = {} - for key, data in results.items(): + task_dataframes = {} + for task_name, data in results.items(): numeric_rows = [(k, v) for k, v in data.items() if isinstance(v, int | float)] - numeric_results[key] = numeric_rows - return numeric_results + task_dataframes[task_name] = pd.DataFrame(data=numeric_rows, columns=["metric", "value"]) + return task_dataframes def load_harness_model( @@ -48,7 +50,7 @@ def load_harness_model( hf_loader = HuggingFaceAssetLoader(artifact_loader) match config.model: case AutoModelConfig() as model_config: - model_path, revision = hf_loader.resolve_asset_path(model_config.load_from) + model_path = hf_loader.resolve_asset_path(model_config.load_from) model_path, peft_path = resolve_peft_and_pretrained(model_path) quantization_kwargs: dict[str, Any] = ( config.quantization.model_dump() if config.quantization else {} @@ -58,7 +60,6 @@ def load_harness_model( pretrained=model_path, tokenizer=model_path, peft=peft_path, - revision=revision if revision else "main", device="cuda" if torch.cuda.device_count() > 0 else "cpu", trust_remote_code=config.model.trust_remote_code, dtype=config.model.torch_dtype if config.model.torch_dtype else "auto", @@ -67,8 +68,8 @@ def load_harness_model( case LocalChatCompletionsConfig() as local_config: model = local_config.inference.engine - if isinstance(model, HuggingFaceAssetPath): - model, _ = hf_loader.resolve_asset_path(model) + if isinstance(model, AssetPath): + model = hf_loader.resolve_asset_path(model) # If tokenizer is not provided, it is set to the value of model internally return OpenaiCompletionsLM( model=model, @@ -82,43 +83,57 @@ def load_harness_model( raise ValueError(f"Unexpected model config type: {type(config.model)}") -def load_and_evaluate( +def run_eval( config: LMHarnessJobConfig, artifact_loader: ArtifactLoader, ) -> dict[str, list[tuple[str, float]]]: - llm = load_harness_model(config, artifact_loader) eval_results = lm_eval.simple_evaluate( model=llm, - tasks=config.evaluator.tasks, - batch_size=config.evaluator.batch_size, - num_fewshot=config.evaluator.num_fewshot, - limit=config.evaluator.limit, + tasks=config.evaluation.tasks, + batch_size=config.evaluation.batch_size, + num_fewshot=config.evaluation.num_fewshot, + limit=config.evaluation.limit, log_samples=False, ) - eval_results = get_numeric_metrics(eval_results["results"]) print(f"Obtained evaluation results: {eval_results}") - return eval_results + return get_per_task_dataframes(eval_results["results"]) -def run_lm_harness(config: LMHarnessJobConfig, artifact_loader: ArtifactLoader): - print(f"Received job configuration:\n {config.model_dump_json(indent=2)}") +def run_lm_harness( + config: LMHarnessJobConfig, + artifact_loader: ArtifactLoader, +) -> EvaluationResult: + print(f"Running lm-harness evaluation with configuration:\n {config.model_dump_json(indent=2)}") if config.tracking is not None: with wandb_init_from_config( config.tracking, - parameters=config.evaluator, # Log eval settings in W&B run + parameters=config.evaluation, # Log eval settings in W&B run resume=WandbResumeMode.ALLOW, job_type=LMBuddyJobType.EVALUATION, ) as run: - eval_results = load_and_evaluate(config, artifact_loader) - eval_artifact = build_table_artifact( + eval_tables = run_eval(config, artifact_loader) + table_artifact = build_table_artifact( artifact_name=default_artifact_name(run.name, ArtifactType.EVALUATION), artifact_type=ArtifactType.EVALUATION, - columns=["metric", "value"], - tables=eval_results, + tables=eval_tables, ) print("Logging 
artifact for evaluation results...") - artifact_loader.log_artifact(eval_artifact) + artifact_loader.log_artifact(table_artifact) + # Create an artifact config to reference the new table artifact + table_artifact_config = WandbArtifactConfig( + name=table_artifact.name, + project=run.project, + entity=run.entity, + ) else: - load_and_evaluate(config, artifact_loader) + eval_tables = run_eval(config, artifact_loader) + table_artifact_config = None + + return EvaluationResult( + tables=eval_tables, + table_artifact=table_artifact_config, + dataset_path=None, + dataset_artifact=None, + ) diff --git a/src/lm_buddy/jobs/_entrypoints/prometheus.py b/src/lm_buddy/jobs/_entrypoints/prometheus.py index 75ca71ba..8146f4ed 100644 --- a/src/lm_buddy/jobs/_entrypoints/prometheus.py +++ b/src/lm_buddy/jobs/_entrypoints/prometheus.py @@ -13,15 +13,16 @@ from openai import Completion, OpenAI, OpenAIError from tqdm import tqdm -from lm_buddy.integrations.huggingface import HuggingFaceAssetLoader -from lm_buddy.integrations.huggingface.tokenizer_config import AutoTokenizerConfig +from lm_buddy.integrations.huggingface import AutoTokenizerConfig, HuggingFaceAssetLoader from lm_buddy.integrations.wandb import ( ArtifactLoader, ArtifactType, + WandbArtifactConfig, build_directory_artifact, + default_artifact_name, wandb_init_from_config, ) -from lm_buddy.jobs.common import LMBuddyJobType +from lm_buddy.jobs.common import EvaluationResult, LMBuddyJobType from lm_buddy.jobs.configs import PrometheusJobConfig @@ -107,7 +108,7 @@ def run_eval( config: PrometheusJobConfig, artifact_loader: ArtifactLoader, client: OpenAI, -) -> str: +) -> Path: # load dataset from W&B artifact hf_loader = HuggingFaceAssetLoader(artifact_loader) data = hf_loader.load_dataset(config.dataset) @@ -138,7 +139,7 @@ def run_eval( result["prometheus_output"] = [] result["prometheus_score"] = [] - for idx in range(config.evaluation.num_answers): + for _ in range(config.evaluation.num_answers): (feedback, score) = get_response_with_retries( config, client, prompt, config.evaluation.max_retries ) @@ -149,32 +150,50 @@ def run_eval( file.write(json.dumps(result) + "\n") # convert plain json dataset in HF format - output_hf_name = str(Path(config.evaluation.output_folder) / "hf" / tracking_name) + output_dataset_path = Path(config.evaluation.output_folder) / "hf" / tracking_name ds = load_dataset("json", data_files=str(output_fname), split="train") - ds.save_to_disk(output_hf_name) + ds.save_to_disk(output_dataset_path) - return str(output_hf_name) + return output_dataset_path -def run_prometheus(config: PrometheusJobConfig, artifact_loader: ArtifactLoader): - # instantiate OpenAI client to speak with the vLLM endpoint +def run_prometheus( + config: PrometheusJobConfig, + artifact_loader: ArtifactLoader, +) -> EvaluationResult: + # Instantiate OpenAI client to speak with the vLLM endpoint client = OpenAI(base_url=config.prometheus.inference.base_url) - # Register a dataset file artifact if tracking is enabled + # Run eval and store output in local filename if config.tracking: - with wandb_init_from_config(config.tracking, job_type=LMBuddyJobType.EVALUATION): - # run eval and store output in local filename - output_dataset_name = run_eval(config, artifact_loader, client) - - # store HF dataset as a directory artifact - artifact = build_directory_artifact( - dir_path=output_dataset_name, - artifact_name=config.tracking.name, + with wandb_init_from_config(config.tracking, job_type=LMBuddyJobType.EVALUATION) as run: + output_dataset_path = 
run_eval(config, artifact_loader, client) + + # Create a directory artifact for the HF dataset + dataset_artifact = build_directory_artifact( + dir_path=output_dataset_path, + artifact_name=default_artifact_name(run.name, artifact_type=ArtifactType.DATASET), artifact_type=ArtifactType.DATASET, reference=False, ) - print("Logging artifact for evaluation results...") - artifact_loader.log_artifact(artifact) + + print("Logging artifact for evaluation dataset...") + artifact_loader.log_artifact(dataset_artifact) + + # Create a config referencing the new artifact + dataset_artifact_config = WandbArtifactConfig( + name=dataset_artifact.name, + project=run.project, + entity=run.entity, + ) else: - output_dataset_name = run_eval(config, artifact_loader, client) - print(f"Evaluation results stored in {output_dataset_name}") + output_dataset_path = run_eval(config, artifact_loader, client) + dataset_artifact_config = None + + print(f"Evaluation dataset stored at {output_dataset_path}") + return EvaluationResult( + tables={}, + table_artifact=None, + dataset_artifact=dataset_artifact_config, + dataset_path=output_dataset_path, + ) diff --git a/src/lm_buddy/jobs/_entrypoints/simple.py b/src/lm_buddy/jobs/_entrypoints/simple.py deleted file mode 100644 index 13cd0456..00000000 --- a/src/lm_buddy/jobs/_entrypoints/simple.py +++ /dev/null @@ -1,17 +0,0 @@ -import ray - -from lm_buddy.jobs.configs import SimpleJobConfig - - -@ray.remote -def get_magic_number(config: SimpleJobConfig) -> int: - return config.magic_number - - -def run_simple(config: SimpleJobConfig): - """A simple entrypoint to demonstrate the Ray interface.""" - # Connect to the Ray cluster (if not already running) - ray.init(ignore_reinit_error=True) - # Run dummy remote task - magic_number = ray.get(get_magic_number.remote(config)) - print(f"The magic number is {magic_number}") diff --git a/src/lm_buddy/jobs/common.py b/src/lm_buddy/jobs/common.py index cc170892..c25130ca 100644 --- a/src/lm_buddy/jobs/common.py +++ b/src/lm_buddy/jobs/common.py @@ -1,4 +1,11 @@ +from dataclasses import dataclass from enum import Enum +from pathlib import Path +from typing import Any + +import pandas as pd + +from lm_buddy.integrations.wandb import WandbArtifactConfig class LMBuddyJobType(str, Enum): @@ -7,3 +14,23 @@ class LMBuddyJobType(str, Enum): PREPROCESSING = "preprocessing" FINETUNING = "finetuning" EVALUATION = "evaluation" + + +@dataclass +class FinetuningResult: + """Result from a finetuning task.""" + + checkpoint_path: Path | None + checkpoint_artifact: WandbArtifactConfig | None + metrics: dict[str, Any] + is_adapter: bool + + +@dataclass +class EvaluationResult: + """Result from an evaluation task, containing aggregate metrics and artifacts.""" + + tables: dict[str, pd.DataFrame] + table_artifact: WandbArtifactConfig | None + dataset_artifact: WandbArtifactConfig | None + dataset_path: Path | None diff --git a/src/lm_buddy/jobs/configs/__init__.py b/src/lm_buddy/jobs/configs/__init__.py index e7f71236..d4701cdd 100644 --- a/src/lm_buddy/jobs/configs/__init__.py +++ b/src/lm_buddy/jobs/configs/__init__.py @@ -1,22 +1,22 @@ from lm_buddy.jobs.configs.base import LMBuddyJobConfig from lm_buddy.jobs.configs.finetuning import FinetuningJobConfig, FinetuningRayConfig from lm_buddy.jobs.configs.lm_harness import ( - LMHarnessEvaluatorConfig, + LMHarnessEvaluationConfig, LMHarnessJobConfig, LocalChatCompletionsConfig, ) -from lm_buddy.jobs.configs.prometheus import PrometheusEvaluationTaskConfig, PrometheusJobConfig -from 
lm_buddy.jobs.configs.simple import SimpleJobConfig +from lm_buddy.jobs.configs.prometheus import PrometheusEvaluationConfig, PrometheusJobConfig + +EvaluationJobConfig = LMHarnessJobConfig | PrometheusJobConfig __all__ = [ "LMBuddyJobConfig", - "SimpleJobConfig", "FinetuningJobConfig", "FinetuningRayConfig", - "LMHarnessEvaluatorConfig", + "LMHarnessEvaluationConfig", "LMHarnessJobConfig", "LocalChatCompletionsConfig", - "PrometheusEvaluationTaskConfig", + "PrometheusEvaluationConfig", "PrometheusJobConfig", - "SimpleJobConfig", + "EvaluationJobConfig", ] diff --git a/src/lm_buddy/jobs/configs/lm_harness.py b/src/lm_buddy/jobs/configs/lm_harness.py index c1710626..42e8b227 100644 --- a/src/lm_buddy/jobs/configs/lm_harness.py +++ b/src/lm_buddy/jobs/configs/lm_harness.py @@ -35,7 +35,7 @@ def validate_inference_engine(cls, config: "LocalChatCompletionsConfig"): return config -class LMHarnessEvaluatorConfig(BaseLMBuddyConfig): +class LMHarnessEvaluationConfig(BaseLMBuddyConfig): """Misc settings provided to an lm-harness evaluation job.""" tasks: conlist(str, min_length=1) @@ -48,6 +48,6 @@ class LMHarnessJobConfig(LMBuddyJobConfig): """Configuration to run an lm-evaluation-harness evaluation job.""" model: AutoModelConfig | LocalChatCompletionsConfig - evaluator: LMHarnessEvaluatorConfig + evaluation: LMHarnessEvaluationConfig quantization: QuantizationConfig | None = None tracking: WandbRunConfig | None = None diff --git a/src/lm_buddy/jobs/configs/prometheus.py b/src/lm_buddy/jobs/configs/prometheus.py index db428a97..2500eb5a 100644 --- a/src/lm_buddy/jobs/configs/prometheus.py +++ b/src/lm_buddy/jobs/configs/prometheus.py @@ -7,7 +7,7 @@ from lm_buddy.types import BaseLMBuddyConfig -class PrometheusEvaluationTaskConfig(BaseLMBuddyConfig): +class PrometheusEvaluationConfig(BaseLMBuddyConfig): """Parameters specific to Prometheus evaluation.""" num_answers: int = 3 @@ -22,17 +22,16 @@ class PrometheusEvaluationTaskConfig(BaseLMBuddyConfig): class PrometheusJobConfig(LMBuddyJobConfig): - """Configuration to run a prometheus job.""" + """Configuration for a Prometheus judge evaluation task.""" + prometheus: VLLMCompletionsConfig = Field( + description="Externally hosted Prometheus judge model." + ) dataset: TextDatasetConfig = Field( description="Dataset of text completions to evaluate using the Prometheus judge model." 
) - - # vLLM endpoint configuration - prometheus: VLLMCompletionsConfig - - # evaluation task configuration - evaluation: PrometheusEvaluationTaskConfig | None = None - - # wandb experiment tracking details + evaluation: PrometheusEvaluationConfig = Field( + default_factory=PrometheusEvaluationConfig, + description="Settings for the Prometheus evaluation.", + ) tracking: WandbRunConfig | None = None diff --git a/src/lm_buddy/jobs/configs/simple.py b/src/lm_buddy/jobs/configs/simple.py deleted file mode 100644 index f0270923..00000000 --- a/src/lm_buddy/jobs/configs/simple.py +++ /dev/null @@ -1,7 +0,0 @@ -from lm_buddy.jobs.configs import LMBuddyJobConfig - - -class SimpleJobConfig(LMBuddyJobConfig): - """Simple job submission config.""" - - magic_number: int diff --git a/src/lm_buddy/paths.py b/src/lm_buddy/paths.py new file mode 100644 index 00000000..d2e58eb8 --- /dev/null +++ b/src/lm_buddy/paths.py @@ -0,0 +1,68 @@ +from pathlib import Path +from typing import Annotated, Any + +from huggingface_hub.utils import HFValidationError, validate_repo_id +from pydantic import BeforeValidator + +from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.types import BaseLMBuddyConfig + + +class FilePath(BaseLMBuddyConfig): + """Absolute path to an object on the local filesystem.""" + + __match_args__ = ("path",) + + path: Path + + +class HuggingFaceRepoID(BaseLMBuddyConfig): + """Repository ID on the HuggingFace Hub.""" + + __match_args__ = ("repo_id",) + + repo_id: str + + +def is_valid_huggingface_repo_id(s: str): + """ + Simple test to check if an HF model is valid using HuggingFace's tools. + Sadly, theirs throws an exception and has no return. + + Args: + s: string to test. + """ + try: + validate_repo_id(s) + return True + except HFValidationError: + return False + + +def validate_asset_path(x: Any) -> Any: + match x: + case Path() as p: + return FilePath(path=p) + case str() as s if Path(s).is_absolute(): + return FilePath(path=s) + case str() as s if is_valid_huggingface_repo_id(s): + return HuggingFaceRepoID(repo_id=s) + case str(): + raise ValueError(f"{x} is neither a valid HuggingFace repo ID nor an absolute path.") + case _: + # Handled by downstream "after" validators + return x + + +AssetPath = Annotated[ + FilePath | HuggingFaceRepoID | WandbArtifactConfig, + BeforeValidator(lambda x: validate_asset_path(x)), +] +"""Union type representing the name/path for loading a HuggingFace asset. + +The path is represented by either a `FilePath`, a `HuggingFaceRepoID` +or a `WandbArtifactConfig` that can be resolved to a path via the artifact's manifest. + +This type is annotated with Pydantic validation logic to convert absolute path strings +to `FilePath`s and other strings to `HuggingFaceRepoID`s.
+""" diff --git a/tests/integration/test_finetuning.py b/tests/integration/test_finetuning.py index 01734410..6e2829b6 100644 --- a/tests/integration/test_finetuning.py +++ b/tests/integration/test_finetuning.py @@ -1,6 +1,6 @@ import pytest -import lm_buddy +from lm_buddy import LMBuddy from lm_buddy.integrations.huggingface import AutoModelConfig, TextDatasetConfig, TrainerConfig from lm_buddy.integrations.wandb import ArtifactType, WandbArtifactConfig, WandbRunConfig from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig @@ -41,7 +41,8 @@ def test_finetuning_job(llm_model_artifact, text_dataset_artifact, job_config): artifact_loader.log_artifact(text_dataset_artifact) # Run test job - lm_buddy.run_job(job_config, artifact_loader=artifact_loader) + buddy = LMBuddy(artifact_loader) + buddy.finetune(job_config) # Two input artifacts, and one output model artifact produced artifacts = artifact_loader.get_artifacts() diff --git a/tests/integration/test_lm_harness.py b/tests/integration/test_lm_harness.py index f8b8dd32..bac69f3d 100644 --- a/tests/integration/test_lm_harness.py +++ b/tests/integration/test_lm_harness.py @@ -1,9 +1,9 @@ import pytest -import lm_buddy +from lm_buddy import LMBuddy from lm_buddy.integrations.huggingface import AutoModelConfig from lm_buddy.integrations.wandb import WandbArtifactConfig, WandbRunConfig -from lm_buddy.jobs.configs import LMHarnessEvaluatorConfig, LMHarnessJobConfig +from lm_buddy.jobs.configs import LMHarnessEvaluationConfig, LMHarnessJobConfig from tests.test_utils import FakeArtifactLoader @@ -14,10 +14,10 @@ def job_config(llm_model_artifact): ) tracking_config = WandbRunConfig(name="test-lm-harness-job") - evaluator_config = LMHarnessEvaluatorConfig(tasks=["hellaswag"], limit=5) + evaluation_config = LMHarnessEvaluationConfig(tasks=["hellaswag"], limit=5) return LMHarnessJobConfig( model=model_config, - evaluator=evaluator_config, + evaluation=evaluation_config, tracking=tracking_config, ) @@ -28,7 +28,8 @@ def test_lm_harness_job_with_tracking(llm_model_artifact, job_config): artifact_loader.log_artifact(llm_model_artifact) # Run test job - lm_buddy.run_job(job_config, artifact_loader=artifact_loader) + buddy = LMBuddy(artifact_loader) + buddy.evaluate(job_config) # One input artifact, and one eval artifact produced assert artifact_loader.num_artifacts() == 2 @@ -43,7 +44,8 @@ def test_lm_harness_job_no_tracking(llm_model_artifact, job_config): artifact_loader.log_artifact(llm_model_artifact) # Run test job - lm_buddy.run_job(job_config, artifact_loader=artifact_loader) + buddy = LMBuddy(artifact_loader) + buddy.evaluate(job_config) # One input artifact, no additional eval artifacts produced assert artifact_loader.num_artifacts() == 1 diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py deleted file mode 100644 index c1e7f7ca..00000000 --- a/tests/integration/test_simple.py +++ /dev/null @@ -1,10 +0,0 @@ -import ray - -from lm_buddy.jobs._entrypoints.simple import get_magic_number -from lm_buddy.jobs.configs import SimpleJobConfig - - -def test_simple_remote_task(): - config = SimpleJobConfig(magic_number=42) - result = ray.get(get_magic_number.remote(config)) - assert result == 42 diff --git a/tests/unit/integrations/huggingface/test_dataset_config.py b/tests/unit/integrations/huggingface/test_dataset_config.py index f696c09e..4b215208 100644 --- a/tests/unit/integrations/huggingface/test_dataset_config.py +++ b/tests/unit/integrations/huggingface/test_dataset_config.py @@ -1,10 +1,11 @@ import 
pytest from pydantic import ValidationError -from lm_buddy.integrations.huggingface import DatasetConfig, HuggingFaceRepoConfig +from lm_buddy.integrations.huggingface import DatasetConfig +from lm_buddy.paths import HuggingFaceRepoID def test_split_is_required(): with pytest.raises(ValidationError): - repo = HuggingFaceRepoConfig(repo_id="dataset/xyz") + repo = HuggingFaceRepoID(repo_id="dataset/xyz") DatasetConfig(load_from=repo, split=None) diff --git a/tests/unit/jobs/configs/test_finetuning_config.py b/tests/unit/jobs/configs/test_finetuning_config.py index cce77868..6aee111b 100644 --- a/tests/unit/jobs/configs/test_finetuning_config.py +++ b/tests/unit/jobs/configs/test_finetuning_config.py @@ -1,8 +1,9 @@ import pytest from pydantic import ValidationError -from lm_buddy.integrations.huggingface import HuggingFaceRepoConfig, TextDatasetConfig +from lm_buddy.integrations.huggingface import TextDatasetConfig from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig +from lm_buddy.paths import HuggingFaceRepoID from tests.test_utils import copy_pydantic_json @@ -52,10 +53,10 @@ def test_load_example_config(examples_dir): def test_argument_validation(): - model_repo = HuggingFaceRepoConfig(repo_id="model_path") - tokenizer_repo = HuggingFaceRepoConfig(repo_id="dataset_path") + model_repo = HuggingFaceRepoID(repo_id="model_repo_id") + tokenizer_repo = HuggingFaceRepoID(repo_id="tokenizer_repo_id") dataset_config = TextDatasetConfig( - load_from=HuggingFaceRepoConfig(repo_id="dataset_path"), + load_from=HuggingFaceRepoID(repo_id="dataset_repo_id"), split="train", ) @@ -70,11 +71,11 @@ def test_argument_validation(): # Check passing invalid arguments is validated for each asset type with pytest.raises(ValidationError): - FinetuningJobConfig(model=12345, tokenizer="tokenizer_path", dataset="dataset_path") + FinetuningJobConfig(model=12345, tokenizer="tokenizer_repo_id", dataset="dataset_repo_id") with pytest.raises(ValidationError): - FinetuningJobConfig(model="model_path", tokenizer=12345, dataset="dataset_path") + FinetuningJobConfig(model="model_repo_id", tokenizer=12345, dataset="dataset_repo_id") with pytest.raises(ValidationError): - FinetuningJobConfig(model="model_path", tokenizer="tokenizer_path", dataset=12345) + FinetuningJobConfig(model="model_repo_id", tokenizer="tokenizer_repo_id", dataset=12345) # Check that tokenizer is set to model path when absent missing_tokenizer_config = FinetuningJobConfig(model=model_repo.repo_id, dataset=dataset_config) diff --git a/tests/unit/jobs/configs/test_job_config.py b/tests/unit/jobs/configs/test_job_config.py index 73c83dfe..ebf3bbea 100644 --- a/tests/unit/jobs/configs/test_job_config.py +++ b/tests/unit/jobs/configs/test_job_config.py @@ -1,9 +1,12 @@ -from lm_buddy.jobs.configs import SimpleJobConfig +from lm_buddy.jobs.configs import LMBuddyJobConfig def test_config_as_tempfile(): - config = SimpleJobConfig(magic_number=42) + class TestConfig(LMBuddyJobConfig): + magic_number: int + + config = TestConfig(magic_number=42) config_name = "my-job-config.yaml" with config.to_tempfile(name=config_name) as path: assert path.name == config_name - assert SimpleJobConfig.from_yaml_file(path) == config + assert TestConfig.from_yaml_file(path) == config diff --git a/tests/unit/jobs/configs/test_lm_harness_config.py b/tests/unit/jobs/configs/test_lm_harness_config.py index 16135e09..0c28e3c0 100644 --- a/tests/unit/jobs/configs/test_lm_harness_config.py +++ b/tests/unit/jobs/configs/test_lm_harness_config.py @@ -3,7 +3,7 @@ from 
lm_buddy.integrations.vllm import InferenceServerConfig from lm_buddy.jobs.configs import ( - LMHarnessEvaluatorConfig, + LMHarnessEvaluationConfig, LMHarnessJobConfig, LocalChatCompletionsConfig, ) @@ -21,8 +21,8 @@ def local_completions_config(inference_server_config): @pytest.fixture -def lm_harness_evaluator_config(): - return LMHarnessEvaluatorConfig( +def lm_harness_evaluation_config(): + return LMHarnessEvaluationConfig( tasks=["task1", "task2", "task3"], num_fewshot=5, ) @@ -35,19 +35,19 @@ def lm_harness_job_config( local_completions_config, quantization_config, wandb_run_config, - lm_harness_evaluator_config, + lm_harness_evaluation_config, ): if request.param == "model_config_with_artifact": return LMHarnessJobConfig( model=model_config_with_artifact, - evaluator=lm_harness_evaluator_config, + evaluation=lm_harness_evaluation_config, tracking=wandb_run_config, quantization=quantization_config, ) elif request.param == "local_completions_config": return LMHarnessJobConfig( model=local_completions_config, - evaluator=lm_harness_evaluator_config, + evaluation=lm_harness_evaluation_config, tracking=wandb_run_config, quantization=quantization_config, ) diff --git a/tests/unit/jobs/test_run_job.py b/tests/unit/jobs/test_run_job.py deleted file mode 100644 index 47d78a34..00000000 --- a/tests/unit/jobs/test_run_job.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest - -import lm_buddy -from lm_buddy.integrations.huggingface import AutoModelConfig - - -def test_invalid_config_error(): - not_a_job_config = AutoModelConfig(load_from="distilgpt2") - with pytest.raises(ValueError): - lm_buddy.run_job(not_a_job_config) diff --git a/tests/unit/test_paths.py b/tests/unit/test_paths.py new file mode 100644 index 00000000..c77789d8 --- /dev/null +++ b/tests/unit/test_paths.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import pytest +from pydantic import TypeAdapter, ValidationError + +from lm_buddy.integrations.wandb import WandbArtifactConfig +from lm_buddy.paths import AssetPath, FilePath, HuggingFaceRepoID + + +def test_asset_path_validation(): + # Imbues the AssetPath type with Pydantic validation methods + adapter_cls = TypeAdapter(AssetPath) + + repo_string = adapter_cls.validate_python("repo_id") + assert isinstance(repo_string, HuggingFaceRepoID) + + path_string = adapter_cls.validate_python("/absolute/path") + assert isinstance(path_string, FilePath) + + path_object = adapter_cls.validate_python(Path("/absolute/path")) + assert isinstance(path_object, FilePath) + + artifact_config = WandbArtifactConfig(name="artifact", project="project") + artifact_config = adapter_cls.validate_python(artifact_config) + assert isinstance(artifact_config, WandbArtifactConfig) + + with pytest.raises(ValidationError): + adapter_cls.validate_python("bad...repo_id") + with pytest.raises(ValidationError): + adapter_cls.validate_python(120850120)
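
For orientation, a minimal usage sketch (not part of the diff) of the reworked `LMBuddy` interface introduced above, assembled from the notebook and CLI changes; the YAML file names are placeholders for the configs under `examples/configs`:

```python
# Usage sketch for the new LMBuddy API (names taken from the diff above).
# The config file paths are placeholders; see examples/configs for real ones.
from lm_buddy import LMBuddy
from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig

buddy = LMBuddy()  # defaults to the W&B-backed WandbArtifactLoader

# Finetuning: returns a FinetuningResult with checkpoint path/artifact and metrics
finetuning_config = FinetuningJobConfig.from_yaml_file("finetuning_config.yaml")
finetuning_result = buddy.finetune(finetuning_config)
print(finetuning_result.checkpoint_path)

# Evaluation: the framework is selected by the config type (lm-harness here)
lm_harness_config = LMHarnessJobConfig.from_yaml_file("lm_harness_hf_config.yaml")
eval_result = buddy.evaluate(lm_harness_config)
print(eval_result.tables)  # mapping of task name -> pd.DataFrame of metrics
```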