diff --git a/examples/configs/evaluation/lm_harness_hf_config.yaml b/examples/configs/evaluation/lm_harness_hf_config.yaml index 3b2d59f5..08e06f45 100644 --- a/examples/configs/evaluation/lm_harness_hf_config.yaml +++ b/examples/configs/evaluation/lm_harness_hf_config.yaml @@ -1,3 +1,5 @@ +name: "lm-buddy-lm-harness" + # Model to evaluate model: path: "hf://distilgpt2" @@ -15,6 +17,5 @@ quantization: # Tracking info for where to log the run results tracking: - name: "lm-buddy-lm-harness" project: "lm-buddy-examples" entity: "sample" diff --git a/examples/configs/evaluation/lm_harness_inference_server_config.yaml b/examples/configs/evaluation/lm_harness_inference_server_config.yaml index 4d2595b5..9e71e04a 100644 --- a/examples/configs/evaluation/lm_harness_inference_server_config.yaml +++ b/examples/configs/evaluation/lm_harness_inference_server_config.yaml @@ -1,3 +1,5 @@ +name: "lm-buddy-lm-harness-inference" + # Model to evaluate specified as a local-chat-completions inference server model: inference: @@ -16,6 +18,5 @@ evaluation: limit: 10 tracking: - name: "lm-buddy-lm-harness-inference" project: "lm-buddy-examples" entity: "sample" diff --git a/examples/configs/evaluation/prometheus_config.yaml b/examples/configs/evaluation/prometheus_config.yaml index 1fea3bf0..f47af127 100644 --- a/examples/configs/evaluation/prometheus_config.yaml +++ b/examples/configs/evaluation/prometheus_config.yaml @@ -1,3 +1,5 @@ +name: "lm-buddy-prometheus-job" + dataset: path: "wandb://sample-entity/lm-buddy-examples/wandb-file-artifact:latest" # field containing scoring instructions in the json file @@ -26,6 +28,5 @@ evaluation: enable_tqdm: True tracking: - name: "lm-buddy-prometheus" project: "lm-buddy-examples" entity: "sample" diff --git a/examples/configs/evaluation/ragas_config.yaml b/examples/configs/evaluation/ragas_config.yaml index 570ab025..a992a3d5 100644 --- a/examples/configs/evaluation/ragas_config.yaml +++ b/examples/configs/evaluation/ragas_config.yaml @@ -1,3 +1,5 @@ +name: "lm-buddy-ragas" + dataset: path: "wandb://sample-entity/lm-buddy-examples/wandb-file-artifact:latest" # field containing scoring instructions in the json file @@ -21,6 +23,5 @@ evaluation: embedding_model: "sentence-transformers/all-mpnet-base-v2" tracking: - name: "lm-buddy-ragas" project: "lm-buddy-examples" entity: "sample" diff --git a/examples/configs/finetuning/finetuning_config.yaml b/examples/configs/finetuning/finetuning_config.yaml index 9afd85d7..9ea9a321 100644 --- a/examples/configs/finetuning/finetuning_config.yaml +++ b/examples/configs/finetuning/finetuning_config.yaml @@ -1,3 +1,5 @@ +name: "lm-buddy-finetuning" + # Base model to load for finetuning model: path: "hf://distilgpt2" @@ -36,7 +38,6 @@ adapter: # Tracking info for where to log the run results tracking: - name: "lm-buddy-finetuning" project: "lm-buddy-examples" entity: "mozilla-ai" diff --git a/examples/notebooks/dataset_preprocessing.ipynb b/examples/notebooks/dataset_preprocessing.ipynb index 56be1992..b0e89855 100644 --- a/examples/notebooks/dataset_preprocessing.ipynb +++ b/examples/notebooks/dataset_preprocessing.ipynb @@ -85,7 +85,7 @@ "id": "5cea9f8f-7279-44ac-947c-1d79f6bf6ebc", "metadata": {}, "source": [ - "(3a) Log the dataset directory as an reference artifact using W&B directly" + "(3) Log the dataset directory as an reference artifact using W&B directly" ] }, { @@ -107,45 +107,6 @@ " artifact.add_reference(uri=f\"file://{dataset_save_path}\")\n", " wandb.log_artifact(artifact)" ] - }, - { - "cell_type": "markdown", - "id": 
"c5ab6772", - "metadata": {}, - "source": [ - "(3b) Log the dataset directory as an artifact using LM Buddy helper functions" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "8b09e47d-3ced-4eef-a89f-048754edc758", - "metadata": {}, - "outputs": [], - "source": [ - "from lm_buddy.integrations.wandb import (\n", - " ArtifactType,\n", - " WandbRunConfig,\n", - " build_directory_artifact,\n", - " wandb_init_from_config,\n", - ")\n", - "from lm_buddy.jobs.utils import LMBuddyJobType\n", - "\n", - "run_config = WandbRunConfig(\n", - " name=\"lm-buddy-preprocessing-example\",\n", - " project=\"lm-buddy-examples\",\n", - " entity=\"mozilla-ai\",\n", - ")\n", - "\n", - "with wandb_init_from_config(run_config, job_type=LMBuddyJobType.PREPROCESSING):\n", - " artifact = build_directory_artifact(\n", - " dir_path=dataset_save_path,\n", - " artifact_name=\"example-dataset-artfact-reference\",\n", - " artifact_type=ArtifactType.DATASET,\n", - " reference=True,\n", - " )\n", - " wandb.log_artifact(artifact)" - ] } ], "metadata": { diff --git a/pyproject.toml b/pyproject.toml index 5954e939..76a42cde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "lm-buddy" -version = "0.7.0" +version = "0.8.0" authors = [ { name = "Sean Friedowitz", email = "sean@mozilla.ai" }, { name = "Aaron Gonzales", email = "aaron@mozilla.ai" }, diff --git a/src/lm_buddy/buddy.py b/src/lm_buddy/buddy.py index cc050030..956ca789 100644 --- a/src/lm_buddy/buddy.py +++ b/src/lm_buddy/buddy.py @@ -1,13 +1,17 @@ -from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader +import wandb + +from lm_buddy.integrations.wandb import WandbResumeMode from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_prometheus, run_ragas -from lm_buddy.jobs.common import EvaluationResult, FinetuningResult +from lm_buddy.jobs.common import EvaluationResult, FinetuningResult, LMBuddyJobType from lm_buddy.jobs.configs import ( EvaluationJobConfig, FinetuningJobConfig, + LMBuddyJobConfig, LMHarnessJobConfig, PrometheusJobConfig, RagasJobConfig, ) +from lm_buddy.paths import strip_path_prefix class LMBuddy: @@ -16,25 +20,53 @@ class LMBuddy: Simple wrapper around executable functions for tasks available in the library. """ - def __init__(self, artifact_loader: ArtifactLoader = WandbArtifactLoader()): - self._artifact_loader = artifact_loader + # TODO: Store some configuration (e.g., tracking info, name) globally on the buddy + def __init__(self): + pass + + def _generate_artifact_lineage( + self, + config: LMBuddyJobConfig, + results: list[wandb.Artifact], + job_type: LMBuddyJobType, + ) -> None: + """Link input artifacts and log output artifacts to a run. + + A no-op if no tracking config is available. 
+ """ + if config.tracking is not None: + with wandb.init( + name=config.name, + job_type=job_type, + resume=WandbResumeMode.ALLOW, + **config.tracking.model_dump(), + ) as run: + for path in config.artifact_paths(): + artifact_name = strip_path_prefix(path) + run.use_artifact(artifact_name) + for artifact in results: + artifact = run.log_artifact(artifact) + artifact.wait() def finetune(self, config: FinetuningJobConfig) -> FinetuningResult: - """Run a supervised finetuning task with the provided configuration.""" - finetuning_result = run_finetuning(config, self._artifact_loader) - return finetuning_result + """Run a supervised finetuning job with the provided configuration.""" + result = run_finetuning(config) + self._generate_artifact_lineage(config, result.artifacts, LMBuddyJobType.FINETUNING) + return result def evaluate(self, config: EvaluationJobConfig) -> EvaluationResult: - """Run an evaluation task with the provided configuration. + """Run an evaluation job with the provided configuration. The underlying evaluation framework is determined by the configuration type. """ match config: case LMHarnessJobConfig() as lm_harness_config: - return run_lm_harness(lm_harness_config, self._artifact_loader) + result = run_lm_harness(lm_harness_config) case PrometheusJobConfig() as prometheus_config: - return run_prometheus(prometheus_config, self._artifact_loader) + result = run_prometheus(prometheus_config) case RagasJobConfig() as ragas_config: - return run_ragas(ragas_config, self._artifact_loader) + result = run_ragas(ragas_config) case _: raise ValueError(f"Invlid configuration for evaluation: {type(config)}") + self._generate_artifact_lineage(config, result.artifacts, LMBuddyJobType.EVALUATION) + return result diff --git a/src/lm_buddy/integrations/huggingface/asset_loader.py b/src/lm_buddy/integrations/huggingface/asset_loader.py index 354519f9..88b6dc11 100644 --- a/src/lm_buddy/integrations/huggingface/asset_loader.py +++ b/src/lm_buddy/integrations/huggingface/asset_loader.py @@ -19,45 +19,19 @@ DatasetConfig, QuantizationConfig, ) -from lm_buddy.integrations.wandb import ArtifactLoader, get_artifact_directory +from lm_buddy.integrations.wandb import get_artifact_directory, get_artifact_from_api from lm_buddy.paths import AssetPath, PathPrefix, strip_path_prefix -def resolve_peft_and_pretrained(path: str) -> tuple[str, str | None]: - """Helper method for determining if a path corresponds to a PEFT model. - - A PEFT model contains an `adapter_config.json` in its directory. - If this file can be loaded, we know the path is a for a PEFT model. - If not, we assume the provided path corresponds to a base HF model. - - Args: - path (str): Name/path to a HuggingFace directory - - Returns: - Tuple of (base model path, optional PEFT path) - """ - # We don't know if the checkpoint is adapter weights or merged model weights - # Try to load as an adapter and fall back to the checkpoint containing the full model - try: - peft_config = PeftConfig.from_pretrained(path) - return peft_config.base_model_name_or_path, path - except ValueError as e: - warnings.warn( - f"Unable to load model as adapter: {e}. " - "This is expected if the checkpoint does not contain adapter weights." - ) - return path, None - - class HuggingFaceAssetLoader: """Helper class for loading HuggingFace assets from LM Buddy configurations. This class depends on an `ArtifactLoader` in order to resolve actual paths from artifact references. 
- """ - def __init__(self, artifact_loader: ArtifactLoader): - self._artifact_loader = artifact_loader + TODO: We can probably move these to standalone functions now that ArtifactLoader is gone. + What if we add other deps (e.g, S3 client in the future?) + """ def resolve_asset_path(self, path: AssetPath) -> str: """Resolve an `AssetPath` to a loadable string path. @@ -69,11 +43,37 @@ def resolve_asset_path(self, path: AssetPath) -> str: if path.startswith((PathPrefix.FILE, PathPrefix.HUGGINGFACE)): return raw_path elif path.startswith(PathPrefix.WANDB): - artifact = self._artifact_loader.use_artifact(raw_path) + artifact = get_artifact_from_api(raw_path) return str(get_artifact_directory(artifact)) else: raise ValueError(f"Unable to resolve asset path from {path}.") + def resolve_peft_and_pretrained(self, path: AssetPath) -> tuple[str, str | None]: + """Helper method for determining if a path corresponds to a PEFT model. + + A PEFT model contains an `adapter_config.json` in its directory. + If this file can be loaded, we know the path is a for a PEFT model. + If not, we assume the provided path corresponds to a base HF model. + + Args: + path (AssetPath): Path for the asset with its `PathPrefix` present + + Returns: + Tuple of (base model path, optional PEFT path) + """ + # We don't know if the checkpoint is adapter weights or merged model weights + # Try to load as an adapter and fall back to the checkpoint containing the full model + resolved_path = self.resolve_asset_path(path) + try: + peft_config = PeftConfig.from_pretrained(resolved_path) + return peft_config.base_model_name_or_path, resolved_path + except ValueError as e: + warnings.warn( + f"Unable to load model as adapter: {e}. " + "This is expected if the checkpoint does not contain adapter weights." + ) + return resolved_path, None + def load_pretrained_config( self, config: AutoModelConfig, diff --git a/src/lm_buddy/integrations/wandb/__init__.py b/src/lm_buddy/integrations/wandb/__init__.py index 09a30c67..cbf899c8 100644 --- a/src/lm_buddy/integrations/wandb/__init__.py +++ b/src/lm_buddy/integrations/wandb/__init__.py @@ -1,4 +1,3 @@ -from lm_buddy.integrations.wandb.artifact_loader import * from lm_buddy.integrations.wandb.artifact_utils import * from lm_buddy.integrations.wandb.run_config import * from lm_buddy.integrations.wandb.run_utils import * diff --git a/src/lm_buddy/integrations/wandb/artifact_loader.py b/src/lm_buddy/integrations/wandb/artifact_loader.py deleted file mode 100644 index 02692961..00000000 --- a/src/lm_buddy/integrations/wandb/artifact_loader.py +++ /dev/null @@ -1,47 +0,0 @@ -from typing import Protocol - -import wandb - - -class ArtifactLoader(Protocol): - """Base interface for using/logging artifacts. - - Note: If/when we decide to support other tracking services (e.g., MLFlow, CometML), - this interface should be abstracted to handle the types for those respective services. - """ - - def use_artifact(self, artifact_path: str) -> wandb.Artifact: - """Load an artifact from its full W&B path. - - If a W&B run is active, the artifact is declared as an input to the run. - If not, the artifact is retrieved outside of the run. - """ - pass - - def log_artifact(self, artifact: wandb.Artifact) -> wandb.Artifact: - """Log an artifact, declaring it as an output of the currently active W&B run. - - Return the logged version of the artifact containing any additional metadata - generated by the tracking platform. 
- """ - pass - - -class WandbArtifactLoader: - """Weights & Biases implementation of the `ArtifactLoader` protocol. - - This class makes external calls to the W&B API and is not suitable for test environments. - """ - - def use_artifact(self, artifact_path: str) -> wandb.Artifact: - if wandb.run is not None: - # Retrieves the artifact and links it as an input to the run - return wandb.use_artifact(artifact_path) - else: - # Retrieves the artifact outside of the run - api = wandb.Api() - return api.artifact(artifact_path) - - def log_artifact(self, artifact: wandb.Artifact) -> wandb.Artifact: - artifact = wandb.log_artifact(artifact) - return artifact.wait() # Wait for artifact to finish committing w/ new metadata diff --git a/src/lm_buddy/integrations/wandb/artifact_utils.py b/src/lm_buddy/integrations/wandb/artifact_utils.py index f0a450ed..fec501a8 100644 --- a/src/lm_buddy/integrations/wandb/artifact_utils.py +++ b/src/lm_buddy/integrations/wandb/artifact_utils.py @@ -17,9 +17,19 @@ class ArtifactType(str, Enum): EVALUATION = "evaluation" -def default_artifact_name(name: str, artifact_type: ArtifactType) -> str: - """A default name for an artifact based on the run name and type.""" - return f"{name}-{artifact_type}" +def default_artifact_name(job_name: str, artifact_type: ArtifactType) -> str: + """A default name for an artifact based on the job name and type.""" + return f"{job_name}-{artifact_type}" + + +def get_artifact_from_api(artifact_name: str) -> wandb.Artifact: + """Retrieve an artifact by fully qualified name from the W&B API. + + This does not handle linking the artifact to an active run. + For that, use `run.use_artifact(artifact_name)`. + """ + api = wandb.Api() + return api.artifact(artifact_name) def get_artifact_directory( diff --git a/src/lm_buddy/integrations/wandb/run_config.py b/src/lm_buddy/integrations/wandb/run_config.py index ae6d2b67..5fff3773 100644 --- a/src/lm_buddy/integrations/wandb/run_config.py +++ b/src/lm_buddy/integrations/wandb/run_config.py @@ -11,7 +11,7 @@ class WandbRunConfig(BaseLMBuddyConfig): """Configuration required to log to a W&B run. - A W&B Run is uniquely identified by the combination of `entity/project/run_id`. + A W&B Run is uniquely identified by the combination of `//`. The W&B platform will auto-generate values for these variables if they are not provided when you initialize a run. @@ -21,12 +21,11 @@ class WandbRunConfig(BaseLMBuddyConfig): if it is not provided. """ - __match_args__ = ("run_id", "name", "project", "run_group", "entity") + __match_args__ = ("id", "project", "group", "entity") - run_id: str - name: str | None = None + id: str project: str | None = None - run_group: str | None = None + group: str | None = None entity: str | None = None @model_validator(mode="before") @@ -40,9 +39,9 @@ def warn_missing_api_key(cls, values): @model_validator(mode="before") def ensure_run_id(cls, values): - if values.get("run_id", None) is None: + if values.get("id", None) is None: # Generate an random 8-digit alphanumeric string, analogous to W&B platform - values["run_id"] = random_string(length=8) + values["id"] = random_string(length=8) return values @classmethod @@ -53,23 +52,21 @@ def from_run(cls, run: Run) -> "WandbRunConfig": """ # TODO: Can we get the run group from this when it exists? 
return cls( - name=run.name, project=run.project, entity=run.entity, - run_id=run.id, + id=run.id, ) def wandb_path(self) -> str: """String identifier for the asset on the W&B platform.""" - path = "/".join(x for x in [self.entity, self.project, self.run_id] if x is not None) + path = "/".join(x for x in [self.entity, self.project, self.id] if x is not None) return path def env_vars(self) -> dict[str, str]: env_vars = { - "WANDB_RUN_ID": self.run_id, - "WANDB_NAME": self.name, + "WANDB_RUN_ID": self.id, "WANDB_PROJECT": self.project, - "WANDB_RUN_GROUP": self.run_group, + "WANDB_RUN_GROUP": self.group, "WANDB_ENTITY": self.entity, "WANDB_API_KEY": os.environ.get("WANDB_API_KEY", None), } diff --git a/src/lm_buddy/integrations/wandb/run_utils.py b/src/lm_buddy/integrations/wandb/run_utils.py index 62a6f0a4..1f15bd0b 100644 --- a/src/lm_buddy/integrations/wandb/run_utils.py +++ b/src/lm_buddy/integrations/wandb/run_utils.py @@ -1,4 +1,3 @@ -import contextlib from enum import Enum from typing import Any @@ -6,7 +5,6 @@ from wandb.apis.public import Run as ApiRun from lm_buddy.integrations.wandb import WandbRunConfig -from lm_buddy.types import BaseLMBuddyConfig class WandbResumeMode(str, Enum): @@ -21,52 +19,20 @@ class WandbResumeMode(str, Enum): NEVER = "never" -@contextlib.contextmanager -def wandb_init_from_config( - config: WandbRunConfig, - *, - parameters: BaseLMBuddyConfig | None = None, - resume: WandbResumeMode | None = None, - job_type: str | None = None, -): - """Initialize a W&B run from the internal run configuration. - - This method can be entered as a context manager similar to `wandb.init` as follows: - - ``` - with wandb_init_from_config(run_config, resume=WandbResumeMode.MUST) as run: - # Use the initialized run here - ... - ``` - """ - init_kwargs = dict( - id=config.run_id, - name=config.name, - project=config.project, - entity=config.entity, - group=config.run_group, - config=parameters.model_dump(mode="json") if parameters else None, - job_type=job_type, - resume=resume, - ) - with wandb.init(**init_kwargs) as run: - yield run - - -def get_wandb_api_run(config: WandbRunConfig) -> ApiRun: +def get_run_from_api(config: WandbRunConfig) -> ApiRun: """Retrieve a run from the W&B API.""" api = wandb.Api() return api.run(config.wandb_path()) -def get_wandb_summary(config: WandbRunConfig) -> dict[str, Any]: +def get_run_summary(config: WandbRunConfig) -> dict[str, Any]: """Get the summary dictionary attached to a W&B run.""" - run = get_wandb_api_run(config) + run = get_run_from_api(config) return dict(run.summary) def update_wandb_summary(config: WandbRunConfig, metrics: dict[str, Any]) -> None: """Update a run's summary with the provided metrics.""" - run = get_wandb_api_run(config) + run = get_run_from_api(config) run.summary.update(metrics) run.update() diff --git a/src/lm_buddy/jobs/_entrypoints/finetuning.py b/src/lm_buddy/jobs/_entrypoints/finetuning.py index 4afe1d0f..47a744ed 100644 --- a/src/lm_buddy/jobs/_entrypoints/finetuning.py +++ b/src/lm_buddy/jobs/_entrypoints/finetuning.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Any -import ray +import wandb from ray import train from ray.train import CheckpointConfig, RunConfig, ScalingConfig from ray.train.huggingface.transformers import RayTrainReportCallback, prepare_trainer @@ -11,15 +11,12 @@ from lm_buddy.integrations.huggingface import HuggingFaceAssetLoader from lm_buddy.integrations.wandb import ( - ArtifactLoader, ArtifactType, WandbResumeMode, build_directory_artifact, default_artifact_name, - 
wandb_init_from_config, ) -from lm_buddy.jobs._entrypoints.utils import preprocess_text_dataset -from lm_buddy.jobs.common import FinetuningResult, LMBuddyJobType +from lm_buddy.jobs.common import FinetuningResult, LMBuddyJobType, preprocess_text_dataset from lm_buddy.jobs.configs import FinetuningJobConfig @@ -29,10 +26,9 @@ def is_tracking_enabled(config: FinetuningJobConfig): return config.tracking is not None and train.get_context().get_world_rank() == 0 -def load_and_train(config: FinetuningJobConfig, artifact_loader: ArtifactLoader): +def load_and_train(config: FinetuningJobConfig): # Load the HF assets from configurations - # Internally, artifact lineages are declared for the active training run - hf_loader = HuggingFaceAssetLoader(artifact_loader) + hf_loader = HuggingFaceAssetLoader() model = hf_loader.load_pretrained_model(config.model, config.quantization) tokenizer = hf_loader.load_pretrained_tokenizer(config.tokenizer) @@ -67,35 +63,28 @@ def load_and_train(config: FinetuningJobConfig, artifact_loader: ArtifactLoader) trainer.train() -def run_finetuning( - config: FinetuningJobConfig, - artifact_loader: ArtifactLoader, -) -> FinetuningResult: - # Place the artifact loader in Ray object store - artifact_loader_ref = ray.put(artifact_loader) - - # Define training function internally to capture the artifact loader ref as a closure - # Reference: https://docs.ray.io/en/latest/ray-core/objects.html#closure-capture-of-objects - def training_function(config_data: dict[str, Any]): - artifact_loader = ray.get(artifact_loader_ref) - config = FinetuningJobConfig(**config_data) - if is_tracking_enabled(config): - with wandb_init_from_config( - config.tracking, - resume=WandbResumeMode.NEVER, - job_type=LMBuddyJobType.FINETUNING, - ): - load_and_train(config, artifact_loader) - else: - load_and_train(config, artifact_loader) +def training_function(config_data: dict[str, Any]): + config = FinetuningJobConfig(**config_data) + if is_tracking_enabled(config): + with wandb.init( + name=config.name, + resume=WandbResumeMode.NEVER, + job_type=LMBuddyJobType.FINETUNING, + **config.tracking.model_dump(), + ): + load_and_train(config) + else: + load_and_train(config) + +def run_finetuning(config: FinetuningJobConfig) -> FinetuningResult: # Construct Ray train configurations from input config scaling_config = ScalingConfig( use_gpu=config.ray.use_gpu, num_workers=config.ray.num_workers, ) run_config = RunConfig( - name=config.tracking.name if config.tracking else None, + name=config.name, storage_path=config.ray.storage_path, checkpoint_config=CheckpointConfig(num_to_keep=1), ) @@ -108,27 +97,22 @@ def training_function(config_data: dict[str, Any]): result = trainer.fit() print(f"Training result: {result}") - # Register a model artifact if tracking is enabled and Ray saved a checkpoint - checkpoint_path, checkpoint_artifact = None, None + # Create a checkpoint artifact if tracking is enabled and Ray saved a checkpoint if result.checkpoint: checkpoint_path = Path(result.checkpoint.path) / RayTrainReportCallback.CHECKPOINT_NAME - if config.tracking: - # Must resume from the just-completed training run - with wandb_init_from_config(config.tracking, resume=WandbResumeMode.MUST) as run: - checkpoint_artifact = build_directory_artifact( - artifact_name=default_artifact_name(run.name, ArtifactType.MODEL), - artifact_type=ArtifactType.MODEL, - dir_path=checkpoint_path, - reference=True, - ) - print("Logging artifact for finetuning checkpoint...") - checkpoint_artifact = 
artifact_loader.log_artifact(checkpoint_artifact) + checkpoint_artifact = build_directory_artifact( + artifact_name=default_artifact_name(config.name, ArtifactType.MODEL), + artifact_type=ArtifactType.MODEL, + dir_path=checkpoint_path, + reference=True, + ) + else: + checkpoint_path, checkpoint_artifact = None, None # Return finetuning result object - output_artifacts = [checkpoint_artifact] if checkpoint_artifact else [] return FinetuningResult( - artifacts=output_artifacts, + artifacts=[checkpoint_artifact] if checkpoint_artifact else [], checkpoint_path=checkpoint_path, - metrics=result.metrics or {}, + metrics=result.metrics, is_adapter=config.adapter is not None, ) diff --git a/src/lm_buddy/jobs/_entrypoints/lm_harness.py b/src/lm_buddy/jobs/_entrypoints/lm_harness.py index 8baa5ca4..2fb31b8a 100644 --- a/src/lm_buddy/jobs/_entrypoints/lm_harness.py +++ b/src/lm_buddy/jobs/_entrypoints/lm_harness.py @@ -6,20 +6,9 @@ from lm_eval.models.huggingface import HFLM from lm_eval.models.openai_completions import OpenaiCompletionsLM -from lm_buddy.integrations.huggingface import ( - AutoModelConfig, - HuggingFaceAssetLoader, - resolve_peft_and_pretrained, -) -from lm_buddy.integrations.wandb import ( - ArtifactLoader, - ArtifactType, - WandbResumeMode, - build_table_artifact, - default_artifact_name, - wandb_init_from_config, -) -from lm_buddy.jobs.common import EvaluationResult, LMBuddyJobType +from lm_buddy.integrations.huggingface import AutoModelConfig, HuggingFaceAssetLoader +from lm_buddy.integrations.wandb import ArtifactType, build_table_artifact, default_artifact_name +from lm_buddy.jobs.common import EvaluationResult from lm_buddy.jobs.configs import LMHarnessJobConfig, LocalChatCompletionsConfig @@ -40,16 +29,12 @@ def get_per_task_dataframes( return task_dataframes -def load_harness_model( - config: LMHarnessJobConfig, - artifact_loader: ArtifactLoader, -) -> HFLM | OpenaiCompletionsLM: +def load_harness_model(config: LMHarnessJobConfig) -> HFLM | OpenaiCompletionsLM: # Instantiate the lm-harness LM class based on the provided model config type - hf_loader = HuggingFaceAssetLoader(artifact_loader) + hf_loader = HuggingFaceAssetLoader() match config.model: case AutoModelConfig() as model_config: - model_path = hf_loader.resolve_asset_path(model_config.path) - model_path, peft_path = resolve_peft_and_pretrained(model_path) + model_path, peft_path = hf_loader.resolve_peft_and_pretrained(model_config.path) quantization_kwargs: dict[str, Any] = ( config.quantization.model_dump() if config.quantization else {} ) @@ -78,11 +63,10 @@ def load_harness_model( raise ValueError(f"Unexpected model config type: {type(config.model)}") -def run_eval( - config: LMHarnessJobConfig, - artifact_loader: ArtifactLoader, -) -> dict[str, list[tuple[str, float]]]: - llm = load_harness_model(config, artifact_loader) +def run_lm_harness(config: LMHarnessJobConfig) -> EvaluationResult: + print(f"Running lm-harness evaluation with configuration:\n {config.model_dump_json(indent=2)}") + + llm = load_harness_model(config) eval_results = lm_eval.simple_evaluate( model=llm, tasks=config.evaluation.tasks, @@ -92,37 +76,18 @@ def run_eval( log_samples=False, ) print(f"Obtained evaluation results: {eval_results}") - return get_per_task_dataframes(eval_results["results"]) - -def run_lm_harness( - config: LMHarnessJobConfig, - artifact_loader: ArtifactLoader, -) -> EvaluationResult: - print(f"Running lm-harness evaluation with configuration:\n {config.model_dump_json(indent=2)}") + result_tables = 
get_per_task_dataframes(eval_results["results"]) - if config.tracking is not None: - with wandb_init_from_config( - config.tracking, - parameters=config.evaluation, # Log eval settings in W&B run - resume=WandbResumeMode.ALLOW, - job_type=LMBuddyJobType.EVALUATION, - ) as run: - eval_tables = run_eval(config, artifact_loader) - table_artifact = build_table_artifact( - artifact_name=default_artifact_name(run.name, ArtifactType.EVALUATION), - artifact_type=ArtifactType.EVALUATION, - tables=eval_tables, - ) - print("Logging artifact for evaluation results...") - table_artifact = artifact_loader.log_artifact(table_artifact) - else: - eval_tables = run_eval(config, artifact_loader) - table_artifact = None + artifact_name = default_artifact_name(config.name, ArtifactType.EVALUATION) + table_artifact = build_table_artifact( + artifact_name=artifact_name, + artifact_type=ArtifactType.EVALUATION, + tables=result_tables, + ) - output_artifacts = [table_artifact] if table_artifact else [] return EvaluationResult( - tables=eval_tables, - artifacts=output_artifacts, + tables=result_tables, + artifacts=[table_artifact], dataset_path=None, ) diff --git a/src/lm_buddy/jobs/_entrypoints/prometheus.py b/src/lm_buddy/jobs/_entrypoints/prometheus.py index ce0c8c22..6d883888 100644 --- a/src/lm_buddy/jobs/_entrypoints/prometheus.py +++ b/src/lm_buddy/jobs/_entrypoints/prometheus.py @@ -15,14 +15,11 @@ from lm_buddy.integrations.huggingface import AutoTokenizerConfig, HuggingFaceAssetLoader from lm_buddy.integrations.wandb import ( - ArtifactLoader, ArtifactType, build_directory_artifact, default_artifact_name, - wandb_init_from_config, ) -from lm_buddy.jobs._entrypoints.utils import preprocess_text_dataset -from lm_buddy.jobs.common import EvaluationResult, LMBuddyJobType +from lm_buddy.jobs.common import EvaluationResult, preprocess_text_dataset from lm_buddy.jobs.configs import PrometheusJobConfig @@ -104,13 +101,12 @@ def get_response_with_retries( return (feedback, score) -def run_eval( - config: PrometheusJobConfig, - artifact_loader: ArtifactLoader, - client: OpenAI, -) -> Path: +def run_eval(config: PrometheusJobConfig) -> Path: + # Instantiate OpenAI client to speak with the vLLM endpoint + client = OpenAI(base_url=config.prometheus.inference.base_url) + # load dataset from W&B artifact - hf_loader = HuggingFaceAssetLoader(artifact_loader) + hf_loader = HuggingFaceAssetLoader() dataset = hf_loader.load_dataset(config.dataset) dataset = preprocess_text_dataset(dataset, config.dataset) @@ -158,34 +154,22 @@ def run_eval( return output_dataset_path -def run_prometheus( - config: PrometheusJobConfig, - artifact_loader: ArtifactLoader, -) -> EvaluationResult: - # Instantiate OpenAI client to speak with the vLLM endpoint - client = OpenAI(base_url=config.prometheus.inference.base_url) - +def run_prometheus(config: PrometheusJobConfig) -> EvaluationResult: # Run eval and store output in local filename - if config.tracking: - with wandb_init_from_config(config.tracking, job_type=LMBuddyJobType.EVALUATION) as run: - output_dataset_path = run_eval(config, artifact_loader, client) - # Create a directory artifact for the HF dataset - dataset_artifact = build_directory_artifact( - artifact_name=default_artifact_name(run.name, artifact_type=ArtifactType.DATASET), - artifact_type=ArtifactType.DATASET, - dir_path=output_dataset_path, - reference=False, - ) - print("Logging artifact for evaluation dataset...") - dataset_artifact = artifact_loader.log_artifact(dataset_artifact) - else: - output_dataset_path = 
run_eval(config, artifact_loader, client) - dataset_artifact = None - + output_dataset_path = run_eval(config) print(f"Prometheus evaluation dataset stored at {output_dataset_path}") - output_artifacts = [dataset_artifact] if dataset_artifact else [] + + # Create a directory artifact for the HF dataset + artifact_name = default_artifact_name(config.name, artifact_type=ArtifactType.DATASET) + dataset_artifact = build_directory_artifact( + artifact_name=artifact_name, + artifact_type=ArtifactType.DATASET, + dir_path=output_dataset_path, + reference=False, + ) + return EvaluationResult( - artifacts=output_artifacts, + artifacts=[dataset_artifact], dataset_path=output_dataset_path, tables={}, ) diff --git a/src/lm_buddy/jobs/_entrypoints/ragas.py b/src/lm_buddy/jobs/_entrypoints/ragas.py index e22a2105..63268934 100644 --- a/src/lm_buddy/jobs/_entrypoints/ragas.py +++ b/src/lm_buddy/jobs/_entrypoints/ragas.py @@ -8,14 +8,11 @@ from lm_buddy.integrations.huggingface import HuggingFaceAssetLoader from lm_buddy.integrations.wandb import ( - ArtifactLoader, ArtifactType, build_directory_artifact, default_artifact_name, - wandb_init_from_config, ) -from lm_buddy.jobs._entrypoints.utils import preprocess_text_dataset -from lm_buddy.jobs.common import EvaluationResult, LMBuddyJobType +from lm_buddy.jobs.common import EvaluationResult, preprocess_text_dataset from lm_buddy.jobs.configs import RagasJobConfig RAGAS_METRICS_MAP = { @@ -26,9 +23,9 @@ } -def run_eval(config: RagasJobConfig, artifact_loader: ArtifactLoader) -> Path: +def run_eval(config: RagasJobConfig) -> Path: # load dataset from W&B artifact - hf_loader = HuggingFaceAssetLoader(artifact_loader) + hf_loader = HuggingFaceAssetLoader() evaluation_dataset = hf_loader.load_dataset(config.dataset) evaluation_dataset = preprocess_text_dataset(evaluation_dataset, config.dataset) @@ -70,28 +67,21 @@ def run_eval(config: RagasJobConfig, artifact_loader: ArtifactLoader) -> Path: return output_dataset_path -def run_ragas(config: RagasJobConfig, artifact_loader: ArtifactLoader) -> EvaluationResult: - # Run ragas eval and store output in local filename - if config.tracking: - with wandb_init_from_config(config.tracking, job_type=LMBuddyJobType.EVALUATION) as run: - output_dataset_path = run_eval(config, artifact_loader) - # Create a directory artifact for the HF dataset - dataset_artifact = build_directory_artifact( - artifact_name=default_artifact_name(run.name, artifact_type=ArtifactType.DATASET), - artifact_type=ArtifactType.DATASET, - dir_path=output_dataset_path, - reference=False, - ) - print("Logging dataset artifact for Ragas evaluation ...") - dataset_artifact = artifact_loader.log_artifact(dataset_artifact) - else: - output_dataset_path = run_eval(config, artifact_loader) - dataset_artifact = None - +def run_ragas(config: RagasJobConfig) -> EvaluationResult: + output_dataset_path = run_eval(config) print(f"Ragas evaluation dataset stored at {output_dataset_path}") - output_artifacts = [dataset_artifact] if dataset_artifact else [] + + # Create a directory artifact for the HF dataset + artifact_name = default_artifact_name(config.name, artifact_type=ArtifactType.DATASET) + dataset_artifact = build_directory_artifact( + artifact_name=artifact_name, + artifact_type=ArtifactType.DATASET, + dir_path=output_dataset_path, + reference=False, + ) + return EvaluationResult( - artifacts=output_artifacts, + artifacts=[dataset_artifact], dataset_path=output_dataset_path, tables={}, ) diff --git a/src/lm_buddy/jobs/_entrypoints/utils.py 
b/src/lm_buddy/jobs/_entrypoints/utils.py deleted file mode 100644 index 6ef2603e..00000000 --- a/src/lm_buddy/jobs/_entrypoints/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -from datasets import Dataset - -from lm_buddy.integrations.huggingface.dataset_config import TextDatasetConfig -from lm_buddy.preprocessing import format_dataset_with_prompt - - -def preprocess_text_dataset(dataset: Dataset, dataset_config: TextDatasetConfig) -> Dataset: - """Prompt format a text dataset if a prompt template is specified on the config.""" - if dataset_config.prompt_template is not None: - return format_dataset_with_prompt( - dataset=dataset, - template=dataset_config.prompt_template, - output_field=dataset_config.text_field, - ) - else: - return dataset diff --git a/src/lm_buddy/jobs/common.py b/src/lm_buddy/jobs/common.py index ed2d2b77..59afe823 100644 --- a/src/lm_buddy/jobs/common.py +++ b/src/lm_buddy/jobs/common.py @@ -5,6 +5,10 @@ import pandas as pd import wandb +from datasets import Dataset + +from lm_buddy.integrations.huggingface import TextDatasetConfig +from lm_buddy.preprocessing import format_dataset_with_prompt class LMBuddyJobType(str, Enum): @@ -25,7 +29,7 @@ class FinetuningResult(JobResult): """Result from a finetuning task.""" checkpoint_path: Path | None - metrics: dict[str, Any] + metrics: dict[str, Any] | None is_adapter: bool @@ -35,3 +39,15 @@ class EvaluationResult(JobResult): tables: dict[str, pd.DataFrame] dataset_path: Path | None + + +def preprocess_text_dataset(dataset: Dataset, dataset_config: TextDatasetConfig) -> Dataset: + """Prompt format a text dataset if a prompt template is specified on the config.""" + if dataset_config.prompt_template is not None: + return format_dataset_with_prompt( + dataset=dataset, + template=dataset_config.prompt_template, + output_field=dataset_config.text_field, + ) + else: + return dataset diff --git a/src/lm_buddy/jobs/configs/base.py b/src/lm_buddy/jobs/configs/base.py index a6639488..417d5157 100644 --- a/src/lm_buddy/jobs/configs/base.py +++ b/src/lm_buddy/jobs/configs/base.py @@ -1,9 +1,13 @@ import contextlib import tempfile +from abc import abstractmethod from pathlib import Path +from pydantic import Field from pydantic_yaml import parse_yaml_file_as, to_yaml_file +from lm_buddy.integrations.wandb import WandbRunConfig +from lm_buddy.paths import AssetPath, PathPrefix from lm_buddy.types import BaseLMBuddyConfig @@ -16,6 +20,15 @@ class LMBuddyJobConfig(BaseLMBuddyConfig): but this is not rigidly constrained by the interface. This may change in the future. """ + name: str = Field(description="Name of the job.") + tracking: WandbRunConfig | None = Field( + default=None, + description=( + "Tracking information to associate with the job. " + "A new run is created with these details." 
+ ), + ) + @classmethod def from_yaml_file(cls, path: Path | str): return parse_yaml_file_as(cls, path) @@ -38,3 +51,12 @@ def to_tempfile(self, *, name: str = "config.yaml", dir: str | Path | None = Non config_path = Path(tmpdir) / name self.to_yaml_file(config_path) yield config_path + + @abstractmethod + def asset_paths(self) -> set[AssetPath]: + """Return a set of all `AssetPath` fields on this config.""" + pass + + def artifact_paths(self) -> set[AssetPath]: + """Return a set of all W&B artifact paths on this config.""" + return {x for x in self.asset_paths() if x.startswith(PathPrefix.WANDB)} diff --git a/src/lm_buddy/jobs/configs/finetuning.py b/src/lm_buddy/jobs/configs/finetuning.py index be57811c..dce71419 100644 --- a/src/lm_buddy/jobs/configs/finetuning.py +++ b/src/lm_buddy/jobs/configs/finetuning.py @@ -8,8 +8,8 @@ TextDatasetConfig, TrainerConfig, ) -from lm_buddy.integrations.wandb import WandbRunConfig from lm_buddy.jobs.configs import LMBuddyJobConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig @@ -32,7 +32,6 @@ class FinetuningJobConfig(LMBuddyJobConfig): tokenizer: AutoTokenizerConfig quantization: QuantizationConfig | None = None adapter: AdapterConfig | None = None - tracking: WandbRunConfig | None = None trainer: TrainerConfig = Field(default_factory=TrainerConfig) ray: FinetuningRayConfig = Field(default_factory=FinetuningRayConfig) @@ -64,3 +63,6 @@ def validate_tokenizer_arg(cls, x): if isinstance(x, str): return AutoTokenizerConfig(path=x) return x + + def asset_paths(self) -> list[AssetPath]: + return {self.model.path, self.dataset.path, self.tokenizer.path} diff --git a/src/lm_buddy/jobs/configs/lm_harness.py b/src/lm_buddy/jobs/configs/lm_harness.py index 42e8b227..88af2446 100644 --- a/src/lm_buddy/jobs/configs/lm_harness.py +++ b/src/lm_buddy/jobs/configs/lm_harness.py @@ -7,8 +7,8 @@ QuantizationConfig, ) from lm_buddy.integrations.vllm import InferenceServerConfig -from lm_buddy.integrations.wandb import WandbRunConfig from lm_buddy.jobs.configs import LMBuddyJobConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig @@ -50,4 +50,12 @@ class LMHarnessJobConfig(LMBuddyJobConfig): model: AutoModelConfig | LocalChatCompletionsConfig evaluation: LMHarnessEvaluationConfig quantization: QuantizationConfig | None = None - tracking: WandbRunConfig | None = None + + def asset_paths(self) -> list[AssetPath]: + match self.model: + case AutoModelConfig() as config: + return {config.path} + case LocalChatCompletionsConfig() as config if config.inference.engine is not None: + return {config.inference.engine} + case _: + return {} diff --git a/src/lm_buddy/jobs/configs/prometheus.py b/src/lm_buddy/jobs/configs/prometheus.py index 2500eb5a..008622d6 100644 --- a/src/lm_buddy/jobs/configs/prometheus.py +++ b/src/lm_buddy/jobs/configs/prometheus.py @@ -2,8 +2,8 @@ from lm_buddy.integrations.huggingface import TextDatasetConfig from lm_buddy.integrations.vllm import VLLMCompletionsConfig -from lm_buddy.integrations.wandb import WandbRunConfig from lm_buddy.jobs.configs import LMBuddyJobConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig @@ -34,4 +34,7 @@ class PrometheusJobConfig(LMBuddyJobConfig): default_factory=PrometheusEvaluationConfig, description="Settings for the Prometheus evaluation.", ) - tracking: WandbRunConfig | None = None + + def asset_paths(self) -> set[AssetPath]: + paths = {self.dataset.path, self.prometheus.inference.engine} + return {x for x in 
paths if x is not None} diff --git a/src/lm_buddy/jobs/configs/ragas.py b/src/lm_buddy/jobs/configs/ragas.py index 2217a16d..ae200413 100644 --- a/src/lm_buddy/jobs/configs/ragas.py +++ b/src/lm_buddy/jobs/configs/ragas.py @@ -5,7 +5,8 @@ from lm_buddy.integrations.huggingface import AutoModelConfig from lm_buddy.integrations.huggingface.dataset_config import TextDatasetConfig from lm_buddy.integrations.vllm import VLLMCompletionsConfig -from lm_buddy.integrations.wandb import WandbRunConfig +from lm_buddy.jobs.configs.base import LMBuddyJobConfig +from lm_buddy.paths import AssetPath from lm_buddy.types import BaseLMBuddyConfig RagasEvaluationMetric = Literal[ @@ -42,7 +43,7 @@ def validate_embedding_model_arg(cls, x): return x -class RagasJobConfig(BaseLMBuddyConfig): +class RagasJobConfig(LMBuddyJobConfig): """Configuration to run a Ragas evaluation job. This job loads a dataset from an existing path on our cluster. @@ -50,19 +51,15 @@ class RagasJobConfig(BaseLMBuddyConfig): the contexts (retrieved), and optionally a ground truth field. """ - # vllm inference server for generation judge: VLLMCompletionsConfig = Field(description="Externally hosted Ragas judge model.") - - # dataset containing the relevant fields required for ragas evaluation dataset: TextDatasetConfig = Field( description="Dataset of text completions to evaluate using the Ragas judge model." ) - - # evaluation settings for ragas evaluation: RagasEvaluationConfig = Field( default_factory=RagasEvaluationConfig, description="Settings for the Ragas evaluation.", ) - # wandb model run to associate to the ragas evaluator - tracking: WandbRunConfig | None = None + def asset_paths(self) -> set[AssetPath]: + paths = {self.dataset.path, self.judge.inference.engine} + return {x for x in paths if x is not None} diff --git a/tests/conftest.py b/tests/conftest.py index e5eb8d35..422cbcbc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,33 +19,45 @@ def resources_dir(): @pytest.fixture -def xyz_dataset_artifact(resources_dir): - dataset_path = resources_dir / "datasets" / "xyz" +def xyz_dataset_path(resources_dir): + return resources_dir / "datasets" / "xyz" + + +@pytest.fixture +def text_dataset_path(resources_dir): + return resources_dir / "datasets" / "tiny_shakespeare" + + +@pytest.fixture +def llm_model_path(resources_dir): + return resources_dir / "models" / "tiny_gpt2" + + +@pytest.fixture +def xyz_dataset_artifact(xyz_dataset_path): return build_directory_artifact( artifact_name="xyz-dataset", artifact_type=ArtifactType.DATASET, - dir_path=dataset_path, + dir_path=xyz_dataset_path, reference=True, ) @pytest.fixture -def text_dataset_artifact(resources_dir): - dataset_path = resources_dir / "datasets" / "tiny_shakespeare" +def text_dataset_artifact(text_dataset_path): return build_directory_artifact( artifact_name="tiny-shakespeare-dataset", artifact_type=ArtifactType.DATASET, - dir_path=dataset_path, + dir_path=text_dataset_path, reference=True, ) @pytest.fixture -def llm_model_artifact(resources_dir): - model_path = resources_dir / "models" / "tiny_gpt2" +def llm_model_artifact(llm_model_path): return build_directory_artifact( artifact_name="tiny-gpt2-model", artifact_type=ArtifactType.MODEL, - dir_path=model_path, + dir_path=llm_model_path, reference=True, ) diff --git a/tests/integration/test_finetuning.py b/tests/integration/test_finetuning.py index e13f2e4e..c41c549c 100644 --- a/tests/integration/test_finetuning.py +++ b/tests/integration/test_finetuning.py @@ -1,24 +1,17 @@ -import wandb +import pytest 
from lm_buddy import LMBuddy from lm_buddy.integrations.huggingface import AutoModelConfig, TextDatasetConfig, TrainerConfig from lm_buddy.integrations.wandb import ArtifactType, WandbRunConfig from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig -from lm_buddy.paths import format_artifact_path -from tests.utils import FakeArtifactLoader +from lm_buddy.paths import format_file_path -def get_job_config( - model_artifact: wandb.Artifact, - dataset_artifact: wandb.Artifact, -) -> FinetuningJobConfig: - """Create a job config for finetuning. - - The artifacts should already be logged and contain a fully qualified W&B name. - """ - model_config = AutoModelConfig(path=format_artifact_path(model_artifact)) +@pytest.fixture +def job_config(llm_model_path, text_dataset_path) -> FinetuningJobConfig: + model_config = AutoModelConfig(path=format_file_path(llm_model_path)) dataset_config = TextDatasetConfig( - path=format_artifact_path(dataset_artifact), + path=format_file_path(text_dataset_path), text_field="text", split="train", ) @@ -28,9 +21,10 @@ def get_job_config( save_steps=1, save_strategy="epoch", ) - tracking_config = WandbRunConfig(name="test-finetuning-job") + tracking_config = WandbRunConfig(project="test-project") ray_config = FinetuningRayConfig(num_workers=1, use_gpu=False) return FinetuningJobConfig( + name="test-job", model=model_config, dataset=dataset_config, trainer=trainer_config, @@ -39,22 +33,12 @@ def get_job_config( ) -def test_finetuning_job(llm_model_artifact, text_dataset_artifact): - # Preload input artifact in loader - artifact_loader = FakeArtifactLoader() - logged_model_artifact = artifact_loader.log_artifact(llm_model_artifact) - logged_dataset_artifact = artifact_loader.log_artifact(text_dataset_artifact) - - # Build a job config - job_config = get_job_config(logged_model_artifact, logged_dataset_artifact) - +def test_finetuning_job(job_config): # Run test job - buddy = LMBuddy(artifact_loader) - buddy.finetune(job_config) + buddy = LMBuddy() + result = buddy.finetune(job_config) - # Two input artifacts, and one output model artifact produced - artifacts = artifact_loader.get_artifacts() - num_dataset_artifacts = len([a for a in artifacts if a.type == ArtifactType.DATASET]) - num_model_artifacts = len([a for a in artifacts if a.type == ArtifactType.MODEL]) - assert num_dataset_artifacts == 1 - assert num_model_artifacts == 2 + # One model artifact should be generated as a result + artifacts = result.artifacts + assert len(artifacts) == 1 + assert artifacts[0].type == ArtifactType.MODEL diff --git a/tests/integration/test_lm_harness.py b/tests/integration/test_lm_harness.py index c9321058..29832a00 100644 --- a/tests/integration/test_lm_harness.py +++ b/tests/integration/test_lm_harness.py @@ -1,56 +1,27 @@ -import wandb +import pytest from lm_buddy import LMBuddy from lm_buddy.integrations.huggingface import AutoModelConfig from lm_buddy.integrations.wandb import WandbRunConfig from lm_buddy.jobs.configs import LMHarnessEvaluationConfig, LMHarnessJobConfig -from lm_buddy.paths import format_artifact_path -from tests.utils import FakeArtifactLoader +from lm_buddy.paths import format_file_path -def get_job_config(model_artifact: wandb.Artifact) -> LMHarnessJobConfig: - """Create a job config for evaluation. - - The artifact should already be logged and contain a fully qualified W&B name. 
- """ - model_config = AutoModelConfig(path=format_artifact_path(model_artifact)) - tracking_config = WandbRunConfig(name="test-lm-harness-job") +@pytest.fixture +def job_config(llm_model_path) -> LMHarnessJobConfig: + model_config = AutoModelConfig(path=format_file_path(llm_model_path)) + tracking_config = WandbRunConfig(project="test-project") evaluation_config = LMHarnessEvaluationConfig(tasks=["hellaswag"], limit=5) return LMHarnessJobConfig( + name="test-job", model=model_config, evaluation=evaluation_config, tracking=tracking_config, ) -def test_lm_harness_job_with_tracking(llm_model_artifact): - # Preload input artifact in loader - artifact_loader = FakeArtifactLoader() - logged_model_artifact = artifact_loader.log_artifact(llm_model_artifact) - - # Get a job config - job_config = get_job_config(logged_model_artifact) - - # Run test job - buddy = LMBuddy(artifact_loader) - buddy.evaluate(job_config) - - # One input artifact, and one eval artifact produced - assert artifact_loader.num_artifacts() == 2 - - -def test_lm_harness_job_no_tracking(llm_model_artifact): - # Preload input artifact in loader - artifact_loader = FakeArtifactLoader() - llm_model_artifact = artifact_loader.log_artifact(llm_model_artifact) - - # Get a job config - job_config = get_job_config(llm_model_artifact) - job_config.tracking = None # Disable tracking on job config - - # Run test job - buddy = LMBuddy(artifact_loader) - buddy.evaluate(job_config) - - # One input artifact, no additional eval artifacts produced - assert artifact_loader.num_artifacts() == 1 +def test_lm_harness_job(job_config): + buddy = LMBuddy() + result = buddy.evaluate(job_config) + assert len(result.tables) == 1 # One table for hellaswag + assert len(result.artifacts) == 1 # One table artifact diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..945086d3 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel + + +def copy_pydantic_json(model: BaseModel) -> BaseModel: + """Copy a Pydantic model through round-trip JSON serialization.""" + return model.__class__.model_validate_json(model.model_dump_json()) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 15199e13..d4362d4b 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -63,7 +63,7 @@ def adapter_config(): @pytest.fixture def wandb_run_config(): - return WandbRunConfig(name="run", run_id="12345", project="research", entity="mzai") + return WandbRunConfig(id="12345", project="research", entity="mzai") @pytest.fixture diff --git a/tests/unit/integrations/huggingface/test_asset_loader.py b/tests/unit/integrations/huggingface/test_asset_loader.py index 0af6677c..a3293b99 100644 --- a/tests/unit/integrations/huggingface/test_asset_loader.py +++ b/tests/unit/integrations/huggingface/test_asset_loader.py @@ -2,18 +2,14 @@ from datasets import Dataset, DatasetDict from lm_buddy.integrations.huggingface import AutoModelConfig, DatasetConfig, HuggingFaceAssetLoader -from lm_buddy.paths import format_artifact_path -from tests.utils import FakeArtifactLoader +from lm_buddy.paths import format_file_path -def test_dataset_loading(xyz_dataset_artifact): - # Preload fake artifact for testing - artifact_loader = FakeArtifactLoader() - artifact_loader.log_artifact(xyz_dataset_artifact) - hf_loader = HuggingFaceAssetLoader(artifact_loader) +def test_dataset_loading(xyz_dataset_path): + hf_loader = HuggingFaceAssetLoader() - artifact_path = format_artifact_path(xyz_dataset_artifact) - dataset_config = 
DatasetConfig(path=artifact_path, test_size=0.2, seed=0) + asset_path = format_file_path(xyz_dataset_path) + dataset_config = DatasetConfig(path=asset_path, test_size=0.2, seed=0) dataset = hf_loader.load_dataset(dataset_config) assert type(dataset) is Dataset @@ -23,14 +19,11 @@ def test_dataset_loading(xyz_dataset_artifact): assert "train" in datasets and "test" in datasets -def test_model_loading(llm_model_artifact): - # Preload fake artifact for testing - artifact_loader = FakeArtifactLoader() - artifact_loader.log_artifact(llm_model_artifact) - hf_loader = HuggingFaceAssetLoader(artifact_loader) +def test_model_loading(llm_model_path): + hf_loader = HuggingFaceAssetLoader() - artifact_path = format_artifact_path(llm_model_artifact) - model_config = AutoModelConfig(path=artifact_path, torch_dtype=torch.bfloat16) + asset_path = format_file_path(llm_model_path) + model_config = AutoModelConfig(path=asset_path, torch_dtype=torch.bfloat16) hf_config = hf_loader.load_pretrained_config(model_config) hf_model = hf_loader.load_pretrained_model(model_config) diff --git a/tests/unit/integrations/huggingface/test_quantization_config.py b/tests/unit/integrations/huggingface/test_quantization_config.py index 742c8c0f..7efaac0c 100644 --- a/tests/unit/integrations/huggingface/test_quantization_config.py +++ b/tests/unit/integrations/huggingface/test_quantization_config.py @@ -2,7 +2,7 @@ import torch from lm_buddy.integrations.huggingface import QuantizationConfig -from tests.utils import copy_pydantic_json +from tests.test_utils import copy_pydantic_json @pytest.fixture diff --git a/tests/unit/integrations/wandb/test_run_config.py b/tests/unit/integrations/wandb/test_run_config.py index bad8233d..8da07230 100644 --- a/tests/unit/integrations/wandb/test_run_config.py +++ b/tests/unit/integrations/wandb/test_run_config.py @@ -5,7 +5,7 @@ from pydantic import ValidationError from lm_buddy.integrations.wandb import WandbRunConfig -from tests.utils import copy_pydantic_json +from tests.test_utils import copy_pydantic_json @pytest.fixture @@ -18,8 +18,7 @@ def mock_environment_without_keys(): @pytest.fixture def wandb_run_config(): return WandbRunConfig( - name="run-name", - run_id="run-id", + id="run-id", project="research", entity="team", ) @@ -34,13 +33,13 @@ def test_wandb_path(wandb_run_config): def test_ensure_run_id(): - env = WandbRunConfig(name="defined", project="defined", entity="defined") - assert env.run_id is not None # Pydantic validator fills this in + env = WandbRunConfig(project="defined", entity="defined") + assert env.id is not None # Pydantic validator fills this in def test_env_vars(wandb_run_config): env_vars = wandb_run_config.env_vars() - expected = ["WANDB_NAME", "WANDB_PROJECT", "WANDB_ENTITY", "WANDB_RUN_ID"] + expected = ["WANDB_PROJECT", "WANDB_ENTITY", "WANDB_RUN_ID"] for key in expected: assert key in env_vars assert "WANDB_RUN_GROUP" not in env_vars @@ -48,10 +47,10 @@ def test_env_vars(wandb_run_config): def test_disallowed_kwargs(): with pytest.raises(ValidationError): - WandbRunConfig(name="name", project="project", old_name="I will throw") + WandbRunConfig(project="project", old_name="I will throw") def test_missing_key_warning(mock_environment_without_keys): with pytest.warns(UserWarning): - config = WandbRunConfig(name="I am missing an API key", project="I should warn the user") + config = WandbRunConfig(id="I am missing an API key", project="I should warn the user") assert "WANDB_API_KEY" not in config.env_vars() diff --git 
a/tests/unit/jobs/configs/test_finetuning_config.py b/tests/unit/jobs/configs/test_finetuning_config.py index 0ef9dceb..120b43b8 100644 --- a/tests/unit/jobs/configs/test_finetuning_config.py +++ b/tests/unit/jobs/configs/test_finetuning_config.py @@ -3,7 +3,7 @@ from lm_buddy.integrations.huggingface import TextDatasetConfig from lm_buddy.jobs.configs import FinetuningJobConfig, FinetuningRayConfig -from tests.utils import copy_pydantic_json +from tests.test_utils import copy_pydantic_json @pytest.fixture @@ -25,6 +25,7 @@ def finetuning_job_config( finetuning_ray_config, ): return FinetuningJobConfig( + name="finetuning-job-config", model=model_config_with_artifact, dataset=dataset_config_with_artifact, tokenizer=tokenizer_config_with_artifact, @@ -58,6 +59,7 @@ def test_argument_validation(): # Strings should be upcast to configs as the path argument allowed_config = FinetuningJobConfig( + name="test", model=model_path, tokenizer=tokenizer_path, dataset=dataset_config, @@ -67,12 +69,14 @@ def test_argument_validation(): # Check passing invalid arguments is validated for each asset type with pytest.raises(ValidationError): - FinetuningJobConfig(model=12345, tokenizer=tokenizer_path, dataset=dataset_config) + FinetuningJobConfig(name="test", model=12345, dataset=dataset_config) with pytest.raises(ValidationError): - FinetuningJobConfig(model=model_path, tokenizer=12345, dataset=dataset_config) + FinetuningJobConfig(name="test", model=model_path, tokenizer=12345, dataset=dataset_config) with pytest.raises(ValidationError): - FinetuningJobConfig(model=model_path, tokenizer=tokenizer_path, dataset=12345) + FinetuningJobConfig(name="test", model=model_path, tokenizer=tokenizer_path, dataset=12345) # Check that tokenizer is set to model path when absent - missing_tokenizer_config = FinetuningJobConfig(model=model_path, dataset=dataset_config) + missing_tokenizer_config = FinetuningJobConfig( + name="test", model=model_path, dataset=dataset_config + ) assert missing_tokenizer_config.tokenizer.path == model_path diff --git a/tests/unit/jobs/configs/test_job_config.py b/tests/unit/jobs/configs/test_job_config.py index ebf3bbea..47114302 100644 --- a/tests/unit/jobs/configs/test_job_config.py +++ b/tests/unit/jobs/configs/test_job_config.py @@ -1,11 +1,13 @@ from lm_buddy.jobs.configs import LMBuddyJobConfig +from lm_buddy.paths import AssetPath def test_config_as_tempfile(): class TestConfig(LMBuddyJobConfig): - magic_number: int + def asset_paths(self) -> set[AssetPath]: + return super().asset_paths() - config = TestConfig(magic_number=42) + config = TestConfig(name="test-config") config_name = "my-job-config.yaml" with config.to_tempfile(name=config_name) as path: assert path.name == config_name diff --git a/tests/unit/jobs/configs/test_lm_harness_config.py b/tests/unit/jobs/configs/test_lm_harness_config.py index 49105475..70fd3d6f 100644 --- a/tests/unit/jobs/configs/test_lm_harness_config.py +++ b/tests/unit/jobs/configs/test_lm_harness_config.py @@ -7,7 +7,7 @@ LMHarnessJobConfig, LocalChatCompletionsConfig, ) -from tests.utils import copy_pydantic_json +from tests.test_utils import copy_pydantic_json @pytest.fixture @@ -39,6 +39,7 @@ def lm_harness_job_config( ): if request.param == "model_config_with_artifact": return LMHarnessJobConfig( + name="lm-harness-job-config", model=model_config_with_artifact, evaluation=lm_harness_evaluation_config, tracking=wandb_run_config, @@ -46,6 +47,7 @@ def lm_harness_job_config( ) elif request.param == "local_completions_config": return 
LMHarnessJobConfig( + name="lm-harness-job-config", model=local_completions_config, evaluation=lm_harness_evaluation_config, tracking=wandb_run_config, diff --git a/tests/utils.py b/tests/utils.py deleted file mode 100644 index 34de4885..00000000 --- a/tests/utils.py +++ /dev/null @@ -1,52 +0,0 @@ -import wandb -from pydantic import BaseModel -from wandb.sdk.artifacts.artifact_state import ArtifactState - - -def copy_pydantic_json(model: BaseModel) -> BaseModel: - """Copy a Pydantic model through round-trip JSON serialization.""" - return model.__class__.model_validate_json(model.model_dump_json()) - - -class FakeArtifactLoader: - """Fake implementation of an `ArtifactLoader` with in-memory artifact storage. - - This class bypasses calls to the W&B SDK for using/logging artifacts, - making it suitable for use in testing when W&B is disabled. - - Note: Artifacts are retrieved from the in-memory storage using just their name, - not the full W&B path, since the project/entity cannot be inferred when W&B is disabled. - """ - - def __init__(self, project: str = "test-project", entity: str = "test-entity"): - self.project = project - self.entity = entity - self._storage: dict[str, wandb.Artifact] = dict() - - def _set_commit_attributes(self, artifact: wandb.Artifact) -> wandb.Artifact: - artifact._project = self.project - artifact._entity = self.entity - artifact._version = "latest" - artifact._state = ArtifactState.COMMITTED - - # W&B does this after logging an artifact - name_has_version = len(artifact.name.split(":")) > 1 - if not name_has_version: - artifact._name = f"{artifact._name}:{artifact._version}" - - return artifact - - def num_artifacts(self) -> int: - return len(self._storage) - - def get_artifacts(self) -> list[wandb.Artifact]: - return list(self._storage.values()) - - def use_artifact(self, artifact_path: str) -> wandb.Artifact: - return self._storage[artifact_path] - - def log_artifact(self, artifact: wandb.Artifact) -> wandb.Artifact: - """Store the artifact in-memory and update its attributes to mimic the W&B platform.""" - artifact = self._set_commit_attributes(artifact) - self._storage[artifact.qualified_name] = artifact - return artifact
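
A minimal usage sketch of the refactored API introduced by this patch: the job `name` now lives on the job config itself, `tracking` carries only the W&B run coordinates (`project`/`entity`, with `id` auto-generated), and `LMBuddy` no longer takes an `ArtifactLoader` because lineage is linked from the config's `artifact_paths()` after each job. The dataset path and the project/entity values below are illustrative placeholders rather than values taken from this repository, and the jobs themselves still need the usual Ray and W&B environment to actually run.

from lm_buddy import LMBuddy
from lm_buddy.integrations.huggingface import AutoModelConfig, TextDatasetConfig
from lm_buddy.integrations.wandb import WandbRunConfig
from lm_buddy.jobs.configs import (
    FinetuningJobConfig,
    LMHarnessEvaluationConfig,
    LMHarnessJobConfig,
)

# Run coordinates only; the run name now comes from the job config's `name` field.
tracking = WandbRunConfig(project="lm-buddy-examples", entity="sample")

finetune_config = FinetuningJobConfig(
    name="lm-buddy-finetuning",
    model=AutoModelConfig(path="hf://distilgpt2"),
    # Placeholder dataset path; any "hf://", "file://", or "wandb://" asset path works here.
    dataset=TextDatasetConfig(path="hf://imdb", text_field="text", split="train"),
    tracking=tracking,
)

eval_config = LMHarnessJobConfig(
    name="lm-buddy-lm-harness",
    model=AutoModelConfig(path="hf://distilgpt2"),
    evaluation=LMHarnessEvaluationConfig(tasks=["hellaswag"], limit=5),
    tracking=tracking,
)

buddy = LMBuddy()  # no ArtifactLoader argument anymore
ft_result = buddy.finetune(finetune_config)  # FinetuningResult with checkpoint_path + artifacts
ev_result = buddy.evaluate(eval_config)      # EvaluationResult with tables + artifacts

# Any "wandb://" inputs declared on the configs are linked as run inputs, and the
# result artifacts are logged, by LMBuddy._generate_artifact_lineage after each job.
print(ft_result.checkpoint_path, [a.name for a in ev_result.artifacts])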