mozilla-ai · sfriedowitz · Mar 18, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/examples/configs/evaluation/lm_harness_hf_config.yaml b/examples/configs/evaluation/lm_harness_hf_config.yaml
@@ -4,7 +4,7 @@ model:
   torch_dtype: "bfloat16"
 
 # Settings specific to lm_harness.evaluate
-evaluator:
+evaluation:
   tasks: ["hellaswag"]
   num_fewshot: 5
   limit: 10

diff --git a/examples/configs/evaluation/lm_harness_inference_server_config.yaml b/examples/configs/evaluation/lm_harness_inference_server_config.yaml
@@ -5,7 +5,6 @@ model:
     # HuggingFace repo for the engine model being hosted
     engine:
       repo_id: "distilgpt2"
-      revision: "main"
     # # W&B artifact can also be specified as the engine model to generate a lineage
     # engine:
     #   name: "wandb-artifact-name"
@@ -15,7 +14,7 @@ model:
   tokenizer_backend: "huggingface"
 
 # Settings specific to lm_harness.evaluate
-evaluator:
+evaluation:
   tasks: ["gsm8k"]
   num_fewshot: 5
   limit: 10

diff --git a/examples/configs/evaluation/lm_harness_quantized_config.yaml b/examples/configs/evaluation/lm_harness_quantized_config.yaml
diff --git a/examples/dev_submission/configs/eval_config.yaml b/examples/dev_submission/configs/eval_config.yaml
@@ -1,12 +1,12 @@
 # Model to evaluate, specificed as a vLLM endpoint
 model:
-    base_url: "http://your.cluster.ip:8000/v1/"
-    tokenizer: "mistralai/Mistral-7B-v0.1"
-    model_name: "mistralai/Mistral-7B-v0.1"
-    tokenizer_backend: "huggingface"
+  base_url: "http://your.cluster.ip:8000/v1/"
+  tokenizer: "mistralai/Mistral-7B-v0.1"
+  model_name: "mistralai/Mistral-7B-v0.1"
+  tokenizer_backend: "huggingface"
 
 # Settings specific to lm_harness.evaluate
-evaluator:
+evaluation:
   tasks: ["gsm8k"]
   num_fewshot: 5
 

diff --git a/examples/notebooks/direct_job_execution.ipynb b/examples/notebooks/direct_job_execution.ipynb
@@ -13,8 +13,10 @@
    "source": [
     "This notebook illustrates how to use LM Buddy as a library to run jobs directly on the host machine.\n",
     "\n",
-    "Jobs are fully specified by a `lm_buddy.jobs.configs.LMBuddyJobConfig` \n",
-    "and are executed with the `lm_buddy.run_job` method.\n",
+    "Jobs are executed in the following manner:\n",
+    "- Construct an instance of the `lm_buddy.LMBuddy` class\n",
+    "- Construct an instance of your desired job configuration\n",
+    "- Execute a job via the `LMBuddy.finetune` or `LMBuddy.evaluate` methods\n",
     "\n",
     "**Warning**: This workflow is still considered experimental.\n",
     "Some jobs depend on external services (e.g., W&B, Ray cluster) and host-machine GPU resources,\n",
@@ -34,15 +36,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import lm_buddy\n",
+    "from lm_buddy import LMBuddy\n",
     "from lm_buddy.jobs.configs import (\n",
     "    FinetuningJobConfig,\n",
     "    FinetuningRayConfig,\n",
     "    LMHarnessJobConfig,\n",
-    "    LMHarnessEvaluatorConfig,\n",
+    "    LMHarnessEvaluationConfig,\n",
     ")\n",
     "from lm_buddy.integrations.huggingface import (\n",
-    "    HuggingFaceRepoConfig,\n",
     "    AutoModelConfig,\n",
     "    TextDatasetConfig,\n",
     "    TrainerConfig,\n",
@@ -65,13 +66,11 @@
    "outputs": [],
    "source": [
     "# Base model to finetune from HuggingFace\n",
-    "model_config = AutoModelConfig(\n",
-    "    load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n",
-    ")\n",
+    "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n",
     "\n",
     "# Text dataset for finetuning\n",
     "dataset_config = TextDatasetConfig(\n",
-    "    load_from=HuggingFaceRepoConfig(repo_id=\"imdb\"),\n",
+    "    load_from=\"imdb\",\n",
     "    split=\"train[:100]\",\n",
     "    text_field=\"text\",\n",
     ")\n",
@@ -128,7 +127,8 @@
    "outputs": [],
    "source": [
     "# Run the job\n",
-    "lm_buddy.run_job(finetuning_config)"
+    "buddy = LMBuddy()\n",
+    "buddy.finetune(finetuning_config)"
    ]
   },
   {
@@ -146,12 +146,10 @@
    "source": [
     "# Define the model to be evaluated\n",
     "# In this case, loading directly a pretrained model from HuggingFace\n",
-    "model_config = AutoModelConfig(\n",
-    "    load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n",
-    ")\n",
+    "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n",
     "\n",
     "# Define evaluation tasks and settings\n",
-    "evaluator_config = LMHarnessEvaluatorConfig(\n",
+    "evaluation_config = LMHarnessEvaluationConfig(\n",
     "    tasks=[\"hellaswag\"],\n",
     "    limit=10,  # Only run 10 samples per task. Remove for a real run.\n",
     "    num_fewshot=5,\n",
@@ -167,7 +165,7 @@
     "# Full lm-harness job config\n",
     "lm_harness_config = LMHarnessJobConfig(\n",
     "    model=model_config,\n",
-    "    evaluator=evaluator_config,\n",
+    "    evaluation=evaluation_config,\n",
     "    tracking=tracking_config,\n",
     ")"
    ]
@@ -179,7 +177,8 @@
    "outputs": [],
    "source": [
     "# Run the job\n",
-    "lm_buddy.run_job(lm_harness_config)"
+    "buddy = LMBuddy()\n",
+    "eval_results = buddy.evaluate(lm_harness_config)"
    ]
   }
  ],

diff --git a/src/lm_buddy/__init__.py b/src/lm_buddy/__init__.py
@@ -1,3 +1,3 @@
-from lm_buddy.jobs import run_job
+from lm_buddy.buddy import LMBuddy
 
-__all__ = ["run_job"]
+__all__ = ["LMBuddy"]
diff --git a/src/lm_buddy/buddy.py b/src/lm_buddy/buddy.py
@@ -0,0 +1,50 @@
+from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader
+from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_prometheus, run_simple
+from lm_buddy.jobs.common import EvaluationResult, FinetuningResult, SimpleResult
+from lm_buddy.jobs.configs import (
+    EvaluationJobConfig,
+    FinetuningJobConfig,
+    LMHarnessJobConfig,
+    PrometheusJobConfig,
+    SimpleJobConfig,
+)
+
+
+class LMBuddy:
+    """Your buddy in the (L)LM space.
+
+    Simple wrapper around executable functions for tasks available in the library.
+    """
+
+    def __init__(self, artifact_loader: ArtifactLoader | None = None):
+        """Create a buddy that passes `artifact_loader` to the jobs it runs.
+
+        When no loader is given, a new `WandbArtifactLoader` is created for this instance.
+        """
+        # Build the default here rather than in the signature: a call in a default argument
+        # runs once when the function is defined, so every instance would share one loader.
+        if artifact_loader is None:
+            artifact_loader = WandbArtifactLoader()
+        self._artifact_loader = artifact_loader
+
+    def simple(self, config: SimpleJobConfig) -> SimpleResult:
+        """Run a simple test task."""
+        return run_simple(config)
+
+    def finetune(self, config: FinetuningJobConfig) -> FinetuningResult:
+        """Run a supervised finetuning task with the provided configuration."""
+        return run_finetuning(config, self._artifact_loader)
+
+    def evaluate(self, config: EvaluationJobConfig) -> EvaluationResult:
+        """Run an evaluation task with the provided configuration.
+
+        The underlying evaluation framework is determined by the configuration type.
+        A `ValueError` is raised if the configuration is not an LM Harness or Prometheus config.
+        """
+        match config:
+            case LMHarnessJobConfig() as lm_harness_config:
+                return run_lm_harness(lm_harness_config, self._artifact_loader)
+            case PrometheusJobConfig() as prometheus_config:
+                return run_prometheus(prometheus_config, self._artifact_loader)
+            case _:
+                raise ValueError(f"Invalid configuration for evaluation: {type(config)}")
diff --git a/src/lm_buddy/cli/run.py b/src/lm_buddy/cli/run.py
@@ -1,16 +1,17 @@
 import click
 
-import lm_buddy
+from lm_buddy import LMBuddy
 from lm_buddy.jobs.configs import (
     FinetuningJobConfig,
     LMHarnessJobConfig,
     PrometheusJobConfig,
     SimpleJobConfig,
 )
 
-# TODO(RD2024-125): We should probably collapse all these commands into a single CLI command
-# - Need to figure out best way to polymorphically deserialize the job config classes
-# - Do we just add type discriminators at the job config level?
+# TODO(RD2024-125): Collapse the run commands into `lm-buddy finetune` and `lm-buddy evaluate`
+#   to match the methods on the `LMBuddy` class
+
+buddy = LMBuddy()
 
 
 @click.group(name="run", help="Run an LM Buddy job.")
@@ -22,25 +23,25 @@ def group():
 @click.option("--config", type=str)
 def run_simple(config: str) -> None:
     config = SimpleJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
+    buddy.simple(config)
 
 
 @group.command("finetuning", help="Run the HuggingFace LLM finetuning job.")
 @click.option("--config", type=str)
 def run_finetuning(config: str) -> None:
     config = FinetuningJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
+    buddy.finetune(config)
 
 
 @group.command("lm-harness", help="Run the lm-harness evaluation job.")
 @click.option("--config", type=str)
 def run_lm_harness(config: str) -> None:
     config = LMHarnessJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
+    buddy.evaluate(config)
 
 
 @group.command("prometheus", help="Run the prometheus evaluation job.")
 @click.option("--config", type=str)
 def run_prometheus(config: str) -> None:
     config = PrometheusJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
+    buddy.evaluate(config)
diff --git a/src/lm_buddy/integrations/huggingface/__init__.py b/src/lm_buddy/integrations/huggingface/__init__.py
@@ -1,5 +1,4 @@
 # ruff: noqa: I001
-from lm_buddy.integrations.huggingface.repo_config import *
 from lm_buddy.integrations.huggingface.adapter_config import *
 from lm_buddy.integrations.huggingface.dataset_config import *
 from lm_buddy.integrations.huggingface.model_config import *

diff --git a/src/lm_buddy/integrations/huggingface/asset_loader.py b/src/lm_buddy/integrations/huggingface/asset_loader.py
@@ -17,17 +17,14 @@
     AutoModelConfig,
     AutoTokenizerConfig,
     DatasetConfig,
-    HuggingFaceRepoConfig,
     QuantizationConfig,
 )
 from lm_buddy.integrations.wandb import (
     ArtifactLoader,
     WandbArtifactConfig,
     get_artifact_filesystem_path,
 )
-
-HuggingFaceAssetPath = HuggingFaceRepoConfig | WandbArtifactConfig
-"""Config that can be resolved to a HuggingFace name/path."""
+from lm_buddy.paths import FilesystemPath, HuggingFaceRepoID, LoadableAssetPath
 
 
 def resolve_peft_and_pretrained(path: str) -> tuple[str, str | None]:
@@ -66,23 +63,18 @@ class HuggingFaceAssetLoader:
     def __init__(self, artifact_loader: ArtifactLoader):
         self._artifact_loader = artifact_loader
 
-    def resolve_asset_path(self, path: HuggingFaceAssetPath) -> tuple[str, str | None]:
-        """Resolve the actual HuggingFace name/path from a config.
-
-        Currently, two config types contain references to a loadable HuggingFace path:
-        (1) A `HuggingFaceRepoConfig` that contains the repo path directly
-        (2) A `WandbArtifactConfig` where the filesystem path is resolved from the artifact
-        """
+    def resolve_asset_path(self, path: LoadableAssetPath) -> str:
+        """Resolve the actual HuggingFace name/path from a `LoadableAssetPath`."""
         match path:
-            case HuggingFaceRepoConfig(repo_id, revision):
-                load_path, revision = repo_id, revision
+            case FilesystemPath(value):
+                return str(value)
+            case HuggingFaceRepoID(repo_id):
+                return repo_id
             case WandbArtifactConfig() as artifact_config:
                 artifact = self._artifact_loader.use_artifact(artifact_config)
-                load_path = get_artifact_filesystem_path(artifact)
-                revision = None
+                return str(get_artifact_filesystem_path(artifact))
             case unknown_path:
                 raise ValueError(f"Unable to resolve asset path from {unknown_path}.")
-        return str(load_path), revision
 
     def load_pretrained_config(
         self,
@@ -92,10 +84,8 @@ def load_pretrained_config(
 
         An exception is raised if the HuggingFace repo does not contain a `config.json` file.
         """
-        model_path, revision = self.resolve_asset_path(config.load_from)
-        return AutoConfig.from_pretrained(
-            pretrained_model_name_or_path=model_path, revision=revision
-        )
+        config_path = self.resolve_asset_path(config.load_from)
+        return AutoConfig.from_pretrained(pretrained_model_name_or_path=config_path)
 
     def load_pretrained_model(
         self,
@@ -122,10 +112,9 @@ def load_pretrained_model(
 
         # TODO: HuggingFace has many AutoModel classes with different "language model heads"
         #   Can we abstract this to load with any type of AutoModel class?
-        model_path, revision = self.resolve_asset_path(config.load_from)
+        model_path = self.resolve_asset_path(config.load_from)
         return AutoModelForCausalLM.from_pretrained(
             pretrained_model_name_or_path=model_path,
-            revision=revision,
             trust_remote_code=config.trust_remote_code,
             torch_dtype=config.torch_dtype,
             quantization_config=bnb_config,
@@ -137,10 +126,9 @@ def load_pretrained_tokenizer(self, config: AutoTokenizerConfig) -> PreTrainedTo
 
         An exception is raised if the HuggingFace repo does not contain a `tokenizer.json` file.
         """
-        tokenizer_path, revision = self.resolve_asset_path(config.load_from)
+        tokenizer_path = self.resolve_asset_path(config.load_from)
         tokenizer = AutoTokenizer.from_pretrained(
             pretrained_model_name_or_path=tokenizer_path,
-            revision=revision,
             trust_remote_code=config.trust_remote_code,
             use_fast=config.use_fast,
         )
@@ -156,10 +144,10 @@ def load_dataset(self, config: DatasetConfig) -> Dataset:
         When loading from HuggingFace directly, the `Dataset` is for the provided split.
         When loading from disk, the saved files must be for a dataset else an exception is raised.
         """
-        dataset_path, revision = self.resolve_asset_path(config.load_from)
+        dataset_path = self.resolve_asset_path(config.load_from)
         # Dataset loading requires a different method if from a HF vs. disk
-        if isinstance(config.load_from, HuggingFaceRepoConfig):
-            return load_dataset(dataset_path, revision=revision, split=config.split)
+        if isinstance(config.load_from, HuggingFaceRepoID):
+            return load_dataset(dataset_path, split=config.split)
         else:
             match load_from_disk(dataset_path):
                 case Dataset() as dataset: