mozilla-ai · sfriedowitz · Mar 18, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/README.md b/README.md
@@ -43,9 +43,6 @@ To see all available job commands, run `lm_buddy run --help`
 
 Once LM Buddy is installed in your local Python environment, usage is as follows:
 ```
-# Simple test
-lm_buddy run simple --config simple_config.yaml
-
 # LLM finetuning
 lm_buddy run finetuning --config finetuning_config.yaml
 

diff --git a/examples/configs/evaluation/lm_harness_hf_config.yaml b/examples/configs/evaluation/lm_harness_hf_config.yaml
@@ -4,7 +4,7 @@ model:
   torch_dtype: "bfloat16"
 
 # Settings specific to lm_harness.evaluate
-evaluator:
+evaluation:
   tasks: ["hellaswag"]
   num_fewshot: 5
   limit: 10

diff --git a/examples/configs/evaluation/lm_harness_inference_server_config.yaml b/examples/configs/evaluation/lm_harness_inference_server_config.yaml
@@ -5,7 +5,6 @@ model:
     # HuggingFace repo for the engine model being hosted
     engine:
       repo_id: "distilgpt2"
-      revision: "main"
     # # W&B artifact can also be specified as the engine model to generate a lineage
     # engine:
     #   name: "wandb-artifact-name"
@@ -15,7 +14,7 @@ model:
   tokenizer_backend: "huggingface"
 
 # Settings specific to lm_harness.evaluate
-evaluator:
+evaluation:
   tasks: ["gsm8k"]
   num_fewshot: 5
   limit: 10

diff --git a/examples/configs/evaluation/lm_harness_quantized_config.yaml b/examples/configs/evaluation/lm_harness_quantized_config.yaml
diff --git a/examples/configs/simple/simple_config.yaml b/examples/configs/simple/simple_config.yaml
diff --git a/examples/dev_submission/configs/eval_config.yaml b/examples/dev_submission/configs/eval_config.yaml
@@ -1,12 +1,12 @@
 # Model to evaluate, specificed as a vLLM endpoint
 model:
-    base_url: "http://your.cluster.ip:8000/v1/"
-    tokenizer: "mistralai/Mistral-7B-v0.1"
-    model_name: "mistralai/Mistral-7B-v0.1"
-    tokenizer_backend: "huggingface"
+  base_url: "http://your.cluster.ip:8000/v1/"
+  tokenizer: "mistralai/Mistral-7B-v0.1"
+  model_name: "mistralai/Mistral-7B-v0.1"
+  tokenizer_backend: "huggingface"
 
 # Settings specific to lm_harness.evaluate
-evaluator:
+evaluation:
   tasks: ["gsm8k"]
   num_fewshot: 5
 

diff --git a/examples/notebooks/dev_ray_submission.ipynb b/examples/notebooks/dev_ray_submission.ipynb
@@ -95,7 +95,7 @@
     "# pip contains an export of the dependencies for the LM Buddy package (see CONTRIBUTING.md for how to generate)\n",
     "\n",
     "runtime_env = {\n",
-    "    \"working_dir\": f\"{root_dir}/examples/configs/simple\",\n",
+    "    \"working_dir\": f\"{root_dir}/examples/configs/finetuning\",\n",
     "    \"env_vars\": {\"WANDB_API_KEY\": os.environ[\"WANDB_API_KEY\"]},  # If running a job that uses W&B\n",
     "    \"py_modules\": [str(lm_buddy_module)],\n",
     "    \"pip\": \"requirements.txt\",  # See CONTRIBUTING.md for how to generate this\n",
@@ -111,25 +111,25 @@
    "source": [
     "# Submit the job to the Ray cluster\n",
     "# Note: LM Buddy is invoked by 'python -m lm_buddy run ...' since the CLI is not installed in the environment\n",
-    "simple_job = client.submit_job(\n",
-    "    entrypoint=f\"python -m lm_buddy run simple --config simple_config.yaml\",\n",
+    "submission_id = client.submit_job(\n",
+    "    entrypoint=f\"python -m lm_buddy run finetuning --config finetuning_config.yaml\",\n",
     "    runtime_env=runtime_env,\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "a373b21d1d57778b",
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [],
    "source": [
     "# The client outputs a string with a job ID\n",
     "# Jobs can be interacted with and terminated via client methods\n",
-    "client.stop_job(simple_job)"
-   ],
-   "metadata": {
-    "collapsed": false
-   },
-   "id": "a373b21d1d57778b"
+    "client.stop_job(submission_id)"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -199,18 +199,24 @@
    "source": [
     "import os\n",
     "\n",
-    "from lm_buddy.jobs.simple import SimpleJobConfig\n",
+    "from lm_buddy.jobs.configs import FinetuningJobConfig\n",
+    "\n",
+    "# Parameters for a programmatic sweep\n",
+    "learning_rates = [1e-5, 1e-4, 1e-3, 1e-2]\n",
+    "\n",
     "\n",
-    "# Generate job configs programatically for sweeps over parameter ranges\n",
-    "magic_numbers = [0, 10, 20, 40]\n",
+    "# Load a \"base\" config from file with some suitable defaults\n",
+    "base_config = FinetuningJobConfig.from_yaml_file(\n",
+    "    f\"{root_dir}/examples/configs/finetuning/finetuning_config.yaml\"\n",
+    ")\n",
     "\n",
-    "for number in magic_numbers:\n",
-    "    # Instantitate config in your workflow script\n",
-    "    # You may also want to read a \"base\" config from file with some suitable defaults\n",
-    "    config = SimpleJobConfig(magic_number=number)\n",
+    "for lr in learning_rates:\n",
+    "    # Modify based on current iteration lr\n",
+    "    job_config = base_config.model_copy(deep=True)\n",
+    "    job_config.trainer.learning_rate = lr\n",
     "\n",
     "    # `config_path` is the fully qualified path to the config file on your local filesystem\n",
-    "    with config.to_tempfile(name=\"config.yaml\") as config_path:\n",
+    "    with job_config.to_tempfile(name=\"config.yaml\") as config_path:\n",
     "        # `config_path.parent` is the working directory\n",
     "        runtime_env = {\n",
     "            \"working_dir\": str(config_path.parent),\n",
@@ -221,7 +227,7 @@
     "\n",
     "        # `config_path.name` is the file name within the working directory, i.e., \"config.yaml\"\n",
     "        client.submit_job(\n",
-    "            entrypoint=f\"python -m lm_buddy run simple --config {config_path.name}\",\n",
+    "            entrypoint=f\"python -m lm_buddy run finetuning --config {config_path.name}\",\n",
     "            runtime_env=runtime_env,\n",
     "        )"
    ]

diff --git a/examples/notebooks/direct_job_execution.ipynb b/examples/notebooks/direct_job_execution.ipynb
@@ -13,8 +13,10 @@
    "source": [
     "This notebook illustrates how to use LM Buddy as a library to run jobs directly on the host machine.\n",
     "\n",
-    "Jobs are fully specified by a `lm_buddy.jobs.configs.LMBuddyJobConfig` \n",
-    "and are executed with the `lm_buddy.run_job` method.\n",
+    "Jobs are executed in the following manner:\n",
+    "- Construct an instance of the `lm_buddy.LMBuddy` class\n",
+    "- Construct an instance of your desired job configuration\n",
+    "- Execute a job via the `LMBuddy.finetune` or `LMBuddy.evaluate` methods\n",
     "\n",
     "**Warning**: This workflow is still considered experimental.\n",
     "Some jobs depend on external services (e.g., W&B, Ray cluster) and host-machine GPU resources,\n",
@@ -34,15 +36,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import lm_buddy\n",
+    "from lm_buddy import LMBuddy\n",
     "from lm_buddy.jobs.configs import (\n",
     "    FinetuningJobConfig,\n",
     "    FinetuningRayConfig,\n",
     "    LMHarnessJobConfig,\n",
-    "    LMHarnessEvaluatorConfig,\n",
+    "    LMHarnessEvaluationConfig,\n",
     ")\n",
     "from lm_buddy.integrations.huggingface import (\n",
-    "    HuggingFaceRepoConfig,\n",
     "    AutoModelConfig,\n",
     "    TextDatasetConfig,\n",
     "    TrainerConfig,\n",
@@ -65,13 +66,11 @@
    "outputs": [],
    "source": [
     "# Base model to finetune from HuggingFace\n",
-    "model_config = AutoModelConfig(\n",
-    "    load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n",
-    ")\n",
+    "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n",
     "\n",
     "# Text dataset for finetuning\n",
     "dataset_config = TextDatasetConfig(\n",
-    "    load_from=HuggingFaceRepoConfig(repo_id=\"imdb\"),\n",
+    "    load_from=\"imdb\",\n",
     "    split=\"train[:100]\",\n",
     "    text_field=\"text\",\n",
     ")\n",
@@ -128,7 +127,8 @@
    "outputs": [],
    "source": [
     "# Run the job\n",
-    "lm_buddy.run_job(finetuning_config)"
+    "buddy = LMBuddy()\n",
+    "buddy.finetune(finetuning_config)"
    ]
   },
   {
@@ -146,12 +146,10 @@
    "source": [
     "# Define the model to be evaluated\n",
     "# In this case, loading directly a pretrained model from HuggingFace\n",
-    "model_config = AutoModelConfig(\n",
-    "    load_from=HuggingFaceRepoConfig(repo_id=\"distilgpt2\"),\n",
-    ")\n",
+    "model_config = AutoModelConfig(load_from=\"distilgpt2\")\n",
     "\n",
     "# Define evaluation tasks and settings\n",
-    "evaluator_config = LMHarnessEvaluatorConfig(\n",
+    "evaluation_config = LMHarnessEvaluationConfig(\n",
     "    tasks=[\"hellaswag\"],\n",
     "    limit=10,  # Only run 10 samples per task. Remove for a real run.\n",
     "    num_fewshot=5,\n",
@@ -167,7 +165,7 @@
     "# Full lm-harness job config\n",
     "lm_harness_config = LMHarnessJobConfig(\n",
     "    model=model_config,\n",
-    "    evaluator=evaluator_config,\n",
+    "    evaluation=evaluation_config,\n",
     "    tracking=tracking_config,\n",
     ")"
    ]
@@ -179,7 +177,8 @@
    "outputs": [],
    "source": [
     "# Run the job\n",
-    "lm_buddy.run_job(lm_harness_config)"
+    "buddy = LMBuddy()\n",
+    "eval_results = buddy.evaluate(lm_harness_config)"
    ]
   }
  ],

diff --git a/src/lm_buddy/__init__.py b/src/lm_buddy/__init__.py
@@ -1,3 +1,3 @@
-from lm_buddy.jobs import run_job
+from lm_buddy.buddy import LMBuddy
 
-__all__ = ["run_job"]
+__all__ = ["LMBuddy"]
diff --git a/src/lm_buddy/buddy.py b/src/lm_buddy/buddy.py
@@ -0,0 +1,48 @@
+from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader
+from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_prometheus
+from lm_buddy.jobs.common import EvaluationResult, FinetuningResult
+from lm_buddy.jobs.configs import (
+    EvaluationJobConfig,
+    FinetuningJobConfig,
+    LMHarnessJobConfig,
+    PrometheusJobConfig,
+)
+
+
+class LMBuddy:
+    """Your buddy in the (L)LM space.
+
+    Simple wrapper around executable functions for tasks available in the library.
+    Every task is run with the artifact loader supplied at construction time.
+    """
+
+    def __init__(self, artifact_loader: ArtifactLoader | None = None):
+        """Create a buddy that passes `artifact_loader` to every task it runs.
+
+        When no loader is given, a new `WandbArtifactLoader` is created for this instance.
+        """
+        # Build the default here, not in the signature: a default expression is evaluated
+        # once at import time and the resulting object is shared by every instance.
+        if artifact_loader is None:
+            artifact_loader = WandbArtifactLoader()
+        self._artifact_loader = artifact_loader
+
+    def finetune(self, config: FinetuningJobConfig) -> FinetuningResult:
+        """Run a supervised finetuning task with the provided configuration."""
+        return run_finetuning(config, self._artifact_loader)
+
+    def evaluate(self, config: EvaluationJobConfig) -> EvaluationResult:
+        """Run an evaluation task with the provided configuration.
+
+        The underlying evaluation framework is determined by the configuration type.
+
+        Raises:
+            ValueError: If `config` is neither an lm-harness nor a Prometheus job config.
+        """
+        match config:
+            case LMHarnessJobConfig() as lm_harness_config:
+                return run_lm_harness(lm_harness_config, self._artifact_loader)
+            case PrometheusJobConfig() as prometheus_config:
+                return run_prometheus(prometheus_config, self._artifact_loader)
+            case _:
+                raise ValueError(f"Invalid configuration for evaluation: {type(config)}")
diff --git a/src/lm_buddy/cli/run.py b/src/lm_buddy/cli/run.py
@@ -1,46 +1,35 @@
 import click
 
-import lm_buddy
-from lm_buddy.jobs.configs import (
-    FinetuningJobConfig,
-    LMHarnessJobConfig,
-    PrometheusJobConfig,
-    SimpleJobConfig,
-)
+from lm_buddy import LMBuddy
+from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, PrometheusJobConfig
 
-# TODO(RD2024-125): We should probably collapse all these commands into a single CLI command
-# - Need to figure out best way to polymorphically deserialize the job config classes
-# - Do we just add type discriminators at the job config level?
+# TODO(RD2024-125): Collapse the run commands into `lm-buddy finetune` and `lm-buddy evaluate`
+#   to match the methods on the `LMBuddy` class
+
+buddy = LMBuddy()
 
 
 @click.group(name="run", help="Run an LM Buddy job.")
 def group():
     pass
 
 
-@group.command("simple", help="Run the simple test job.")
-@click.option("--config", type=str)
-def run_simple(config: str) -> None:
-    config = SimpleJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
-
-
 @group.command("finetuning", help="Run the HuggingFace LLM finetuning job.")
 @click.option("--config", type=str)
 def run_finetuning(config: str) -> None:
     config = FinetuningJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
+    buddy.finetune(config)
 
 
 @group.command("lm-harness", help="Run the lm-harness evaluation job.")
 @click.option("--config", type=str)
 def run_lm_harness(config: str) -> None:
     config = LMHarnessJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
+    buddy.evaluate(config)
 
 
 @group.command("prometheus", help="Run the prometheus evaluation job.")
 @click.option("--config", type=str)
 def run_prometheus(config: str) -> None:
     config = PrometheusJobConfig.from_yaml_file(config)
-    lm_buddy.run_job(config)
+    buddy.evaluate(config)
diff --git a/src/lm_buddy/cli/schema.py b/src/lm_buddy/cli/schema.py
@@ -2,25 +2,14 @@
 
 import click
 
-from lm_buddy.jobs.configs import (
-    FinetuningJobConfig,
-    LMHarnessJobConfig,
-    PrometheusJobConfig,
-    SimpleJobConfig,
-)
+from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, PrometheusJobConfig
 
 
 @click.group(name="schema", help="Get a job configuration schema.")
 def group():
     pass
 
 
-@group.command("simple", help="Schema for the simple test job configuration.")
-def schema_simple() -> None:
-    schema = SimpleJobConfig.model_json_schema()
-    click.secho(json.dumps(schema, indent=2))
-
-
 @group.command("finetuning", help="Schema for the finetuning job configuration.")
 def schema_finetuning() -> None:
     schema = FinetuningJobConfig.model_json_schema()

diff --git a/src/lm_buddy/integrations/huggingface/__init__.py b/src/lm_buddy/integrations/huggingface/__init__.py
@@ -1,5 +1,4 @@
 # ruff: noqa: I001
-from lm_buddy.integrations.huggingface.repo_config import *
 from lm_buddy.integrations.huggingface.adapter_config import *
 from lm_buddy.integrations.huggingface.dataset_config import *
 from lm_buddy.integrations.huggingface.model_config import *