This repository has been archived by the owner on Sep 24, 2024. It is now read-only.

Added v0 of prometheus lm-buddy entrypoint #75

Merged: 19 commits, Mar 12, 2024
35 changes: 35 additions & 0 deletions examples/configs/prometheus/prometheus_config.yaml
@@ -0,0 +1,35 @@
dataset:
  load_from:
    name: "wandb_file_artifact_name.json"
    version: "latest"
    project: "lm-buddy-prometheus"
    entity: "mozilla-ai"
  # field containing scoring instructions in the json file
  text_field: "instruction"

prometheus:
  inference:
    base_url: "http://your.vllm.server:8000/v1"
    engine: "kaist-ai/prometheus-13b-v1.0"
  best_of: 1
  max_tokens: 512
  frequency_penalty: 1.03
  temperature: 1.0
  top_p: 0.9

evaluation:
  # number of times a model is evaluated per sample
  num_answers: 3
  # max number of retries if a communication error
  # with the server occurs
  max_retries: 5
  # min and max scores as defined in the scoring rubric
  min_score: 1
  max_score: 5
  # enable/disable tqdm to track eval progress
  enable_tqdm: True

tracking:
  name: "lm-buddy-prometheus"
  project: "lm-buddy-examples"
  entity: "mozilla-ai"
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -4,11 +4,12 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "lm-buddy"
-version = "0.2.4"
+version = "0.3.0"
 authors = [
     { name = "Sean Friedowitz", email = "[email protected]" },
     { name = "Aaron Gonzales", email = "[email protected]" },
     { name = "Vicki Boykis", email = "[email protected]" },
+    { name = "Davide Eynard", email = "[email protected]" },
 ]
 description = "Ray-centric library for finetuning and evaluation of (large) language models."
 readme = "README.md"
@@ -37,6 +38,8 @@ dependencies = [
     # Evaluation frameworks
     "lm-eval[openai]==0.4.1",
     "einops==0.7.0",
+    "fschat==0.2.36",
+    "openai==1.3.9",
 ]

 [project.optional-dependencies]
14 changes: 13 additions & 1 deletion src/lm_buddy/cli/run.py
@@ -1,7 +1,12 @@
 import click

 import lm_buddy
-from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, SimpleJobConfig
+from lm_buddy.jobs.configs import (
+    FinetuningJobConfig,
+    LMHarnessJobConfig,
+    PrometheusJobConfig,
+    SimpleJobConfig,
+)

 # TODO(RD2024-125): We should probably collapse all these commands into a single CLI command
 # - Need to figure out best way to polymorphically deserialize the job config classes
@@ -32,3 +37,10 @@ def run_finetuning(config: str) -> None:
 def run_lm_harness(config: str) -> None:
     config = LMHarnessJobConfig.from_yaml_file(config)
     lm_buddy.run_job(config)
+
+
+@group.command("prometheus", help="Run the prometheus evaluation job.")
+@click.option("--config", type=str)
+def run_prometheus(config: str) -> None:
+    config = PrometheusJobConfig.from_yaml_file(config)
+    lm_buddy.run_job(config)
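
A quick way to smoke-test the new subcommand is click's `CliRunner`; a sketch assuming the command group in `src/lm_buddy/cli/run.py` is importable as `group` (as the decorators above suggest) and that the config path exists:

```python
from click.testing import CliRunner

from lm_buddy.cli.run import group  # assumed export name, per the decorators above

# Equivalent to invoking `... run prometheus --config prometheus_config.yaml`
# from the shell; the config path here is a placeholder.
runner = CliRunner()
result = runner.invoke(group, ["prometheus", "--config", "prometheus_config.yaml"])
print(result.exit_code, result.output)
```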
13 changes: 12 additions & 1 deletion src/lm_buddy/cli/schema.py
@@ -2,7 +2,12 @@

 import click

-from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, SimpleJobConfig
+from lm_buddy.jobs.configs import (
+    FinetuningJobConfig,
+    LMHarnessJobConfig,
+    PrometheusJobConfig,
+    SimpleJobConfig,
+)


 @click.group(name="schema", help="Get a job configuration schema.")
@@ -26,3 +31,9 @@ def schema_finetuning() -> None:
 def schema_lm_harness() -> None:
     schema = LMHarnessJobConfig.model_json_schema()
     click.secho(json.dumps(schema, indent=2))
+
+
+@group.command("prometheus", help="Schema for the prometheus job configuration.")
+def schema_prometheus() -> None:
+    schema = PrometheusJobConfig.model_json_schema()
+    click.secho(json.dumps(schema, indent=2))
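
One use for the emitted schema is pre-flight validation of a config file; a sketch assuming `jsonschema` and `pyyaml` are available in the environment (neither is pinned by this PR):

```python
import yaml
from jsonschema import validate

from lm_buddy.jobs.configs import PrometheusJobConfig

# Check a YAML config against the Pydantic-generated JSON schema
# before submitting the job; the path is a placeholder.
schema = PrometheusJobConfig.model_json_schema()
with open("prometheus_config.yaml") as f:
    validate(instance=yaml.safe_load(f), schema=schema)
```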
25 changes: 24 additions & 1 deletion src/lm_buddy/integrations/vllm.py
@@ -12,8 +12,31 @@ class InferenceServerConfig(BaseLMBuddyConfig):

     Note: This configuration is intended to be generic and not bound to the interface
     of any specific training/evaluation framework. See `LocalChatCompletionConfig`
-    for intended usage alongside a third-party framework.
+    or `VLLMCompletionsConfig` for intended usage alongside a third-party framework.
     """

     base_url: str
     engine: str | HuggingFaceAssetPath | None = None
+
+
+class VLLMCompletionsConfig(BaseLMBuddyConfig):
+    """Configuration for a vLLM-based completions service.
+
+    The "local-chat-completions" model is powered by a self-hosted inference server,
+    specified as an `InferenceServerConfig`. Additional arguments are also provided
+    to control the tokenizer type and generation parameters.
+
+    Note that this is just a subset of the parameters allowed by a vLLM server (see
+    https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py). If we
+    choose to use this configuration to cover more use cases, it will make sense
+    to add the other supported configuration parameters too.
+    """
+
+    inference: InferenceServerConfig
+
+    # vLLM-specific sampling params
+    best_of: int | None = None
+    max_tokens: int | None = None
+    frequency_penalty: float | None = None
+    temperature: float | None = None
+    top_p: float | None = None
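
These sampling fields line up with the OpenAI-compatible completions endpoint that a vLLM server exposes, which is presumably why `openai==1.3.9` is pinned in `pyproject.toml`. A hedged sketch of the kind of request they translate into (not necessarily the entrypoint's actual client code); the prompt is a placeholder, and vLLM ignores the API key value:

```python
from openai import OpenAI

# vLLM serves an OpenAI-compatible API; a key is required by the client
# but not checked by the server.
client = OpenAI(base_url="http://your.vllm.server:8000/v1", api_key="EMPTY")

response = client.completions.create(
    model="kaist-ai/prometheus-13b-v1.0",  # the `engine` field above
    prompt="### Task Description: ...",    # placeholder Prometheus grading prompt
    best_of=1,
    max_tokens=512,
    frequency_penalty=1.03,
    temperature=1.0,
    top_p=0.9,
)
print(response.choices[0].text)
```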
35 changes: 35 additions & 0 deletions src/lm_buddy/integrations/wandb/artifact_utils.py
@@ -110,3 +110,38 @@ def build_table_artifact(
     table = wandb.Table(data=table_data, columns=columns)
     artifact.add(table, name=table_name)
     return artifact
+
+
+def build_file_artifact(
+    artifact_name: str,
+    artifact_type: ArtifactType,
+    file_path: str | Path,
+    *,
+    reference: bool = False,
+    entry_name: str | None = None,
+) -> wandb.Artifact:
+    """Build an artifact containing a single file
+
+    Args:
+        artifact_name (str): Name of the artifact
+        artifact_type (ArtifactType): Type of artifact
+        file_path (str | Path): The full path (including filename) of the file
+
+    Keyword Args:
+        reference (bool): Only reference the file, do not copy contents. Defaults to False.
+        entry_name (str | None): Name for the file within the artifact. If None, defaults
+            to the original filename.
+
+    Returns:
+        wandb.Artifact: The generated artifact.
+    """
+    artifact = wandb.Artifact(name=artifact_name, type=artifact_type)
+
+    if reference:
+        artifact.add_reference(
+            uri=f"{ArtifactURIScheme.FILE}://{file_path}",
+            name=entry_name,
+        )
+    else:
+        artifact.add_file(str(file_path), name=entry_name)
+    return artifact
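
A usage sketch for the new helper, e.g. logging an evaluation results file by reference; the `ArtifactType` member, project name, and file path are assumptions for illustration, not values from this PR:

```python
from pathlib import Path

import wandb

from lm_buddy.integrations.wandb.artifact_utils import ArtifactType, build_file_artifact

with wandb.init(project="lm-buddy-examples") as run:
    # Reference the file in place instead of copying it into W&B storage.
    artifact = build_file_artifact(
        artifact_name="prometheus-eval-results",
        artifact_type=ArtifactType.DATASET,  # assumed enum member
        file_path=Path("/tmp/results.json"),
        reference=True,
    )
    run.log_artifact(artifact)
```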
10 changes: 9 additions & 1 deletion src/lm_buddy/jobs/__init__.py
@@ -1,9 +1,15 @@
 from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader
-from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_simple
+from lm_buddy.jobs._entrypoints import (
+    run_finetuning,
+    run_lm_harness,
+    run_prometheus,
+    run_simple,
+)
 from lm_buddy.jobs.configs import (
     FinetuningJobConfig,
     LMBuddyJobConfig,
     LMHarnessJobConfig,
+    PrometheusJobConfig,
     SimpleJobConfig,
 )

@@ -26,5 +32,7 @@ def run_job(
             run_finetuning(finetuning_config, artifact_loader)
         case LMHarnessJobConfig() as lm_harness_config:
             run_lm_harness(lm_harness_config, artifact_loader)
+        case PrometheusJobConfig() as prometheus_config:
+            run_prometheus(prometheus_config, artifact_loader)
         case _:
             raise ValueError(f"Received invalid job configuration: {config}")
3 changes: 2 additions & 1 deletion src/lm_buddy/jobs/_entrypoints/__init__.py
@@ -1,5 +1,6 @@
 from lm_buddy.jobs._entrypoints.finetuning import run_finetuning
 from lm_buddy.jobs._entrypoints.lm_harness import run_lm_harness
+from lm_buddy.jobs._entrypoints.prometheus import run_prometheus
 from lm_buddy.jobs._entrypoints.simple import run_simple

-__all__ = ["run_finetuning", "run_lm_harness", "run_simple"]
+__all__ = ["run_finetuning", "run_lm_harness", "run_prometheus", "run_simple"]