This repository has been archived by the owner on Sep 24, 2024. It is now read-only.

Added v0 of prometheus lm-buddy entrypoint #75

Merged: 19 commits, Mar 12, 2024
35 changes: 35 additions & 0 deletions examples/configs/prometheus/prometheus_config.yaml
@@ -0,0 +1,35 @@
dataset:
  load_from:
    name: "wandb_file_artifact_name.json"
    version: "latest"
    project: "lm-buddy-prometheus"
    entity: "mozilla-ai"
  # field containing scoring instructions in the json file
  text_field: "instruction"

prometheus:
  inference:
    base_url: "http://your.vllm.server:8000/v1"
    engine: "kaist-ai/prometheus-13b-v1.0"
  best_of: 1
  max_tokens: 512
  frequency_penalty: 1.03
  temperature: 1.0
  top_p: 0.9

evaluation:
  # number of times a model is evaluated per sample
  num_answers: 3
  # max number of retries if a communication error
  # with the server occurs
  max_retries: 5
  # min and max scores as defined in the scoring rubric
  min_score: 1
  max_score: 5
  # enable/disable tqdm to track eval progress
  enable_tqdm: True

tracking:
  name: "lm-buddy-prometheus"
  project: "lm-buddy-examples"
  entity: "mozilla-ai"
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -4,11 +4,12 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "lm-buddy"
-version = "0.2.4"
+version = "0.3.0"
 authors = [
     { name = "Sean Friedowitz", email = "[email protected]" },
     { name = "Aaron Gonzales", email = "[email protected]" },
     { name = "Vicki Boykis", email = "[email protected]" },
+    { name = "Davide Eynard", email = "[email protected]" },
 ]
 description = "Ray-centric library for finetuning and evaluation of (large) language models."
 readme = "README.md"
@@ -37,6 +38,8 @@ dependencies = [
     # Evaluation frameworks
     "lm-eval[openai]==0.4.1",
     "einops==0.7.0",
+    "fschat==0.2.36",
+    "openai==1.3.9",
 ]

 [project.optional-dependencies]
14 changes: 13 additions & 1 deletion src/lm_buddy/cli/run.py
@@ -1,7 +1,12 @@
 import click

 import lm_buddy
-from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, SimpleJobConfig
+from lm_buddy.jobs.configs import (
+    FinetuningJobConfig,
+    LMHarnessJobConfig,
+    PrometheusJobConfig,
+    SimpleJobConfig,
+)

 # TODO(RD2024-125): We should probably collapse all these commands into a single CLI command
 # - Need to figure out best way to polymorphically deserialize the job config classes
@@ -32,3 +37,10 @@ def run_finetuning(config: str) -> None:
 def run_lm_harness(config: str) -> None:
     config = LMHarnessJobConfig.from_yaml_file(config)
     lm_buddy.run_job(config)
+
+
+@group.command("prometheus", help="Run the prometheus evaluation job.")
+@click.option("--config", type=str)
+def run_prometheus(config: str) -> None:
+    config = PrometheusJobConfig.from_yaml_file(config)
+    lm_buddy.run_job(config)
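
A quick way to smoke-test the new subcommand is click's `CliRunner`; a sketch assuming the command group in `src/lm_buddy/cli/run.py` is importable as `group` (as the decorators above suggest) and that the config path exists:

```python
from click.testing import CliRunner

from lm_buddy.cli.run import group  # assumed export name, per the decorators above

# Equivalent to invoking `... run prometheus --config prometheus_config.yaml`
# from the shell; the config path here is a placeholder.
runner = CliRunner()
result = runner.invoke(group, ["prometheus", "--config", "prometheus_config.yaml"])
print(result.exit_code, result.output)
```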
13 changes: 12 additions & 1 deletion src/lm_buddy/cli/schema.py
@@ -2,7 +2,12 @@

 import click

-from lm_buddy.jobs.configs import FinetuningJobConfig, LMHarnessJobConfig, SimpleJobConfig
+from lm_buddy.jobs.configs import (
+    FinetuningJobConfig,
+    LMHarnessJobConfig,
+    PrometheusJobConfig,
+    SimpleJobConfig,
+)


 @click.group(name="schema", help="Get a job configuration schema.")
@@ -26,3 +31,9 @@ def schema_finetuning() -> None:
 def schema_lm_harness() -> None:
     schema = LMHarnessJobConfig.model_json_schema()
     click.secho(json.dumps(schema, indent=2))
+
+
+@group.command("prometheus", help="Schema for the prometheus job configuration.")
+def schema_prometheus() -> None:
+    schema = PrometheusJobConfig.model_json_schema()
+    click.secho(json.dumps(schema, indent=2))
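
One use for the emitted schema is pre-flight validation of a config file; a sketch assuming `jsonschema` and `pyyaml` are available in the environment (neither is pinned by this PR):

```python
import yaml
from jsonschema import validate

from lm_buddy.jobs.configs import PrometheusJobConfig

# Check a YAML config against the Pydantic-generated JSON schema
# before submitting the job; the path is a placeholder.
schema = PrometheusJobConfig.model_json_schema()
with open("prometheus_config.yaml") as f:
    validate(instance=yaml.safe_load(f), schema=schema)
```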
25 changes: 24 additions & 1 deletion src/lm_buddy/integrations/vllm.py
@@ -12,8 +12,31 @@ class InferenceServerConfig(BaseLMBuddyConfig):

     Note: This configuration is intended to be generic and not bound to the interface
     of any specific training/evaluation framework. See `LocalChatCompletionConfig`
-    for intended usage alongside a third-party framework.
+    or `VLLMCompletionsConfig` for intended usage alongside a third-party framework.
     """

     base_url: str
     engine: str | HuggingFaceAssetPath | None = None
+
+
+class VLLMCompletionsConfig(BaseLMBuddyConfig):
+    """Configuration for a vLLM-based completions service.
+
+    The "local-chat-completions" model is powered by a self-hosted inference server,
+    specified as an `InferenceServerConfig`. Additional arguments are also provided
+    to control the tokenizer type and generation parameters.
+
+    Note that this is just a subset of the parameters allowed by a vLLM server (see
+    https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py). If we
+    choose to use this configuration to cover more use cases, it will make sense
+    to add the other supported configuration parameters too.
+    """
+
+    inference: InferenceServerConfig
+
+    # vLLM-specific sampling params
+    best_of: int | None = None
+    max_tokens: int | None = None
+    frequency_penalty: float | None = None
+    temperature: float | None = None
+    top_p: float | None = None
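
These sampling fields line up with the OpenAI-compatible completions endpoint that a vLLM server exposes, which is presumably why `openai==1.3.9` is pinned in `pyproject.toml`. A hedged sketch of the kind of request they translate into (not necessarily the entrypoint's actual client code); the prompt is a placeholder, and vLLM ignores the API key value:

```python
from openai import OpenAI

# vLLM serves an OpenAI-compatible API; a key is required by the client
# but not checked by the server.
client = OpenAI(base_url="http://your.vllm.server:8000/v1", api_key="EMPTY")

response = client.completions.create(
    model="kaist-ai/prometheus-13b-v1.0",  # the `engine` field above
    prompt="### Task Description: ...",    # placeholder Prometheus grading prompt
    best_of=1,
    max_tokens=512,
    frequency_penalty=1.03,
    temperature=1.0,
    top_p=0.9,
)
print(response.choices[0].text)
```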
35 changes: 35 additions & 0 deletions src/lm_buddy/integrations/wandb/artifact_utils.py
@@ -110,3 +110,38 @@ def build_table_artifact(
     table = wandb.Table(data=table_data, columns=columns)
     artifact.add(table, name=table_name)
     return artifact
+
+
+def build_file_artifact(
+    artifact_name: str,
+    artifact_type: ArtifactType,
+    file_path: str | Path,
+    *,
+    reference: bool = False,
+    entry_name: str | None = None,
+) -> wandb.Artifact:
+    """Build an artifact containing a single file
+
+    Args:
+        artifact_name (str): Name of the artifact
+        artifact_type (ArtifactType): Type of artifact
+        file_path (str | Path): The full path (including filename) of the file
+
+    Keyword Args:
+        reference (bool): Only reference the file, do not copy contents. Defaults to False.
+        entry_name (str | None): Name for the file within the artifact. If None, defaults
+            to the original filename.
+
+    Returns:
+        wandb.Artifact: The generated artifact.
+    """
+    artifact = wandb.Artifact(name=artifact_name, type=artifact_type)
+
+    if reference:
+        artifact.add_reference(
+            uri=f"{ArtifactURIScheme.FILE}://{file_path}",
+            name=entry_name,
+        )
+    else:
+        artifact.add_file(str(file_path), name=entry_name)
+    return artifact
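
A usage sketch for the new helper, e.g. logging an evaluation results file by reference; the `ArtifactType` member, project name, and file path are assumptions for illustration, not values from this PR:

```python
from pathlib import Path

import wandb

from lm_buddy.integrations.wandb.artifact_utils import ArtifactType, build_file_artifact

with wandb.init(project="lm-buddy-examples") as run:
    # Reference the file in place instead of copying it into W&B storage.
    artifact = build_file_artifact(
        artifact_name="prometheus-eval-results",
        artifact_type=ArtifactType.DATASET,  # assumed enum member
        file_path=Path("/tmp/results.json"),
        reference=True,
    )
    run.log_artifact(artifact)
```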
10 changes: 9 additions & 1 deletion src/lm_buddy/jobs/__init__.py
@@ -1,9 +1,15 @@
 from lm_buddy.integrations.wandb import ArtifactLoader, WandbArtifactLoader
-from lm_buddy.jobs._entrypoints import run_finetuning, run_lm_harness, run_simple
+from lm_buddy.jobs._entrypoints import (
+    run_finetuning,
+    run_lm_harness,
+    run_prometheus,
+    run_simple,
+)
 from lm_buddy.jobs.configs import (
     FinetuningJobConfig,
     LMBuddyJobConfig,
     LMHarnessJobConfig,
+    PrometheusJobConfig,
     SimpleJobConfig,
 )

@@ -26,5 +32,7 @@ def run_job(
             run_finetuning(finetuning_config, artifact_loader)
         case LMHarnessJobConfig() as lm_harness_config:
             run_lm_harness(lm_harness_config, artifact_loader)
+        case PrometheusJobConfig() as prometheus_config:
+            run_prometheus(prometheus_config, artifact_loader)
         case _:
             raise ValueError(f"Received invalid job configuration: {config}")
3 changes: 2 additions & 1 deletion src/lm_buddy/jobs/_entrypoints/__init__.py
@@ -1,5 +1,6 @@
 from lm_buddy.jobs._entrypoints.finetuning import run_finetuning
 from lm_buddy.jobs._entrypoints.lm_harness import run_lm_harness
+from lm_buddy.jobs._entrypoints.prometheus import run_prometheus
 from lm_buddy.jobs._entrypoints.simple import run_simple

-__all__ = ["run_finetuning", "run_lm_harness", "run_simple"]
+__all__ = ["run_finetuning", "run_lm_harness", "run_prometheus", "run_simple"]