This repository has been archived by the owner on Sep 24, 2024. It is now read-only.

Adding vLLM inference entrypoints to Flamingo #18

Merged: 16 commits, Jan 31, 2024
simplifying vllm config
veekaybee committed Jan 30, 2024
commit f5668a1984d48898fc5605a4b078c5ad7c86dff4
2 changes: 0 additions & 2 deletions src/flamingo/integrations/vllm/__init__.py
@@ -1,4 +1,2 @@
 # ruff: noqa: I001
-from flamingo.integrations.vllm.model_config import *
-from flamingo.integrations.vllm.path_config import *

15 changes: 2 additions & 13 deletions src/flamingo/integrations/vllm/model_config.py
@@ -1,18 +1,7 @@
-from pydantic import validator
-
-from flamingo.integrations.wandb import WandbArtifactConfig
-from flamingo.types import BaseFlamingoConfig, TorchDtypeString
+from flamingo.integrations.vllm import LocalServerConfig
+from flamingo.types import BaseFlamingoConfig


 class InferenceServerConfig(BaseFlamingoConfig):
     """Inference Server URL endpoint path"""

-    load_from: LocalServerConfig | WandbArtifactConfig
-
-    trust_remote_code: bool = False
-    torch_dtype: TorchDtypeString | None = None
-
-    _validate_load_from_string = validator("load_from", pre=True, allow_reuse=True)(
-        convert_string_to_repo_config
-    )
+    base_url: str
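
For context, this is what the simplified class boils down to after the hunk above: a minimal sketch, with BaseFlamingoConfig stubbed by pydantic's BaseModel (an assumption, though the surrounding code suggests it behaves like one).

# Minimal sketch of the post-commit config; BaseFlamingoConfig is stubbed
# with pydantic's BaseModel here, which is an assumption.
from pydantic import BaseModel


class InferenceServerConfig(BaseModel):
    """Inference Server URL endpoint path"""

    base_url: str


# Hypothetical usage: point lm-harness at a vLLM OpenAI-compatible endpoint.
server = InferenceServerConfig(base_url="http://localhost:8000/v1/completions")

Everything checkpoint-related (load_from, torch_dtype, trust_remote_code) drops out: the server owns the model, and the client only needs to know where to reach it.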
25 changes: 0 additions & 25 deletions src/flamingo/integrations/vllm/path_config.py

This file was deleted.

12 changes: 1 addition & 11 deletions src/flamingo/jobs/lm_harness/config.py
@@ -1,9 +1,9 @@
 import datetime

 from pydantic import Field, conlist, validator

Check failure (GitHub Actions / pytest_ruff), Ruff F401 at src/flamingo/jobs/lm_harness/config.py:3:38: `pydantic.validator` imported but unused

+from flamingo.integrations.vllm import LocalServerConfig
 from flamingo.integrations.huggingface import AutoModelConfig, QuantizationConfig
-from flamingo.integrations.vllm import InferenceServerConfig
 from flamingo.integrations.wandb import WandbRunConfig
 from flamingo.types import BaseFlamingoConfig

@@ -25,7 +25,6 @@
     limit: int | float | None = None


-
 class LMHarnessJobConfig(BaseFlamingoConfig):
     """Configuration to run an lm-evaluation-harness evaluation job."""

@@ -34,12 +33,3 @@
     quantization: QuantizationConfig | None = None
     tracking: WandbRunConfig | None = None
     ray: LMHarnessRayConfig = Field(default_factory=LMHarnessRayConfig)
-
-    @validator("model", pre=True, always=True)
-    def validate_model_arg(cls, x):
-        """Allow for passing a path string as the model argument."""
-        if "v1/completions" in x:
-            return InferenceServerConfig(load_from=x)
-        else:
-            return AutoModelConfig(load_from=x)
-        return x
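
With validate_model_arg gone, a bare string such as "http://.../v1/completions" is no longer coerced into a config object, so callers presumably construct the model config explicitly. A hypothetical sketch: import paths follow the diff, but the URL, the repo name, and the omission of other job fields are illustrative.

# Hypothetical post-commit usage (other LMHarnessJobConfig fields omitted).
from flamingo.integrations.huggingface import AutoModelConfig
from flamingo.integrations.vllm import InferenceServerConfig
from flamingo.jobs.lm_harness.config import LMHarnessJobConfig

# Evaluate against a running inference server...
server_job = LMHarnessJobConfig(
    model=InferenceServerConfig(base_url="http://localhost:8000/v1/completions"),
)

# ...or load a HuggingFace checkpoint in-process.
local_job = LMHarnessJobConfig(
    model=AutoModelConfig(load_from="mistralai/Mistral-7B-v0.1"),
)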
3 changes: 2 additions & 1 deletion src/flamingo/jobs/lm_harness/entrypoint.py
@@ -34,7 +34,8 @@

 def load_harness_model(config: LMHarnessJobConfig) -> HFLM | OpenaiCompletionsLM:
     # Helper method to return lm-harness model wrapper
-    def loader(model: str | None, tokenizer: str, peft: str | None):
+    def _loader(model: str | None , tokenizer: str, peft: str | None):
+        """Load model directly from HF if HF path, otherwise from an inference server URL"""

     if isinstance(config.model) == AutoModelConfig:
@@ -52,7 +53,7 @@
     elif isinstance(config.model) == InferenceServerConfig:
         return OpenaiCompletionsLM(
             model=model,
+            base_url=base_url,

Check failure (GitHub Actions / pytest_ruff), Ruff F821 at src/flamingo/jobs/lm_harness/entrypoint.py:56:26: Undefined name `base_url`

             tokenizer=tokenizer,
         )

@@ -61,7 +62,7 @@
     load_path, revision = resolve_loadable_path(config.model.load_from)
     try:
         peft_config = PeftConfig.from_pretrained(load_path, revision=revision)
         return loader(

Check failure (GitHub Actions / pytest_ruff), Ruff F821 at src/flamingo/jobs/lm_harness/entrypoint.py:65:16: Undefined name `loader`

             pretrained=peft_config.base_model_name_or_path,
             tokenizer=peft_config.base_model_name_or_path,
             peft=load_path,
@@ -71,7 +72,7 @@
             f"Unable to load model as adapter: {e}. "
             "This is expected if the checkpoint does not contain adapter weights."
         )
     return loader(pretrained=load_path, tokenizer=load_path, peft=None)

Check failure (GitHub Actions / pytest_ruff), Ruff F821 at src/flamingo/jobs/lm_harness/entrypoint.py:75:16: Undefined name `loader`


 def load_and_evaluate(config: LMHarnessJobConfig) -> dict[str, Any]:
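
All three F821 failures trace to two slips in this hunk: the helper was renamed from loader to _loader without updating its call sites, and base_url is referenced without being bound (it now lives on the simplified InferenceServerConfig). Two more latent issues are visible: isinstance(config.model) == AutoModelConfig misuses isinstance, which takes the type as its second argument, and the call sites pass pretrained= although the parameter is named model. Below is a corrected sketch, not the committed code; the lm-harness wrapper import locations are assumptions, resolve_loadable_path is taken from the diff above, and the exception type caught is a guess since the diff does not show it.

# Corrected sketch of load_harness_model (assumptions flagged inline).
from lm_eval.models.huggingface import HFLM  # assumed import location
from lm_eval.models.openai_completions import OpenaiCompletionsLM  # assumed
from peft import PeftConfig

from flamingo.integrations.huggingface import AutoModelConfig
from flamingo.integrations.vllm import InferenceServerConfig
from flamingo.jobs.lm_harness.config import LMHarnessJobConfig


def load_harness_model(config: LMHarnessJobConfig) -> HFLM | OpenaiCompletionsLM:
    def _loader(model: str | None, tokenizer: str, peft: str | None):
        """Load directly from HF for a local checkpoint, else from the server URL."""
        if isinstance(config.model, AutoModelConfig):
            return HFLM(pretrained=model, tokenizer=tokenizer, peft=peft)
        elif isinstance(config.model, InferenceServerConfig):
            return OpenaiCompletionsLM(
                model=model,
                base_url=config.model.base_url,  # assumption: read from the new field
                tokenizer=tokenizer,
            )
        raise ValueError(f"Unexpected model config type: {type(config.model)}")

    # Adapter resolution presumably applies only to the AutoModelConfig path;
    # resolve_loadable_path is defined elsewhere in flamingo (not shown here).
    load_path, revision = resolve_loadable_path(config.model.load_from)
    try:
        peft_config = PeftConfig.from_pretrained(load_path, revision=revision)
        return _loader(
            model=peft_config.base_model_name_or_path,
            tokenizer=peft_config.base_model_name_or_path,
            peft=load_path,
        )
    except ValueError as e:  # assumption: exception type not shown in the diff
        print(
            f"Unable to load model as adapter: {e}. "
            "This is expected if the checkpoint does not contain adapter weights."
        )
    return _loader(model=load_path, tokenizer=load_path, peft=None)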

Unchanged files with check annotations

from pydantic import validator
from flamingo.integrations.huggingface import HuggingFaceRepoConfig,convert_string_to_repo_config
from flamingo.integrations.wandb import WandbArtifactConfig
from flamingo.types import BaseFlamingoConfig, TorchDtypeString

class AutoModelConfig(BaseFlamingoConfig):

Check failure (GitHub Actions / pytest_ruff), Ruff I001 at src/flamingo/integrations/huggingface/model_config.py:1:1: Import block is un-sorted or un-formatted

    """Settings passed to a HuggingFace AutoModel instantiation.

    The model to load can either be a HuggingFace repo or an artifact reference on W&B.
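
The I001 annotation on this unchanged file would presumably clear once the import block is sorted and formatted; a sketch of an ordering that satisfies Ruff's default isort-style rules (the exact grouping is an assumption).

# Ruff/isort-style ordering: third-party imports first, then first-party,
# with a space after each comma.
from pydantic import validator

from flamingo.integrations.huggingface import (
    HuggingFaceRepoConfig,
    convert_string_to_repo_config,
)
from flamingo.integrations.wandb import WandbArtifactConfig
from flamingo.types import BaseFlamingoConfig, TorchDtypeString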