Merge branch 'main' into vicki/flamingo-inference
veekaybee authored Jan 30, 2024
2 parents 197a989 + a185d88 commit 7bfa435
Showing 18 changed files with 323 additions and 138 deletions.
50 changes: 50 additions & 0 deletions examples/configs/finetuning.yaml
@@ -0,0 +1,50 @@
# Base model to load for finetuning
model:
  load_from:
    repo_id: "distilgpt2"
  # Can also specify the asset to load as a W&B artifact
  # load_from:
  #   name: "artifact-name"
  #   project: "artifact-project"
  #   version: "v0"
  torch_dtype: "bfloat16"

# Tokenizer section (when not defined, will default to the model value)
# tokenizer: "distilgpt2"

# Text dataset to use for training
dataset:
  load_from:
    repo_id: "imdb"
  split: "train[:100]"
  test_size: 0.2
  text_field: "text"

trainer:
  max_seq_length: 512
  learning_rate: 0.001
  num_train_epochs: 2
  save_steps: 1
  save_strategy: "epochs"
  logging_steps: 1
  logging_strategy: "steps"

# Quantization section (not necessary when using LoRA w/ built-in LoftQ)
# quantization:

adapter:
  peft_type: "LORA"
  task_type: "CAUSAL_LM"
  r: 16
  lora_alpha: 32
  lora_dropout: 0.2

# Tracking info for where to log the run results
tracking:
  name: "flamingo-example-finetuning"
  project: "flamingo-examples"
  entity: "mozilla-ai"

ray:
  use_gpu: True
  num_workers: 2
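A minimal local-validation sketch (not part of this commit): a config like the one above can be loaded and checked before submission. `FinetuningJobConfig` and its import path are assumptions here, named by analogy with the `SimpleJobConfig.from_yaml_file` call that appears in the dev workflow notebook below.

# Hedged sketch: validate examples/configs/finetuning.yaml locally before submitting it to Ray.
# FinetuningJobConfig and the flamingo.jobs.finetuning module path are assumed, mirroring
# SimpleJobConfig.from_yaml_file used in examples/dev_workflow.ipynb.
from pathlib import Path

from flamingo.jobs.finetuning import FinetuningJobConfig  # assumed import path

config = FinetuningJobConfig.from_yaml_file(Path("examples/configs/finetuning.yaml"))
print(config)  # pydantic validation errors surface at load time if the YAML is malformed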
42 changes: 0 additions & 42 deletions examples/configs/finetuning_config.yaml

This file was deleted.

25 changes: 25 additions & 0 deletions examples/configs/lm_harness.yaml
@@ -0,0 +1,25 @@
# Model to evaluate
model:
  load_from: "distilgpt2"
  torch_dtype: "bfloat16"

# Settings specific to lm_harness.evaluate
evaluator:
  tasks: ["hellaswag"]
  num_fewshot: 5
  limit: 10

quantization:
  load_in_4bit: True
  bnb_4bit_quant_type: "fp4"

# Tracking info for where to log the run results
tracking:
  name: "flamingo-example-lm-harness"
  project: "flamingo-examples"
  entity: "mozilla-ai"

ray:
  num_cpus: 1
  num_gpus: 1
  timeout: 3600
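To run this evaluation config, one could reuse the file-based submission pattern from the dev workflow notebook below. This is a sketch under assumptions: the `run lm_harness` subcommand is inferred from the config filename, since only `run simple` appears elsewhere in this commit.

# Hedged sketch: submit the lm_harness config to Ray, mirroring examples/dev_workflow.ipynb.
# The "run lm_harness" entrypoint name is an assumption based on the config filename.
import os
from pathlib import Path

import flamingo
from ray.job_submission import JobSubmissionClient

client = JobSubmissionClient("http://10.147.154.77:8265")  # update the address for your cluster

runtime_env = {
    "working_dir": "examples/configs",  # relative to the repo root
    "env_vars": {"WANDB_API_KEY": os.environ["WANDB_API_KEY"]},  # if the job logs to W&B
    "py_modules": [str(Path(flamingo.__file__).parent)],
    "pip": "requirements.txt",  # see CONTRIBUTING.md for how to generate this
}

client.submit_job(
    entrypoint="python -m flamingo run lm_harness --config lm_harness.yaml",
    runtime_env=runtime_env,
)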
28 changes: 0 additions & 28 deletions examples/configs/lm_harness_config.yaml

This file was deleted.

File renamed without changes.
157 changes: 131 additions & 26 deletions examples/dev_workflow.ipynb
@@ -8,21 +8,38 @@
"# Development Workflow"
]
},
{
"cell_type": "markdown",
"id": "9366fd9e",
"metadata": {},
"source": [
"## File-based Submission"
]
},
{
"cell_type": "markdown",
"id": "fcd5240e",
"metadata": {},
"source": [
"This demonstrates the basic workflow for submitting a Flamingo job to Ray\n",
"from a configuration stored as a local file.\n",
"\n",
"The job configuration is stored as a YAML file in a the local `configs` directory,\n",
"and that directory is specified as the working directory of the Ray runtime environment upon submission."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c0f15ed-77dc-44ce-adb6-d1b59368f03c",
"id": "9b26d777",
"metadata": {},
"outputs": [],
"source": [
"# Required imports\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"from ray.job_submission import JobSubmissionClient\n",
"\n",
"# flamingo should be installed in your development environment\n",
"import flamingo"
"from ray.job_submission import JobSubmissionClient"
]
},
{
@@ -45,25 +62,11 @@
"outputs": [],
"source": [
"# Determine local module path for the flamingo repo\n",
"flamingo_module = Path(flamingo.__file__).parent"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1db3b9aa-99a4-49d9-8773-7b91ccf89c85",
"metadata": {},
"outputs": [],
"source": [
"# Load and inspect the config file\n",
"# Not mandatory for job submission, but helpful when debugging\n",
"from flamingo.jobs.simple import SimpleJobConfig\n",
"\n",
"CONFIG_DIR = Path(\"configs\")\n",
"CONFIG_FILE = \"simple_config.yaml\"\n",
"# In theory this workflow is possible without having the flamingo package installed locally,\n",
"# but this is a convenient means to access the local module path\n",
"import flamingo\n",
"\n",
"config = SimpleJobConfig.from_yaml_file(CONFIG_DIR / CONFIG_FILE)\n",
"config"
"flamingo_module = Path(flamingo.__file__).parent"
]
},
{
@@ -77,10 +80,10 @@
"# py_modules contains the path to the local flamingo module directory\n",
"# pip contains an export of the dependencies for the flamingo package (see CONTRIBUTING.md for how to generate)\n",
"runtime_env = {\n",
" \"working_dir\": str(CONFIG_DIR),\n",
" \"working_dir\": \"configs\",\n",
" \"env_vars\": {\"WANDB_API_KEY\": os.environ[\"WANDB_API_KEY\"]}, # If running a job that uses W&B\n",
" \"py_modules\": [str(flamingo_module)],\n",
" \"pip\": \"/path/to/flamingo/requirements.txt\",\n",
" \"pip\": \"requirements.txt\", # See CONTRIBUTING.md for how to generate this\n",
"}"
]
},
@@ -94,9 +97,111 @@
"# Submit the job to the Ray cluster\n",
"# Note: flamingo is invoked by 'python -m flamingo' since the CLI is not installed in the environment\n",
"client.submit_job(\n",
" entrypoint=f\"python -m flamingo run simple --config {CONFIG_FILE}\", runtime_env=runtime_env\n",
" entrypoint=f\"python -m flamingo run simple --config simple.yaml\",\n",
" runtime_env=runtime_env,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "425be140",
"metadata": {},
"source": [
"## Iterative Submission"
]
},
{
"cell_type": "markdown",
"id": "e99ce273",
"metadata": {},
"source": [
"It is also possible to submit Flamingo jobs using a fully Python/Jupyter driven workflow.\n",
"\n",
"In this case, the Flamingo job configuration is instantiated in your Python script\n",
"and written to a temporary directory for submission. \n",
"\n",
"The Ray working directory is based off this temporary YAML file location.\n",
"\n",
"This approach is convenient if you want to run sweeps over parameter ranges\n",
"and use a Python script/Jupyter notebook as your local \"driver\" for the workflow."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0cfccaa9",
"metadata": {},
"outputs": [],
"source": [
"# Required imports\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"from ray.job_submission import JobSubmissionClient"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a51235ed",
"metadata": {},
"outputs": [],
"source": [
"# Create a submission client bound to a Ray cluster\n",
"# Note: You will likely have to update the cluster address shown below\n",
"client = JobSubmissionClient(\"http://10.147.154.77:8265\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1216c43",
"metadata": {},
"outputs": [],
"source": [
"# Determine local module path for the flamingo repo\n",
"# In theory this workflow is possible without having the flamingo package installed locally,\n",
"# but this is a convenient means to access the local module path\n",
"import flamingo\n",
"\n",
"flamingo_module = Path(flamingo.__file__).parent"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5715d09",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from flamingo.jobs.simple import SimpleJobConfig\n",
"\n",
"# Generate job configs programatically for sweeps over parameter ranges\n",
"magic_numbers = [0, 10, 20, 40]\n",
"\n",
"for number in magic_numbers:\n",
" # Instantitate config in your workflow script\n",
" # You may also want to read a \"base\" config from file with some suitable defaults\n",
" config = SimpleJobConfig(magic_number=number)\n",
"\n",
" # `config_path` is the fully qualified path to the config file on your local filesystem\n",
" with config.to_tempfile(name=\"config.yaml\") as config_path:\n",
" # `config_path.parent` is the working directory\n",
" runtime_env = {\n",
" \"working_dir\": str(config_path.parent),\n",
" \"env_vars\": {\"WANDB_API_KEY\": os.environ[\"WANDB_API_KEY\"]},\n",
" \"py_modules\": [str(flamingo_module)],\n",
" \"pip\": \"requirements.txt\", # See CONTRIBUTING.md for how to generate this\n",
" }\n",
"\n",
" # `config_path.name` is the file name within the working directory, i.e., \"config.yaml\"\n",
" client.submit_job(\n",
" entrypoint=f\"python -m flamingo run simple --config {config_path.name}\",\n",
" runtime_env=runtime_env,\n",
" )"
]
}
],
"metadata": {
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -21,7 +21,7 @@ protobuf = "3.20.0"
urllib3 = ">=1.26.18,<2"
pydantic = "1.10.14"
pydantic-yaml = "1.2.0"
ray = { version = "2.8.0", extras = ["default"] }
ray = { version = "2.9.1", extras = ["default"] }

[tool.poetry.dev-dependencies]
ruff = "0.1.7"
22 changes: 19 additions & 3 deletions src/flamingo/integrations/wandb/artifact_config.py
@@ -1,16 +1,32 @@
import re

from flamingo.types import BaseFlamingoConfig


class WandbArtifactConfig(BaseFlamingoConfig):
    """Configuration required to retrieve an artifact from W&B."""

    name: str
    project: str
    version: str = "latest"
    project: str | None = None
    entity: str | None = None

    @classmethod
    def from_wandb_path(cls, path: str) -> "WandbArtifactConfig":
        """Construct an artifact configuration from the W&B name.
        The name should be of the form "<entity>/<project>/<name>:<version>"
        with the "entity" field optional.
        """
        match = re.search(r"((.*)\/)?(.*)\/(.*)\:(.*)", path)
        if match is not None:
            entity, project, name, version = match.groups()[1:]
            return cls(name=name, project=project, version=version, entity=entity)
        raise ValueError(f"Invalid artifact path: {path}")

    def wandb_path(self) -> str:
        """String identifier for the asset on the W&B platform."""
        path = "/".join(x for x in [self.entity, self.project, self.name] if x is not None)
        path = f"{path}:{self.version}"
        path = f"{self.project}/{self.name}:{self.version}"
        if self.entity is not None:
            path = f"{self.entity}/{path}"
        return path
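A usage sketch (not part of this commit) showing how the new path helpers round-trip a W&B path string; the artifact names and versions below are illustrative only.

# Round-trip a W&B path string through the methods added above.
from flamingo.integrations.wandb.artifact_config import WandbArtifactConfig

config = WandbArtifactConfig.from_wandb_path("mozilla-ai/flamingo-examples/my-artifact:v0")
assert (config.entity, config.project, config.name, config.version) == (
    "mozilla-ai", "flamingo-examples", "my-artifact", "v0"
)
assert config.wandb_path() == "mozilla-ai/flamingo-examples/my-artifact:v0"

# The entity component of the path is optional:
config = WandbArtifactConfig.from_wandb_path("flamingo-examples/my-artifact:latest")
assert config.entity is None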
