This repository has been archived by the owner on Sep 24, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into vicki/flamingo-inference
- Loading branch information
Showing
18 changed files
with
323 additions
and
138 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
# Base model to load for finetuning
model:
  load_from:
    repo_id: "distilgpt2"
  # Can also specify the asset to load as a W&B artifact
  # load_from:
  #   name: "artifact-name"
  #   project: "artifact-project"
  #   version: "v0"
  torch_dtype: "bfloat16"

# Tokenizer section (when not defined, will default to the model value)
# tokenizer: "distilgpt2"

# Text dataset to use for training
dataset:
  load_from:
    repo_id: "imdb"
  split: "train[:100]"
  test_size: 0.2
  text_field: "text"

trainer:
  max_seq_length: 512
  learning_rate: 0.001
  num_train_epochs: 2
  save_steps: 1
  # NOTE(review): was "epochs" — HF TrainingArguments only accepts
  # "no" / "steps" / "epoch", so "epochs" fails validation at startup.
  save_strategy: "epoch"
  logging_steps: 1
  logging_strategy: "steps"

# Quantization section (not necessary when using LORA w/ built in LOFT-Q)
# quantization:

adapter:
  peft_type: "LORA"
  task_type: "CAUSAL_LM"
  r: 16
  lora_alpha: 32
  lora_dropout: 0.2

# Tracking info for where to log the run results
tracking:
  name: "flamingo-example-finetuning"
  project: "flamingo-examples"
  entity: "mozilla-ai"

ray:
  use_gpu: True
  num_workers: 2
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
# Model to evaluate
model:
  load_from: "distilgpt2"
  torch_dtype: "bfloat16"

# Settings specific to lm_harness.evaluate
evaluator:
  tasks: ["hellaswag"]
  num_fewshot: 5
  limit: 10

# Load the model 4-bit quantized for evaluation
quantization:
  load_in_4bit: True
  bnb_4bit_quant_type: "fp4"

# Tracking info for where to log the run results
tracking:
  name: "flamingo-example-lm-harness"
  project: "flamingo-examples"
  entity: "mozilla-ai"

ray:
  num_cpus: 1
  num_gpus: 1
  timeout: 3600
This file was deleted.
Oops, something went wrong.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,32 @@ | ||
import re

from flamingo.types import BaseFlamingoConfig
class WandbArtifactConfig(BaseFlamingoConfig):
    """Configuration required to retrieve an artifact from W&B.

    Attributes:
        name: Name of the artifact on the W&B platform.
        version: Artifact version or alias. Defaults to "latest".
        project: W&B project containing the artifact, if any.
        entity: W&B entity (user or team) that owns the project, if any.
    """

    name: str
    version: str = "latest"
    project: str | None = None
    entity: str | None = None

    @classmethod
    def from_wandb_path(cls, path: str) -> "WandbArtifactConfig":
        """Construct an artifact configuration from a W&B path string.

        The path should be of the form "<entity>/<project>/<name>:<version>",
        with the "<entity>/" prefix optional.

        Raises:
            ValueError: If ``path`` does not match the expected format.
        """
        # "((.*)\/)?" optionally captures the leading "<entity>/" segment;
        # regex backtracking then assigns the remaining "/"-separated pieces
        # to project/name and the ":"-suffix to version.
        match = re.search(r"((.*)\/)?(.*)\:(.*)".replace(r"(.*)\:", r"(.*)\/(.*)\:"), path) if False else re.search(r"((.*)\/)?(.*)\/(.*)\:(.*)", path)
        if match is not None:
            # groups()[0] is the full "<entity>/" capture including the slash;
            # skip it and keep only the inner entity capture plus the rest.
            entity, project, name, version = match.groups()[1:]
            return cls(name=name, project=project, version=version, entity=entity)
        raise ValueError(f"Invalid artifact path: {path}")

    def wandb_path(self) -> str:
        """String identifier for the asset on the W&B platform."""
        # NOTE(review): if `project` is None this renders "None/<name>:<version>"
        # — confirm callers always set a project before resolving the path.
        path = f"{self.project}/{self.name}:{self.version}"
        if self.entity is not None:
            path = f"{self.entity}/{path}"
        return path
Oops, something went wrong.