Merge branch 'merge_trackers' into doremi
dlwh committed Feb 9, 2024
2 parents 3e3c9da + 7ba2b39 commit c5fb7a6
Showing 37 changed files with 1,399 additions and 483 deletions.
11 changes: 11 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,11 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file

version: 2
updates:
  - package-ecosystem: "pip" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"
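
As an aside, not part of this commit: the comments above note that each package ecosystem Dependabot should watch gets its own entry under `updates`. If, for example, the repository's GitHub Actions workflows were also to be kept up to date, a second entry of the same shape could be appended; a minimal sketch:

```
version: 2
updates:
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
  - package-ecosystem: "github-actions"  # hypothetical second ecosystem, not in this commit
    directory: "/"                       # for github-actions this resolves to .github/workflows
    schedule:
      interval: "weekly"
```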
6 changes: 3 additions & 3 deletions config/gpt2_nano.yaml
@@ -1,5 +1,5 @@
-#data:
-#  id: dlwh/wikitext_103_detokenized
+data:
+  id: dlwh/wikitext_103_detokenized
model:
type: gpt2
hidden_dim: 32
@@ -14,7 +14,7 @@ trainer:
- every: 50
save_interval: 5m

-per_device_parallelism: 16
+per_device_parallelism: -1
train_batch_size: 32

tensor_parallel_axes: ["mlp", "heads"]
3 changes: 1 addition & 2 deletions config/gpt2_nano_tb.yaml
@@ -14,8 +14,7 @@ trainer:
- every: 50
save_interval: 5m

-per_device_eval_parallelism: 1
-per_device_parallelism: 1
+per_device_parallelism: -1
train_batch_size: 32

tensor_parallel_axes: ["mlp", "heads"]
2 changes: 1 addition & 1 deletion config/gpt2_small.yaml
@@ -14,7 +14,7 @@ trainer:

mp: p=f32,c=bfloat16
model_axis_size: 1
-per_device_parallelism: 4
+per_device_parallelism: -1

train_batch_size: 512
optimizer:
28 changes: 28 additions & 0 deletions config/mistral_7b.yaml
@@ -0,0 +1,28 @@
data:
  train_urls:
    - "gs://pubmed-mosaic/openwebtext-sharded/openwebtext_train.{1..128}-of-128.jsonl.gz"
  validation_urls:
    - "gs://pubmed-mosaic/openwebtext-sharded/openwebtext_val.{1..8}-of-8.jsonl.gz"
  cache_dir: "gs://levanter-data/tokenized/openwebtext_llama/"
  tokenizer: "mistralai/Mistral-7B-v0.1"
model:
  type: mistral
# TODO: uncomment this once we resolve the resource exhaustion issue
#  initialize_from_hf: "mistralai/Mistral-7B-v0.1"
#  use_hf_model_config: true
trainer:
  wandb:
    project: "levanter"
    tags: ["openwebtext", "mistral"]

  mp: p=f32,c=bfloat16
  train_batch_size: 256  # set for v4-64 TPU
  num_train_steps: 1000
  steps_per_eval: 50
  tensor_parallel_axes: ["mlp", "heads"]
  fsdp_axis: "embed"
  batch_axis: "batch"
optimizer:
  learning_rate: 1.2E-5  # set low for fine-tuning
  weight_decay: 0.1
  min_lr_ratio: 0.1
2 changes: 1 addition & 1 deletion docs/Configuration-Guide.md
@@ -13,7 +13,7 @@ class TrainLmConfig:
data: LMDatasetConfig = field(default_factory=LMDatasetConfig)
trainer: TrainerConfig = field(default_factory=TrainerConfig)
model: LmConfig = field(default_factory=Gpt2Config)
-optimizer: OptimizerConfig = field(default_factory=OptimizerConfig)
+optimizer: OptimizerConfig = field(default_factory=AdamConfig)
```

Your training run will typically be associated with a single config file. For instance, you might have a file
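
As a hedged illustration only (not part of the commit): using the section names from `TrainLmConfig` above, with values copied from the config files earlier in this commit, such a single config file might look roughly like:

```
# hypothetical example config; values are illustrative, taken from configs in this commit
data:
  id: dlwh/wikitext_103_detokenized
model:
  type: gpt2
  hidden_dim: 32
trainer:
  train_batch_size: 32
  per_device_parallelism: -1
optimizer:
  learning_rate: 1.2E-5
  weight_decay: 0.1
```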