remove per_device_parallelism from configs. almost always a mistake
dlwh committed Feb 14, 2024
1 parent 7cf1686 commit 6dedbdc
Showing 15 changed files with 0 additions and 16 deletions.
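For context: per_device_parallelism pins how many examples each accelerator processes per step. When it is left unset, Levanter's trainer infers it from train_batch_size and the number of available devices (the default is -1, meaning "compute it for me" — treat the exact sentinel as an assumption about the TrainerConfig of this era). A minimal trainer block after this change, with illustrative values drawn from the files below:

trainer:
  mp: p=f32,c=bfloat16
  model_axis_size: 1
  train_batch_size: 256
  # per_device_parallelism omitted: it is inferred from train_batch_size
  # and the device count, which is almost always what you want.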
1 change: 0 additions & 1 deletion config/backpack.yaml
@@ -18,7 +18,6 @@ trainer:

   num_train_steps: 50000
   train_batch_size: 1024
-  per_device_parallelism: 4
   model_axis_size: 1

 optimizer:
1 change: 0 additions & 1 deletion config/backpack_nano.yaml
@@ -15,7 +15,6 @@ trainer:

   num_train_steps: 100
   train_batch_size: 32
-  per_device_parallelism: 1
   model_axis_size: 1

 optimizer:
1 change: 0 additions & 1 deletion config/gpt2_1536.yaml
@@ -14,7 +14,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 2
   per_device_eval_parallelism: 8
 optimizer:
   learning_rate: 1E-4
2 changes: 0 additions & 2 deletions config/gpt2_1536_sophiah.yaml
@@ -20,8 +20,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 2
-  per_device_eval_parallelism: 8
 optimizer:
   type: sophia-h
   learning_rate: 2E-4
1 change: 0 additions & 1 deletion config/gpt2_20b.yaml
@@ -19,7 +19,6 @@ trainer:
   mp: p=f32,c=bfloat16


-  per_device_parallelism: 4
   per_device_eval_parallelism: 4

   train_batch_size: 1024
1 change: 0 additions & 1 deletion config/gpt2_medium.yaml
@@ -14,7 +14,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 16
 optimizer:
   learning_rate: 3E-4
   weight_decay: 0.1
1 change: 0 additions & 1 deletion config/gpt2_micro.yaml
@@ -13,5 +13,4 @@ trainer:
   mp: p=f32,c=bfloat16
   num_train_steps: 100
   per_device_eval_parallelism: 1
-  per_device_parallelism: 4
   train_batch_size: 32
1 change: 0 additions & 1 deletion config/gpt2_nano_mixture.yaml
@@ -22,7 +22,6 @@ trainer:
   save_interval: 5m

   per_device_eval_parallelism: 1
-  per_device_parallelism: 1
   train_batch_size: 32

   tensor_parallel_axes: ["mlp", "heads"]
1 change: 0 additions & 1 deletion config/gpt2_small_fast_mix.yaml
@@ -27,7 +27,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 8

   train_batch_size: 256
   num_train_steps: 20000
1 change: 0 additions & 1 deletion config/gpt2_small_fast_pile.yaml
@@ -14,7 +14,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 8

   train_batch_size: 256
   num_train_steps: 20000
1 change: 0 additions & 1 deletion config/gpt2_small_fast_sophia_h.yaml
@@ -14,7 +14,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 8

   train_batch_size: 256
   num_train_steps: 20000
1 change: 0 additions & 1 deletion config/gpt2_small_pile.yaml
@@ -14,7 +14,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 8

   train_batch_size: 256
   num_train_steps: 50000
1 change: 0 additions & 1 deletion config/gpt2_small_pile_mixture.yaml
@@ -14,7 +14,6 @@ trainer:

   mp: p=f32,c=bfloat16
   model_axis_size: 1
-  per_device_parallelism: 8

   train_batch_size: 256
   num_train_steps: 50000
1 change: 0 additions & 1 deletion config/gpt2_xl.yaml
@@ -12,7 +12,6 @@ trainer:
     project: "levanter"
     tags: [ "openwebtext", "gpt2"]
   mp: p=f32,c=bfloat16
-  per_device_parallelism: 1
 optimizer:
   learning_rate: 1E-4
   weight_decay: 0.1
1 change: 0 additions & 1 deletion config/llama2_7b_continued.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ trainer:
mp: p=f32,c=bfloat16

model_axis_size: 1
per_device_parallelism: 4
per_device_eval_parallelism: 4

train_batch_size: 1024
Expand Down
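When pinning the per-device batch size really is needed (for example, to avoid out-of-memory errors on smaller accelerators), it can be supplied per run instead of baked into a shared config. A sketch, assuming Levanter's draccus-style dotted command-line overrides and the train_lm entry point:

python -m levanter.main.train_lm \
  --config_path config/gpt2_small_fast_pile.yaml \
  --trainer.per_device_parallelism 4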
