From eea0fa0714d6df45dea3ac451851699ac508027e Mon Sep 17 00:00:00 2001 From: Ahmed Ahmed Date: Fri, 16 Feb 2024 10:22:58 -0800 Subject: [PATCH] Initialize gpt2-medium from HF checkpoint; switch gpt2_medium config to wikitext-103 with lowered LR --- config/gpt2_med.yaml | 4 ++-- config/gpt2_medium.yaml | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/config/gpt2_med.yaml b/config/gpt2_med.yaml index 02042da7e..bfe0e4867 100644 --- a/config/gpt2_med.yaml +++ b/config/gpt2_med.yaml @@ -11,8 +11,8 @@ model: gradient_checkpointing: true scale_attn_by_inverse_layer_idx: true -# initialize_from_hf: "gpt2-medium" -# use_hf_model_config: true +initialize_from_hf: "gpt2-medium" +use_hf_model_config: true trainer: wandb: diff --git a/config/gpt2_medium.yaml b/config/gpt2_medium.yaml index 9ea4408bc..ff0970055 100644 --- a/config/gpt2_medium.yaml +++ b/config/gpt2_medium.yaml @@ -1,4 +1,7 @@ -data: !include data/openwebtext_source.yaml +data: + id: dlwh/wikitext_103_detokenized + tokenizer: "EleutherAI/gpt-neox-20b" + cache_dir: "gs://levanter-data/tokenized/wiki-fast/" model: type: gpt2 hidden_dim: 1024 @@ -7,15 +10,22 @@ model: seq_len: 1024 gradient_checkpointing: true scale_attn_by_inverse_layer_idx: true + +initialize_from_hf: "gpt2-medium" +use_hf_model_config: true + trainer: wandb: - project: "levanter" + project: "locked" tags: [ "openwebtext", "gpt2"] mp: p=f32,c=bfloat16 model_axis_size: 1 - per_device_parallelism: 16 + per_device_parallelism: -1 + train_batch_size: 32 + num_train_steps: 2000000 optimizer: - learning_rate: 3E-4 + learning_rate: 1E-6 weight_decay: 0.1 + warmup: 0.01 min_lr_ratio: 0.1