diff --git a/config/lora_llama2.yaml b/config/lora_llama2.yaml
index f4241f44e..cf6592153 100644
--- a/config/lora_llama2.yaml
+++ b/config/lora_llama2.yaml
@@ -9,7 +9,7 @@
 trainer:
   project: "levanter-lora"
   tags: ["lora", "llama2"]
   num_train_steps: 5000 # tune to suit your needs
-  train_batch_size: 128
+  train_batch_size: 64
   # if using model parallelism, this is useful:
   tensor_parallel_axes: ["mlp", "heads"]
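
For reference, a sketch of the trainer section once this hunk is applied; only the keys visible in the hunk are shown, and any surrounding configuration is assumed unchanged:

trainer:
  project: "levanter-lora"
  tags: ["lora", "llama2"]
  num_train_steps: 5000 # tune to suit your needs
  train_batch_size: 64
  # if using model parallelism, this is useful:
  tensor_parallel_axes: ["mlp", "heads"]

Note that tensor_parallel_axes lists the named logical axes ("mlp", "heads") to shard across devices when tensor parallelism is in use; it has no effect under pure data parallelism.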