Skip to content

Launch Llama 2 Small Fast #115

Launch Llama 2 Small Fast

Launch Llama 2 Small Fast #115

name: Launch Llama 2 Small Fast

# Triggers:
#   * workflow_run: fires when a run of the "Build and Push Docker TPU Images"
#     workflow completes on `main` or any `experiment/*` branch.
#   * workflow_dispatch: allows the workflow to be started manually.
# The pull_request trigger is intentionally left disabled.
on:
  workflow_run:
    workflows: ["Build and Push Docker TPU Images"]
    types:
      - completed
    branches: [main, "experiment/*"]
  # pull_request:
  workflow_dispatch:
jobs:
  # Launches a short Llama 2 "small fast" training run on a preemptible TPU via
  # infra/launch.py, then always attempts to delete the TPU queued resource.
  test:
    # This workflow is only triggered by workflow_run and workflow_dispatch,
    # neither of which carries a pull_request payload, so gate on those events:
    # run on manual dispatch, or when the triggering workflow run succeeded.
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    runs-on: ubuntu-latest
    env:
      TPU_ZONE: "us-central2-b"
      TPU_TYPE: "v4-32"
      # Shared by the launch and cleanup steps so both refer to the same TPU.
      TPU_NAME: small-fast-${{ github.run_id }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Google Cloud SDK
        uses: google-github-actions/setup-gcloud@v1
        with:
          project_id: ${{ secrets.GCP_PROJECT_ID }}

      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v1
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}

      - name: Configure Google Cloud
        env:
          # Passed through the environment instead of being spliced into the
          # script text, so the value is never re-parsed by the shell.
          GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
        run: |
          gcloud config set project "${GCP_PROJECT_ID}"
          # Drop the trailing "-<zone letter>" from the zone to get the region
          # (e.g. us-central2-b -> us-central2).
          REGION=${TPU_ZONE%-*}
          echo "$REGION"
          gcloud auth configure-docker "${REGION}-docker.pkg.dev"

      - name: Install locally
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[test]" "jax[cpu]==0.4.30"

      - name: Launch Small Fast TPU Train LM job
        env:
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          RUN_ID: small_fast_${{ github.run_id }}
        run: |
          # Write the .config file that is present in the working directory
          # when infra/launch.py runs; the heredoc is unquoted so the shell
          # expands the secret values from the step environment.
          cat > .config <<EOF
          env:
            WANDB_API_KEY: ${WANDB_API_KEY}
            WANDB_ENTITY: stanford-mercury
            WANDB_PROJECT: levanter
            HF_TOKEN: ${HF_TOKEN}
          EOF
          # Every line of the command except the last must end with a
          # backslash; otherwise the shell treats the remaining flags as a
          # separate command and they never reach train_lm.
          python infra/launch.py -e CI 1 --foreground \
            --tpu_name "${TPU_NAME}" --run_id "${RUN_ID}" \
            --zone "${TPU_ZONE}" --tpu_type "${TPU_TYPE}" --preemptible -- \
            python -m levanter.main.train_lm \
            --config_path config/llama_small_fast.yaml \
            --trainer.checkpointer.base_path gs://levanter-checkpoints/llama-itest/ \
            --trainer.checkpointer.save_interval 10m \
            --trainer.num_train_steps 10000

      - name: Cleanup
        # Runs even when an earlier step failed or the run was cancelled.
        if: ${{ always() }}
        run: |
          gcloud compute tpus queued-resources delete "${TPU_NAME}" --zone "${TPU_ZONE}" --quiet --force