[sharktank] Evaluation - Add Perplexity test for vmfb #35

Workflow file for this run

name: Evaluation Tests

on:
  pull_request:
  workflow_dispatch:
  schedule:
    # Weekdays nightly at 07:00 UTC = 23:00 PST / 00:00 PDT.
    - cron: "0 7 * * 1-5"

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  test_perplexity_vmfb:
    timeout-minutes: 1000
    name: "Evaluation Tests - perplexity_vmfb"
    strategy:
      matrix:
        version: [3.11]
        runs-on: [llama-mi300x-3]
      fail-fast: false
    runs-on: ${{matrix.runs-on}}
    defaults:
      run:
        shell: bash
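    # PIP_CACHE_DIR keeps pip's download cache inside the workspace so the
    # actions/cache step below can persist it across runs.
    # SHARK_PLATFORM_REPO_ROOT is presumably read by the sharktank tooling to
    # locate the checkout.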
    env:
      PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
      SHARK_PLATFORM_REPO_ROOT: ${{ github.workspace }}
    steps:
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@v3
with:
python-version: ${{matrix.version}}
- name: "Checkout Code"
uses: actions/checkout@v3
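      # The pip cache is keyed on the resolved Python version plus a hash of
      # the requirements files, so it is invalidated whenever either changes.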
      - name: Cache Pip Packages
        uses: actions/cache@v4
        id: cache-pip
        with:
          path: ${{ env.PIP_CACHE_DIR }}
          key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }}
      - name: Install sharktank deps
        run: |
          python -m pip install --no-compile --upgrade pip
          # Note: We install in three steps in order to satisfy requirements
          # from non default locations first. Installing the PyTorch CPU
          # wheels saves multiple minutes and a lot of bandwidth on runner setup.
          pip install --no-compile -r pytorch-cpu-requirements.txt
          pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
            -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
          pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/

          # Try with the latest nightly releases, not what iree-turbine pins.
          # We could also pin to a known working or stable version.
          # This should eventually stabilize. Do the best we can for now.
          pip install -f https://iree.dev/pip-release-links.html --upgrade \
            iree-compiler \
            iree-runtime \
            "numpy<2.0"
      - name: Run perplexity test with vmfb
        run: |
          pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py \
            --longrun \
            --iree-device='hip://7' \
            --iree-hip-target='gfx942' \
            --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa \
            --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json
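
  # The eager-mode (torch) counterpart of the job above is kept below but is
  # currently disabled; it mirrors the vmfb job minus the IREE device/target
  # flags and the nightly iree-compiler/iree-runtime install.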
  # test_perplexity_torch:
  #   timeout-minutes: 1000
  #   name: "Evaluation Tests - perplexity_torch"
  #   strategy:
  #     matrix:
  #       version: [3.11]
  #       runs-on: [llama-mi300]
  #     fail-fast: false
  #   runs-on: ${{matrix.runs-on}}
  #   defaults:
  #     run:
  #       shell: bash
  #   env:
  #     PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
  #     SHARK_PLATFORM_REPO_ROOT: ${{ github.workspace }}
  #   steps:
  #     - name: "Setting up Python"
  #       id: setup_python
  #       uses: actions/setup-python@v3
  #       with:
  #         python-version: ${{matrix.version}}
  #     - name: "Checkout Code"
  #       uses: actions/checkout@v3
  #     - name: Cache Pip Packages
  #       uses: actions/cache@v4
  #       id: cache-pip
  #       with:
  #         path: ${{ env.PIP_CACHE_DIR }}
  #         key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements.txt') }}
  #     - name: Install sharktank deps
  #       run: |
  #         python -m pip install --no-compile --upgrade pip
  #         # Note: We install in three steps in order to satisfy requirements
  #         # from non default locations first. Installing the PyTorch CPU
  #         # wheels saves multiple minutes and a lot of bandwidth on runner setup.
  #         pip install --no-compile -r pytorch-cpu-requirements.txt
  #         pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
  #           -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
  #         pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
  #     - name: Run perplexity test in eager mode
  #       run: |
  #         pytest -n 8 -v -s sharktank/tests/evaluate/perplexity_vmfb_test.py \
  #           --longrun \
  #           --llama3-8b-f16-model-path=/data/llama-3.1/8b/llama8b_f16.irpa \
  #           --llama3-8b-tokenizer-path=/data/llama-3.1/8b/tokenizer_config.json