# .github/workflows/llm_bench-python.yml

# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: llm_bench Python Test

# Triggers: pushes to master and every pull request, restricted to changes
# under the two benchmark tool directories. Pull requests additionally run
# when this workflow file itself is modified.
on:
  push:
    branches:
      - master
    paths:
      - 'tools/llm_bench/**'
      - 'tools/who_what_benchmark/**'
  pull_request:
    paths:
      - 'tools/llm_bench/**'
      - 'tools/who_what_benchmark/**'
      - '.github/workflows/llm_bench-python.yml'

# Workflow-wide token permissions, set with the `read-all` shorthand.
# Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
permissions: read-all

concurrency:
  # Push (post-commit) runs are keyed on the unique run id, because
  # github.ref is shared by every push to the same branch. All other events
  # are keyed on github.ref, so a newer run for the same ref cancels the one
  # that is still in progress.
  cancel-in-progress: true
  group: "${{ github.event_name == 'push' && github.run_id || github.ref }}-llm-bench-python"

# Tool directories used throughout the jobs; steps read them through the
# env context as env.LLM_BENCH_PYPATH and env.WWB_PATH.
env:
  LLM_BENCH_PYPATH: 'tools/llm_bench'
  WWB_PATH: 'tools/who_what_benchmark'

jobs:
  # Lints both tool directories, then runs the llm_bench and
  # who_what_benchmark checks on a hosted Linux runner.
  build:
    runs-on: ubuntu-latest
    strategy:
      # Let every matrix entry finish even if another one fails.
      fail-fast: false
      matrix:
        python-version:
          - "3.10"  # quoted so YAML does not read it as the float 3.1
    steps:
    # Fetch the repository, then install the interpreter selected by the matrix.
    - uses: actions/checkout@v4
    - name: Set up Python ${{ matrix.python-version }}
      # v5 is built for the Node 20 action runtime; v3 targets a Node runtime
      # that GitHub has retired.
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        # Upgrade pip and install the lint / formatting / test tooling.
        python -m pip install --upgrade pip
        python -m pip install flake8 pytest black
        # Requirements of llm_bench itself.
        GIT_CLONE_PROTECTION_ACTIVE=false pip install --requirement ${{ env.LLM_BENCH_PYPATH }}/requirements.txt
        # Upgrade to the pre-release OpenVINO packages, with the nightly wheel index added.
        python -m pip install --upgrade --pre \
          openvino openvino-tokenizers openvino-genai \
          --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
    - name: Lint with flake8
      run: |
        # Lint each tool directory against its own setup.cfg; the first
        # flake8 failure stops the script and fails the step.
        for tool_dir in ${{ env.LLM_BENCH_PYPATH }} ${{ env.WWB_PATH }}; do
          python -m flake8 "${tool_dir}" --config="${tool_dir}/setup.cfg"
        done
    - name: Create code style diff for samples
      # Runs only when an earlier step of this job has failed.
      if: failure()
      run: |
        # Reformat the llm_bench sources in place with black, then store the
        # resulting working-tree changes as a patch file.
        python -m black --line-length 160 --skip-string-normalization ${{ env.LLM_BENCH_PYPATH }}/
        git diff > llm.bench_diff.diff
    # Publish llm.bench_diff.diff as a run artifact when the job has failed.
    # upload-artifact v3 has been shut off by GitHub; v4 accepts the same
    # `name` / `path` inputs used here.
    - uses: actions/upload-artifact@v4
      if: failure()
      with:
        name: llm.bench_diff
        path: llm.bench_diff.diff
    - name: Test native pytorch model on Linux
      env:
        # NOTE(review): presumably keeps git-lfs smudging enabled so the
        # clone brings the real weight files - confirm.
        GIT_LFS_SKIP_SMUDGE: "0"
      run: |
        # Clone the tiny model locally and benchmark it with the `-f pt` option.
        git clone --depth 1 https://huggingface.co/katuni4ka/tiny-random-qwen
        python ./tools/llm_bench/benchmark.py \
          -m tiny-random-qwen \
          -d cpu -n 1 -f pt
    - name: Test tiny-random-baichuan2 on Linux
      run: |
        # Export the model with optimum-cli (fp16 weights), then benchmark
        # the exported directory on CPU.
        optimum-cli export openvino \
          --model katuni4ka/tiny-random-baichuan2 \
          --trust-remote-code \
          --weight-format fp16 \
          ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16
        python ./tools/llm_bench/benchmark.py \
          -m ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16/ \
          -d cpu -n 1
    - name: Test tiny-stable-diffusion on Linux
      run: |
        # Export segmind/tiny-sd with optimum-cli (fp16 weights), then
        # benchmark it using the stable-diffusion prompt file.
        optimum-cli export openvino \
          --model segmind/tiny-sd \
          --trust-remote-code \
          --weight-format fp16 \
          ./ov_models/tiny-sd/pytorch/dldt/FP16/
        python ./tools/llm_bench/benchmark.py \
          -m ./ov_models/tiny-sd/pytorch/dldt/FP16/ \
          -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl \
          -d cpu -n 1
    - name: Test dreamlike-anime on Linux with GenAI
      run: |
        # Export dreamlike-anime-1.0 as a stable-diffusion task (fp16
        # weights), then benchmark it with the --genai option.
        optimum-cli export openvino \
          --model dreamlike-art/dreamlike-anime-1.0 \
          --task stable-diffusion \
          --weight-format fp16 \
          ov_models/dreamlike-art-dreamlike-anime-1.0/FP16
        python ./tools/llm_bench/benchmark.py \
          -m ./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/ \
          -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl \
          -d cpu -n 1 --genai
    - name: Test dreamlike-anime on Linux with GenAI and LoRA
      run: |
        # Download a LoRA adapter file, then benchmark the model directory
        # ./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/ with that
        # adapter applied at alpha 0.7.
        wget -O ./ov_models/soulcard.safetensors https://civitai.com/api/download/models/72591
        python ./tools/llm_bench/benchmark.py \
          -m ./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/ \
          -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl \
          -d cpu -n 1 --genai \
          --lora ./ov_models/soulcard.safetensors \
          --lora_alphas 0.7
    - name: Test TinyLlama-1.1B-Chat-v1.0 in Speculative Decoding mode on Linux
      run: |
        # Export the same checkpoint twice: fp16 weights for the main model
        # and int8 weights for the draft model.
        optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format fp16 ov_models/TinyLlama-1.1B-Chat-v1.0/FP16
        optimum-cli export openvino --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 --trust-remote-code --weight-format int8 ov_models/TinyLlama-1.1B-Chat-v1.0/INT8
        # Run the benchmark with the draft model twice, once with
        # --assistant_confidence_threshold and once with --num_assistant_tokens.
        python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --genai --assistant_confidence_threshold 0.4
        python ./tools/llm_bench/benchmark.py -m ./ov_models/TinyLlama-1.1B-Chat-v1.0/FP16/ --draft_model ./ov_models/TinyLlama-1.1B-Chat-v1.0/INT8/ -p "Why is the Sun yellow?" -d cpu --draft_device cpu -n 1 --genai --num_assistant_tokens 5
    - name: Test whisper-tiny on Linux
      run: |
        # Paths inside the dataset repository for the one archive that is needed.
        mls_audio=data/mls_polish/train/audio
        shard=3283_1447_000
        # Clone with GIT_LFS_SKIP_SMUDGE=1, then pull and unpack only that archive.
        GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --branch main --single-branch https://huggingface.co/datasets/facebook/multilingual_librispeech
        cd multilingual_librispeech
        git lfs pull -I "/${mls_audio}/${shard}.tar.gz"
        mkdir "${mls_audio}/${shard}"
        tar zxvf "${mls_audio}/${shard}.tar.gz" -C "${mls_audio}/${shard}/"
        cd ..
        optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny ./ov_models/whisper-tiny
        # Benchmark the same clip twice: without and then with --genai.
        clip="multilingual_librispeech/${mls_audio}/${shard}/${shard}000.flac"
        python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media "${clip}" -d cpu -n 1
        python ./tools/llm_bench/benchmark.py -m ./ov_models/whisper-tiny --media "${clip}" -d cpu -n 1 --genai
    - name: WWB Tests
      run: |
        # Install who_what_benchmark: its requirements, optimum-intel from
        # git, then the package itself.
        GIT_CLONE_PROTECTION_ACTIVE=false pip install --requirement ${{ env.WWB_PATH }}/requirements.txt
        pip install git+https://github.com/huggingface/optimum-intel.git
        GIT_CLONE_PROTECTION_ACTIVE=false pip install ${{ env.WWB_PATH }}
        # Force-reinstall the pre-release OpenVINO packages after the installs above.
        python -m pip install --upgrade --pre --force-reinstall \
          openvino openvino-tokenizers openvino-genai \
          --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
        python -m pytest -v tools/who_what_benchmark/tests
  # Checks the `--stateful` model conversion and runs the who_what_benchmark
  # tests. GitHub has removed the ubuntu-20.04 hosted runner image, so the job
  # is pinned to the next LTS image instead.
  stateful:
    runs-on: ubuntu-22.04
    steps:
      # Fetch the repository and install Python 3.10.
      - uses: actions/checkout@v4
      # v5 is the release built for the Node 20 action runtime.
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Test stateful
        run: |
          GIT_CLONE_PROTECTION_ACTIVE=false python -m pip install --requirement tools/llm_bench/requirements.txt
          # Remove the installed openvino package, then install the
          # pre-release packages with the nightly wheel index added.
          python -m pip uninstall -y openvino
          python -m pip install --upgrade --pre \
            openvino openvino-tokenizers openvino-genai \
            --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
          python tools/llm_bench/convert.py \
            --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
            --output_dir . \
            --stateful
          # grep exits non-zero, failing the step, when the converted model
          # XML contains no `beam_idx`.
          grep beam_idx pytorch/dldt/FP32/openvino_model.xml
      - name: WWB Tests
        run: |
          # Install who_what_benchmark: its requirements, optimum-intel from
          # git, the package itself, and pytest.
          GIT_CLONE_PROTECTION_ACTIVE=false pip install --requirement tools/who_what_benchmark/requirements.txt
          pip install git+https://github.com/huggingface/optimum-intel.git
          GIT_CLONE_PROTECTION_ACTIVE=false pip install tools/who_what_benchmark/
          pip install pytest
          # Force-reinstall the pre-release OpenVINO packages after the installs above.
          python -m pip install --upgrade --pre --force-reinstall \
            openvino openvino-tokenizers openvino-genai \
            --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
          python -m pytest -v tools/who_what_benchmark/tests