Benchmark TFLite

iree-org · Oct 10, 2023 · bb4ad91 · bb4ad91
1 parent 1ea715d
commit bb4ad91
Show file tree

Hide file tree

Showing 12 changed files with 873 additions and 3 deletions.
diff --git a/.github/workflows/run_tflite_benchmark.yml b/.github/workflows/run_tflite_benchmark.yml
@@ -0,0 +1,123 @@
+# Copyright 2023 The OpenXLA Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# TFLite Benchmarks Workflow.
+
+name: TFLite Benchmarks
+
+on:
+  # Runs when dispatched manually and on pull_request events.
+  workflow_dispatch:
+  pull_request:
+
+concurrency:
+  # A PR number if a pull request and otherwise the commit hash. This cancels
+  # queued and in-progress runs for the same PR (presubmit) or commit
+  # (postsubmit).
+  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
+  cancel-in-progress: true
+
+env:
+  # Per-run artifact directory. The bucket name contains `presubmit` for
+  # pull_request events and `postsubmit` for every other event; the path is
+  # keyed by the run id and the run attempt.
+  GCS_DIR: gs://openxla-github-actions-${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}-artifacts/${{ github.run_id }}/${{ github.run_attempt }}
+
+jobs:
+  # Computes the values shared by the benchmark jobs (runner group and the GCS
+  # directory for benchmark results) and exposes them as job outputs.
+  setup:
+    runs-on: ubuntu-22.04
+    outputs:
+      runner-group: ${{ steps.configure.outputs.runner-group }}
+      benchmark-gcs-dir: ${{ steps.configure.outputs.benchmark-gcs-dir }}
+    steps:
+      - name: "Checking out PR repository"
+        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791  # v2.5.0
+      - name: "Configuring CI options"
+        id: configure
+        env:
+          RUNNER_GROUP: ${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
+        run: |
+          # Just informative logging. There should only be two commits in the
+          # history here, but limiting the depth helps when copying from a local
+          # repo instead of using checkout, e.g. with
+          # https://github.com/nektos/act where there will be more.
+          git log --oneline --graph --max-count=3
+          # Workflow jobs can't access `env` in `runs-on`, so we need to make
+          # `runner-group` a job output variable. Always append (`>>`) so an
+          # output written earlier in this step can never be truncated away.
+          echo "runner-group=${RUNNER_GROUP}" >> "${GITHUB_OUTPUT}"
+
+          # For presubmit testing, the result artifacts are uploaded to the
+          # temporary workflow GCS dir. In postsubmit, the result artifacts are
+          # uploaded to the comparative benchmark GCS dir.
+          if [[ "${RUNNER_GROUP}" == "presubmit" ]]; then
+            BENCHMARK_GCS_DIR="${GCS_DIR}/comparative-benchmark-artifacts"
+          else
+            # A single `date` call so the day and the epoch seconds are taken
+            # from the same instant.
+            BENCHMARK_GCS_DIR="gs://comparative-benchmark-artifacts/$(date +'%Y-%m-%d.%s')"
+          fi
+          echo "benchmark-gcs-dir=${BENCHMARK_GCS_DIR}" >> "${GITHUB_OUTPUT}"
+
+  # Runs the TFLite benchmark script inside the pinned benchmark container on
+  # a c2-standard-16 runner and copies the results JSON to GCS.
+  benchmark_on_c2-standard-16:
+    needs: [setup]
+    runs-on:
+      - self-hosted  # must come first
+      - runner-group=${{ needs.setup.outputs.runner-group }}
+      - environment=prod
+      - machine-type=c2-standard-16
+    env:
+      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
+      RESULTS_DIR: results-dir
+      TARGET_DEVICE: c2-standard-16
+      TFLITE_TOOL_DIR: tool-dir
+    steps:
+      - name: "Checking out PR repository"
+        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791  # v2.5.0
+      - name: "Setup"
+        id: setup
+        run: |
+          echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
+          # `-p` so the step does not fail if a directory already exists.
+          mkdir -p "${RESULTS_DIR}" "${TFLITE_TOOL_DIR}"
+      - name: "Benchmarking TFLite"
+        env:
+          TFLITE_RESULTS_JSON: tflite.json
+          RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
+        run: |
+          RESULTS_PATH="${RESULTS_DIR}/${TFLITE_RESULTS_JSON}"
+          # The workspace is bind-mounted at /work so the script's relative
+          # paths resolve inside the container. `${PWD}` must stay inside the
+          # double-quoted string; nested quotes would leave it unquoted.
+          docker run --mount="type=bind,src=${PWD},target=/work" --workdir="/work" \
+            "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
+            ./experimental/tflite/benchmark_tflite.sh \
+              "${TARGET_DEVICE}" \
+              "${TFLITE_TOOL_DIR}" \
+              "${RESULTS_PATH}"
+          gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
+
+  # Benchmark job for the pixel-6-pro runner. It currently runs only the IREE
+  # benchmark script; the TFLite commands are commented out in the last step.
+  benchmark_on_pixel-6-pro:
+    needs: [setup]
+    runs-on:
+      - self-hosted  # must come first
+      - runner-group=${{ needs.setup.outputs.runner-group }}
+      - environment=prod
+      - machine-type=pixel-6-pro
+    env:
+      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
+      RESULTS_DIR: results-dir
+      TARGET_DEVICE: pixel-6-pro
+      TFLITE_TOOL_DIR: tool-dir
+    steps:
+      - name: "Checking out PR repository"
+        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791  # v2.5.0
+      - name: "Setup"
+        id: setup
+        run: |
+          echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
+          mkdir "${RESULTS_DIR}"
+          mkdir "${TFLITE_TOOL_DIR}"
+      # NOTE: despite its name, this step runs the IREE benchmark script. The
+      # step-level env values below are referenced only by the commented-out
+      # TFLite commands, so nothing is uploaded to GCS from this job.
+      - name: "Benchmarking TFLite"
+        env:
+          TFLITE_RESULTS_JSON: tflite.json
+          RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
+        run: |
+          ./experimental/iree/benchmark_iree.sh
+
+          #RESULTS_PATH="${RESULTS_DIR}/${TFLITE_RESULTS_JSON}"
+          #./experimental/tflite/benchmark_tflite.sh "${TARGET_DEVICE}" "${TFLITE_TOOL_DIR}" "${RESULTS_PATH}"
+          #gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
diff --git a/common_benchmark_suite/openxla/benchmark/comparative_suite/tf/model_definitions.py b/common_benchmark_suite/openxla/benchmark/comparative_suite/tf/model_definitions.py
@@ -10,7 +10,7 @@
 from openxla.benchmark import def_types
 from openxla.benchmark.comparative_suite import utils
 
-PARENT_GCS_DIR = "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230817_1692333975/"
+PARENT_GCS_DIR = "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230829_1696537918/"
 ARTIFACTS_DIR_URL_TEMPLATE = string.Template(PARENT_GCS_DIR + "${name}")
 
 # T5-Large models.

diff --git a/experimental/ggml/benchmark_ggml.sh b/experimental/ggml/benchmark_ggml.sh
@@ -88,8 +88,8 @@ if [[ "${TARGET_DEVICE_NAME}" =~ ^(pixel-4|pixel-6-pro|moto-edge-x30)$ ]]; then
   adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance"
 else
   BENCHMARK_SCRIPT="run_benchmarks.py"
-  # c2-standard-16 has 16 cores.
-  THREADS="1,8,16"
+  # c2-standard-16 has 8 physical cores (16 vCPUs).
+  THREADS="1,8"
 
   args+=(
      --threads "${THREADS}"

diff --git a/experimental/iree/benchmark_iree.sh b/experimental/iree/benchmark_iree.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# This is a temporary hack to run IREE benchmarks on pixel-6-pro since
+# it's currently not working in the IREE repo.
+
+ROOT_DIR=/tmp/iree-benchmarks
+TD="$(cd $(dirname $0) && pwd)"
+
+mkdir "${ROOT_DIR}"
+pushd "${ROOT_DIR}"
+
+# Download benchmark tool.
+gsutil cp "gs://iree-github-actions-presubmit-artifacts/6464567954/1/benchmark-tools/android-armv8.2-a-benchmark-tools.tar" .
+tar -xf "android-armv8.2-a-benchmark-tools.tar"
+adb push "android-armv8.2-a-benchmark-tools-dir/build/tools/iree-benchmark-module" "/data/local/tmp"
+adb shell "chmod +x /data/local/tmp/iree-benchmark-module"
+
+# Download vmfb's.
+gsutil cp "gs://iree-github-actions-presubmit-artifacts/6464567954/1/e2e-test-artifacts/iree_module_BertLarge_Fp32_Batch1_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_data-tiling_ukernel_/module.vmfb" "BertLarge_Batch1.vmfb"
+gsutil cp "gs://iree-github-actions-presubmit-artifacts/6464567954/1/e2e-test-artifacts/iree_module_BertLarge_Fp32_Batch16_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_data-tiling_ukernel_/module.vmfb" "BertLarge_Batch16.vmfb"
+gsutil cp "gs://iree-github-actions-presubmit-artifacts/6464567954/1/e2e-test-artifacts/iree_module_BertLarge_Fp32_Batch24_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_data-tiling_ukernel_/module.vmfb" "BertLarge_Batch24.vmfb"
+gsutil cp "gs://iree-github-actions-presubmit-artifacts/6464567954/1/e2e-test-artifacts/iree_module_BertLarge_Fp32_Batch32_tflite___armv8.2-a-generic-linux_android29-llvm_cpu__experimental-flags_data-tiling_ukernel_/module.vmfb" "BertLarge_Batch32.vmfb"
+
+# Setup environment.
+adb push "${TD}/set_android_scaling_governor.sh" "/data/local/tmp"
+adb shell "chmod +x /data/local/tmp/set_android_scaling_governor.sh"
+adb shell "su root sh /data/local/tmp/set_android_scaling_governor.sh performance"
+
+# Benchmark.
+ITERATIONS=10
+adb push "BertLarge_Batch1.vmfb" "/data/local/tmp"
+adb shell "taskset f0 /data/local/tmp/iree-benchmark-module --function=main --input=1x384xi32=0 --input=1x384xi32=0 --device_allocator=caching --task_topology_group_count=4 --device=local-task --module=/data/local/tmp/BertLarge_Batch1.vmfb --time_unit=ms --benchmark_format=json --benchmark_out_format=json --print_statistics=true --benchmark_repetitions=${ITERATIONS}"
+adb shell "rm /data/local/tmp/BertLarge_Batch1.vmfb"
+
+adb push "BertLarge_Batch16.vmfb" "/data/local/tmp"
+adb shell "taskset f0 /data/local/tmp/iree-benchmark-module --function=main --input=16x384xi32=0 --input=16x384xi32=0 --device_allocator=caching --task_topology_group_count=4 --device=local-task --module=/data/local/tmp/BertLarge_Batch16.vmfb --time_unit=ms --benchmark_format=json --benchmark_out_format=json --print_statistics=true --benchmark_repetitions=${ITERATIONS}"
+adb shell "rm /data/local/tmp/BertLarge_Batch16.vmfb"
+
+adb push "BertLarge_Batch24.vmfb" "/data/local/tmp"
+adb shell "taskset f0 /data/local/tmp/iree-benchmark-module --function=main --input=24x384xi32=0 --input=24x384xi32=0 --device_allocator=caching --task_topology_group_count=4 --device=local-task --module=/data/local/tmp/BertLarge_Batch24.vmfb --time_unit=ms --benchmark_format=json --benchmark_out_format=json --print_statistics=true --benchmark_repetitions=${ITERATIONS}"
+adb shell "rm /data/local/tmp/BertLarge_Batch24.vmfb"
+
+adb push "BertLarge_Batch32.vmfb" "/data/local/tmp"
+adb shell "taskset f0 /data/local/tmp/iree-benchmark-module --function=main --input=32x384xi32=0 --input=32x384xi32=0 --device_allocator=caching --task_topology_group_count=4 --device=local-task --module=/data/local/tmp/BertLarge_Batch32.vmfb --time_unit=ms --benchmark_format=json --benchmark_out_format=json --print_statistics=true --benchmark_repetitions=${ITERATIONS}"
+adb shell "rm /data/local/tmp/BertLarge_Batch32.vmfb"
+
+adb shell "rm -rf /data/local/tmp/*"
+
+popd
+
+
+
+
+
diff --git a/experimental/iree/set_android_scaling_governor.sh b/experimental/iree/set_android_scaling_governor.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+
+# Copyright 2023 The OpenXLA Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Runs on an android device itself to set the frequency scaling governor for all
+# CPUs (default performance).
+
+################################### WARNING ####################################
+# This will overheat the phone if it's not on a cooling plate, resulting in    #
+# thermal throttling. To prevent anything catching on fire, the actual CPU     #
+# frequencies will be throttled to below the maximum, skewing your results.    #
+################################################################################
+
+set -euo pipefail
+
+# Governor to apply; the first positional argument, or `performance` if absent.
+GOVERNOR="${1:-performance}"
+
+# Prints a header followed by one row per present CPU, pasting the CPU name
+# together with its current governor and its current/min/max frequency files.
+print_cpu_info() {
+  echo 'cpu\tgovernor\tcur\tmin\tmax'
+  echo "------------------------------------------------"
+  for cpu in $(cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq); do
+    cpufreq_dir="/sys/devices/system/cpu/cpu${cpu}/cpufreq"
+    echo "cpu${cpu}" | paste \
+      - \
+      "${cpufreq_dir}/scaling_governor" \
+      "${cpufreq_dir}/cpuinfo_cur_freq" \
+      "${cpufreq_dir}/cpuinfo_min_freq" \
+      "${cpufreq_dir}/cpuinfo_max_freq"
+  done
+}
+
+echo "CPU info (before changing governor):"
+print_cpu_info
+
+echo "Setting CPU frequency governor to ${GOVERNOR}"
+
+# Write the requested governor into every present CPU's scaling_governor file.
+for cpu in $(cat /sys/devices/system/cpu/present | tr '-' ' ' | xargs seq); do
+  echo "${GOVERNOR}" > "/sys/devices/system/cpu/cpu${cpu?}/cpufreq/scaling_governor"
+done
+
+echo "CPU info (after changing governor):"
+print_cpu_info