Consistency CI #1
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: 2023 LakeSoul Contributors | |
# | |
# SPDX-License-Identifier: Apache-2.0 | |
on: | |
push: | |
paths: | |
- "rust/**" | |
branches: | |
- 'main' | |
pull_request: | |
paths: | |
- "rust/**" | |
branches: | |
- 'main' | |
- 'release/**' | |
workflow_dispatch: | |
name: Consistency CI | |
env: | |
RUSTFLAGS: "-Awarnings" | |
jobs: | |
verify-hash-consistency: | |
name: verify benchmark results (amd64) | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Generate benchmark data and expected query results | |
run: | | |
mkdir -p lakesoul/test_files/tpch/data | |
git clone https://github.com/databricks/tpch-dbgen.git | |
cd tpch-dbgen | |
make | |
./dbgen -f -s 0.1 | |
mv *.tbl ../lakesoul/test_files/tpch/data | |
- name: Verify that benchmark queries return expected results | |
run: | | |
export TPCH_DATA=`realpath lakesoul/test_files/tpch/data` | |
# use release build for plan verificaton because debug build causes stack overflow | |
# sudo cargo test verify_hash_consistency --package lakesoul-datafusion --features=ci -- --test-threads=1 | |
- uses: actions/checkout@v4 | |
- name: Set up JDK 8 | |
uses: actions/setup-java@v4 | |
with: | |
java-version: '8' | |
distribution: 'temurin' | |
cache: maven | |
- name: Set up Python 3.9 | |
uses: actions/setup-python@v4 | |
with: | |
python-version: '3.9' | |
- name: Install dependencies | |
run: | | |
python -m pip install --upgrade pip setuptools wheel | |
pip install pymysql cryptography jproperties --no-cache-dir | |
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz -O $HOME/hadoop-3.3.5.tar.gz && tar xf $HOME/hadoop-3.3.5.tar.gz -C $HOME | |
echo "HADOOP_HOME=$HOME/hadoop-3.3.5" >> $GITHUB_ENV | |
wget https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-hadoop/1.17.1/flink-s3-fs-hadoop-1.17.1.jar -O $HOME/flink-s3-fs-hadoop-1.17.1.jar | |
wget https://repo1.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.12.3/parquet-hadoop-bundle-1.12.3.jar -O $HOME/parquet-hadoop-bundle-1.12.3.jar | |
wget https://repo1.maven.org/maven2/org/apache/flink/flink-parquet/1.17.1/flink-parquet-1.17.1.jar -O $HOME/flink-parquet-1.17.1.jar | |
- name: Install Protoc | |
uses: arduino/setup-protoc@v2 | |
with: | |
version: "23.x" | |
repo-token: ${{ secrets.GITHUB_TOKEN }} | |
- uses: actions-rs/toolchain@v1 | |
with: | |
profile: minimal | |
toolchain: nightly-2023-05-20 | |
default: true | |
- uses: Swatinem/rust-cache@v2 | |
with: | |
workspaces: "./rust -> target" | |
- name: Pull images | |
run: | | |
docker pull -q bitnami/spark:3.3.1 | |
- uses: actions-rs/cargo@v1 | |
with: | |
use-cross: true | |
command: build | |
args: '--manifest-path rust/Cargo.toml --target x86_64-unknown-linux-gnu --release --all-features' | |
- name: Build with Maven | |
run: | | |
mkdir -p rust/target/release | |
cp rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_io_c.so rust/target/release | |
cp rust/target/x86_64-unknown-linux-gnu/release/liblakesoul_metadata_c.so rust/target/release | |
MAVEN_OPTS="-Xmx4000m" mvn -q -B package -f pom.xml -Pcross-build -DskipTests | |
- name: Get jar names | |
run: | | |
echo "FLINK_JAR_NAME=$(python script/get_jar_name.py lakesoul-flink)" >> $GITHUB_ENV | |
echo "FLINK_TEST_JAR_NAME=$(python script/get_jar_name.py lakesoul-flink | sed -e 's/.jar/-tests.jar/g')" >> $GITHUB_ENV | |
echo "SPARK_JAR_NAME=$(python script/get_jar_name.py lakesoul-spark)" >> $GITHUB_ENV | |
echo "SPARK_TEST_JAR_NAME=$(python script/get_jar_name.py lakesoul-spark | sed -e 's/.jar/-tests.jar/g')" >> $GITHUB_ENV | |
- name: Copy built jar to work-dir | |
run: | | |
cp ./lakesoul-flink/target/$FLINK_JAR_NAME ./script/benchmark/work-dir | |
cp ./lakesoul-flink/target/$FLINK_TEST_JAR_NAME ./script/benchmark/work-dir | |
cp ./lakesoul-spark/target/$SPARK_JAR_NAME ./script/benchmark/work-dir | |
cp ./lakesoul-spark/target/$SPARK_TEST_JAR_NAME ./script/benchmark/work-dir | |
- name: Hash-Consistency Verification task | |
run: | | |
cd ./script/benchmark | |
docker run --cpus 2 -m 5000m --rm -t -v ${PWD}/work-dir:/opt/spark/work-dir --env lakesoul_home=/opt/spark/work-dir/lakesoul.properties bitnami/spark:3.3.1 spark-submit --driver-memory 4G --executor-memory 4G --conf spark.driver.memoryOverhead=1500m --conf spark.executor.memoryOverhead=1500m --jars /opt/spark/work-dir/$SPARK_JAR_NAME --class org.apache.spark.sql.lakesoul.benchmark.Benchmark --master local[4] /opt/spark/work-dir/$SPARK_TEST_JAR_NAME | |