Merge remote-tracking branch 'origin/master' into branch-test-spark-rc
EnricoMi committed Jul 18, 2024
2 parents 64cd47d + 4ba4159 commit 654a592
Showing 143 changed files with 9,186 additions and 1,872 deletions.
84 changes: 84 additions & 0 deletions .github/actions/build-whl/action.yml
@@ -0,0 +1,84 @@
+name: 'Build Whl'
+author: 'EnricoMi'
+description: 'A GitHub Action that builds pyspark-extension package'
+
+inputs:
+  spark-version:
+    description: Spark version, e.g. 3.4.0 or 3.4.0-SNAPSHOT
+    required: true
+  scala-version:
+    description: Scala version, e.g. 2.12.15
+    required: true
+  spark-compat-version:
+    description: Spark compatibility version, e.g. 3.4
+    required: true
+  scala-compat-version:
+    description: Scala compatibility version, e.g. 2.12
+    required: true
+  python-version:
+    description: Python version, e.g. 3.8
+    required: true
+
+runs:
+  using: 'composite'
+  steps:
+  - name: Set versions in pom.xml
+    run: |
+      ./set-version.sh ${{ inputs.spark-version }} ${{ inputs.scala-version }}
+      git diff
+    shell: bash
+
+  - name: Fetch Binaries Artifact
+    uses: actions/download-artifact@v4
+    with:
+      name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }}
+      path: .
+
+  - name: Cache Maven packages
+    if: github.event_name != 'merge_group'
+    uses: actions/cache@v4
+    with:
+      path: ~/.m2/repository
+      key: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
+      restore-keys: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-
+
+  - name: Cache Pip packages
+    if: github.event_name != 'merge_group'
+    uses: actions/cache@v4
+    with:
+      path: ~/.cache/pip
+      key: ${{ runner.os }}-pip-whl-${{ inputs.python-version }}-${{ inputs.spark-version }}
+
+  - name: Setup Python
+    uses: actions/setup-python@v5
+    with:
+      python-version: ${{ inputs.python-version }}
+
+  - name: Install Python dependencies
+    run: |
+      python -m pip install --upgrade pip build twine
+    shell: bash
+
+  - name: Build whl
+    run: |
+      ./build-whl.sh
+    shell: bash
+
+  - name: Test whl
+    run: |
+      twine check python/dist/*
+      pip install pyspark==${{ inputs.spark-version }}
+      pip install python/dist/*.whl
+      python test-release.py
+    shell: bash
+
+  - name: Upload whl
+    uses: actions/upload-artifact@v4
+    with:
+      name: Whl (Spark ${{ inputs.spark-compat-version }} Scala ${{ inputs.scala-compat-version }})
+      path: |
+        python/dist/*.whl
+
+branding:
+  icon: 'check-circle'
+  color: 'green'
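
For reference, the sequence this composite action automates can be reproduced locally roughly as follows (a sketch; version numbers are illustrative, and it assumes the repository's set-version.sh, build-whl.sh and test-release.py referenced by the steps above):

    # set Spark and Scala versions in pom.xml, then build the wheel
    ./set-version.sh 3.4.0 2.12.17
    python -m pip install --upgrade pip build twine
    ./build-whl.sh
    # validate the wheel, install it next to a matching pyspark, and smoke-test
    twine check python/dist/*
    pip install pyspark==3.4.0
    pip install python/dist/*.whl
    python test-release.py
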
28 changes: 19 additions & 9 deletions .github/actions/build/action.yml
@@ -12,6 +12,12 @@ inputs:
   spark-compat-version:
     description: Spark compatibility version, e.g. 3.4
     required: true
+  scala-compat-version:
+    description: Scala compatibility version, e.g. 2.12
+    required: true
+  java-compat-version:
+    description: Java compatibility version, e.g. 8
+    required: true
 
 runs:
   using: 'composite'
@@ -23,33 +29,37 @@ runs:
     shell: bash
 
   - name: Cache Maven packages
-    uses: actions/cache@v3
+    if: github.event_name != 'merge_group'
+    uses: actions/cache@v4
     with:
       path: ~/.m2/repository
       key: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
       restore-keys: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-
 
-  - name: Setup JDK 1.8
-    uses: actions/setup-java@v3
+  - name: Setup JDK ${{ inputs.java-compat-version }}
+    uses: actions/setup-java@v4
     with:
-      java-version: '8'
+      java-version: ${{ inputs.java-compat-version }}
       distribution: 'zulu'
 
   - name: Build
+    env:
+      JDK_JAVA_OPTIONS: --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-exports java.base/sun.util.calendar=ALL-UNNAMED
     run: |
-      mvn --batch-mode --update-snapshots clean compile test-compile
-      mvn --batch-mode package -DskipTests -Dmaven.test.skip=true
-      mvn --batch-mode install -DskipTests -Dmaven.test.skip=true -Dgpg.skip
+      mvn --batch-mode --update-snapshots -Dspotless.check.skip clean compile test-compile
+      mvn --batch-mode package -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true
+      mvn --batch-mode install -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true -Dgpg.skip
     shell: bash
 
   - name: Upload Binaries
-    uses: actions/upload-artifact@v3
+    uses: actions/upload-artifact@v4
     with:
-      name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-version }}
+      name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }}
       path: |
         *
         !.*
         !target/*-javadoc.jar
         !target/*-sources.jar
+        !target/site
 
 branding:
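
With this change, the equivalent local build skips the Spotless check and all tests; roughly (a sketch, commands copied from the Build step above; JDK_JAVA_OPTIONS is only picked up by JDK 9+):

    export JDK_JAVA_OPTIONS="--add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-exports java.base/sun.util.calendar=ALL-UNNAMED"
    mvn --batch-mode --update-snapshots -Dspotless.check.skip clean compile test-compile
    mvn --batch-mode package -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true
    mvn --batch-mode install -Dspotless.check.skip -DskipTests -Dmaven.test.skip=true -Dgpg.skip
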
89 changes: 89 additions & 0 deletions .github/actions/check-compat/action.yml
@@ -0,0 +1,89 @@
+name: 'Check'
+author: 'EnricoMi'
+description: 'A GitHub Action that checks compatibility of spark-extension'
+
+inputs:
+  spark-version:
+    description: Spark version, e.g. 3.4.0 or 3.4.0-SNAPSHOT
+    required: true
+  scala-version:
+    description: Scala version, e.g. 2.12.15
+    required: true
+  spark-compat-version:
+    description: Spark compatibility version, e.g. 3.4
+    required: true
+  scala-compat-version:
+    description: Scala compatibility version, e.g. 2.12
+    required: true
+  package-version:
+    description: Spark-Extension version to check against
+    required: true
+
+runs:
+  using: 'composite'
+  steps:
+  - name: Set versions in pom.xml
+    run: |
+      ./set-version.sh ${{ inputs.spark-version }} ${{ inputs.scala-version }}
+      git diff
+    shell: bash
+
+  - name: Fetch Binaries Artifact
+    uses: actions/download-artifact@v4
+    with:
+      name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }}
+      path: .
+
+  - name: Cache Maven packages
+    if: github.event_name != 'merge_group'
+    uses: actions/cache@v4
+    with:
+      path: ~/.m2/repository
+      key: ${{ runner.os }}-mvn-check-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
+      restore-keys: |
+        ${{ runner.os }}-mvn-check-${{ inputs.spark-version }}-${{ inputs.scala-version }}
+        ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-
+
+  - name: Setup JDK 1.8
+    uses: actions/setup-java@v4
+    with:
+      java-version: '8'
+      distribution: 'zulu'
+
+  - name: Install Checker
+    run: |
+      sudo apt update
+      sudo apt install japi-compliance-checker
+    shell: bash
+
+  - name: Release exists
+    id: exists
+    continue-on-error: true
+    run: |
+      curl --head --fail https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_${{ inputs.scala-compat-version }}/${{ inputs.package-version }}-${{ inputs.spark-compat-version }}/spark-extension_${{ inputs.scala-compat-version }}-${{ inputs.package-version }}-${{ inputs.spark-compat-version }}.jar
+    shell: bash
+
+  - name: Fetch package
+    if: steps.exists.outcome == 'success'
+    run: |
+      mvn dependency:get -Dtransitive=false -DremoteRepositories -Dartifact=uk.co.gresearch.spark:spark-extension_${{ inputs.scala-compat-version }}:${{ inputs.package-version }}-${{ inputs.spark-compat-version }}
+    shell: bash
+
+  - name: Check
+    if: steps.exists.outcome == 'success'
+    continue-on-error: ${{ github.ref == 'refs/heads/master' }}
+    run: |
+      ls -lah ~/.m2/repository/uk/co/gresearch/spark/spark-extension_${{ inputs.scala-compat-version }}/${{ inputs.package-version }}-${{ inputs.spark-compat-version }}/spark-extension_${{ inputs.scala-compat-version }}-${{ inputs.package-version }}-${{ inputs.spark-compat-version }}.jar target/spark-extension*.jar
+      japi-compliance-checker ~/.m2/repository/uk/co/gresearch/spark/spark-extension_${{ inputs.scala-compat-version }}/${{ inputs.package-version }}-${{ inputs.spark-compat-version }}/spark-extension_${{ inputs.scala-compat-version }}-${{ inputs.package-version }}-${{ inputs.spark-compat-version }}.jar target/spark-extension*.jar
+    shell: bash
+
+  - name: Upload Report
+    uses: actions/upload-artifact@v4
+    if: always() && steps.exists.outcome == 'success'
+    with:
+      name: Compat-Report-${{ inputs.spark-compat-version }}
+      path: compat_reports/spark-extension/*
+
+branding:
+  icon: 'check-circle'
+  color: 'green'
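
The same compatibility check can be reproduced locally against a published release; a sketch with illustrative Scala and package versions:

    # fetch the released jar from Maven Central (coordinates illustrative)
    mvn dependency:get -Dtransitive=false \
        -Dartifact=uk.co.gresearch.spark:spark-extension_2.12:2.12.0-3.4
    # compare its API against the locally built jar
    japi-compliance-checker \
        ~/.m2/repository/uk/co/gresearch/spark/spark-extension_2.12/2.12.0-3.4/spark-extension_2.12-2.12.0-3.4.jar \
        target/spark-extension_2.12-*.jar
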
52 changes: 44 additions & 8 deletions .github/actions/test-jvm/action.yml
@@ -15,6 +15,12 @@ inputs:
   scala-compat-version:
     description: Scala compatibility version, e.g. 2.12
     required: true
+  hadoop-version:
+    description: Hadoop version, e.g. 2.7 or 2
+    required: true
+  java-compat-version:
+    description: Java compatibility version, e.g. 8
+    required: true
 
 runs:
   using: 'composite'
@@ -26,26 +32,56 @@ runs:
     shell: bash
 
   - name: Fetch Binaries Artifact
-    uses: actions/download-artifact@v3
+    uses: actions/download-artifact@v4
     with:
-      name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-version }}
+      name: Binaries-${{ inputs.spark-compat-version }}-${{ inputs.scala-compat-version }}
       path: .
 
+  - name: Cache Spark Binaries
+    uses: actions/cache@v4
+    if: ( ! contains(inputs.spark-version, '-SNAPSHOT') )
+    with:
+      path: ~/spark
+      key: ${{ runner.os }}-spark-binaries-${{ inputs.spark-version }}-${{ inputs.scala-compat-version }}
+
+  - name: Setup Spark Binaries
+    if: ( ! contains(inputs.spark-version, '-SNAPSHOT') )
+    env:
+      SPARK_PACKAGE: spark-${{ inputs.spark-version }}/spark-${{ inputs.spark-version }}-bin-hadoop${{ inputs.hadoop-version }}${{ inputs.scala-compat-version == '2.13' && '-scala2.13' || '' }}.tgz
+    run: |
+      if [[ ! -e ~/spark ]]
+      then
+        wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC "${{ runner.temp }}"
+        archive=$(basename "${SPARK_PACKAGE}") bash -c "mv -v "${{ runner.temp }}/\${archive/%.tgz/}" ~/spark"
+      fi
+      echo "SPARK_HOME=$(cd ~/spark; pwd)" >> $GITHUB_ENV
+    shell: bash
+
   - name: Cache Maven packages
-    uses: actions/cache@v3
+    if: github.event_name != 'merge_group'
+    uses: actions/cache@v4
     with:
       path: ~/.m2/repository
       key: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-${{ hashFiles('pom.xml') }}
      restore-keys: ${{ runner.os }}-mvn-build-${{ inputs.spark-version }}-${{ inputs.scala-version }}-
 
-  - name: Setup JDK 1.8
-    uses: actions/setup-java@v3
+  - name: Setup JDK ${{ inputs.java-compat-version }}
+    uses: actions/setup-java@v4
     with:
-      java-version: '8'
+      java-version: ${{ inputs.java-compat-version }}
       distribution: 'zulu'
 
   - name: Scala and Java Tests
-    run: mvn --batch-mode test
+    env:
+      JDK_JAVA_OPTIONS: --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-exports java.base/sun.util.calendar=ALL-UNNAMED
+    run: mvn --batch-mode --update-snapshots -Dspotless.check.skip test
     shell: bash
 
+  - name: Diff App test
+    if: ( ! contains(inputs.spark-version, '-SNAPSHOT') )
+    run: |
+      $SPARK_HOME/bin/spark-submit --packages com.github.scopt:scopt_${{ inputs.scala-compat-version }}:4.1.0 target/spark-extension_*.jar --format parquet --id id src/test/files/test.parquet/file1.parquet src/test/files/test.parquet/file2.parquet diff.parquet
+      $SPARK_HOME/bin/spark-shell <<< 'val df = spark.read.parquet("diff.parquet").orderBy($"id").groupBy($"diff").count; df.show; if (df.count != 2) sys.exit(1)'
+    shell: bash
+
   - name: Generate Unit Test Report
@@ -55,7 +91,7 @@
 
   - name: Upload Unit Test Results
     if: always()
-    uses: actions/upload-artifact@v3
+    uses: actions/upload-artifact@v4
     with:
       name: JVM Test Results (Spark ${{ inputs.spark-version }} Scala ${{ inputs.scala-version }})
       path: |
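
The new Spark-binaries setup and the Diff App smoke test translate roughly to this local session (a sketch; Spark and Hadoop versions are illustrative, and the mirror-redirect URL is the one used by the Setup Spark Binaries step above):

    SPARK_VERSION=3.4.0
    HADOOP_VERSION=3
    SPARK_PACKAGE="spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
    # download and unpack a Spark release, then point SPARK_HOME at it
    wget --progress=dot:giga "https://www.apache.org/dyn/closer.lua/spark/${SPARK_PACKAGE}?action=download" -O - | tar -xzC /tmp
    export SPARK_HOME="/tmp/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
    # diff two parquet files with the Diff App contained in the built jar
    "$SPARK_HOME/bin/spark-submit" --packages com.github.scopt:scopt_2.12:4.1.0 \
        target/spark-extension_*.jar --format parquet --id id \
        src/test/files/test.parquet/file1.parquet src/test/files/test.parquet/file2.parquet diff.parquet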