Commit 99a0a29

Merge branch 'apache:master' into SPARK-48463

WeichenXu123 authored Jul 13, 2024
2 parents f91fedf + b4cd2ec commit 99a0a29
Showing 2,517 changed files with 7,916 additions and 1,930 deletions.
1 change: 1 addition & 0 deletions .github/labeler.yml
@@ -225,6 +225,7 @@ DEPLOY:
CONNECT:
- changed-files:
- any-glob-to-any-file: [
+      'connect/**/*',
'connector/connect/**/*',
'python/pyspark/sql/**/connect/**/*',
'python/pyspark/ml/**/connect/**/*'
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yml
@@ -606,7 +606,7 @@ jobs:
- name: Breaking change detection against branch-3.5
uses: bufbuild/buf-breaking-action@v1
with:
-          input: connector/connect/common/src/main
+          input: connect/common/src/main
against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
- name: Install Python 3.9
uses: actions/setup-python@v5
4 changes: 2 additions & 2 deletions .github/workflows/build_python_connect.yml
@@ -84,7 +84,7 @@ jobs:
# Start a Spark Connect server for local
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+            --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
# Remove Py4J and PySpark zipped library to make sure there is no JVM connection
mv python/lib lib.back
@@ -104,7 +104,7 @@ jobs:
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--master "local-cluster[2, 4, 1024]" \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+            --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
# Remove Py4J and PySpark zipped library to make sure there is no JVM connection
mv python/lib lib.back
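For context, once a server started this way is listening, the test suite needs only the pure-Python Connect client; a minimal sketch of such a client session follows (the endpoint sc://localhost:15002 is an assumption based on Spark Connect's default gRPC port, not something this workflow pins down):

```python
# Minimal Spark Connect client session: no Py4J, no client-side JVM, which is
# why the workflow moves python/lib out of the way before running the tests.
from pyspark.sql import SparkSession

# 15002 is Spark Connect's default gRPC port; adjust if the server above was
# started with a different binding.
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

df = spark.range(5).selectExpr("id", "id * 2 AS doubled")
print(df.collect())  # executed on the server; only gRPC crosses the boundary

spark.stop()
```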
2 changes: 1 addition & 1 deletion .github/workflows/build_python_connect35.yml
@@ -87,7 +87,7 @@ jobs:
# Start a Spark Connect server for local
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
-            --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+            --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
# Checkout to branch-3.5 to use the tests in branch-3.5.
cd ..
2 changes: 1 addition & 1 deletion .github/workflows/maven_test.yml
@@ -194,7 +194,7 @@ jobs:
if [[ "$INCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
-          ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
+          ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connect/common,connect/server test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
6 changes: 6 additions & 0 deletions R/pkg/R/functions.R
Expand Up @@ -1558,6 +1558,9 @@ setMethod("max",
#' @details
#' \code{max_by}: Returns the value associated with the maximum value of ord.
#'
+#' Note: The function is non-deterministic so the output order can be different
+#' for those associated the same values of `x`.
+#'
#' @rdname column_aggregate_functions
#' @aliases max_by max_by,Column-method
#' @note max_by since 3.3.0
Expand Down Expand Up @@ -1633,6 +1636,9 @@ setMethod("min",
#' @details
#' \code{min_by}: Returns the value associated with the minimum value of ord.
#'
+#' Note: The function is non-deterministic so the output order can be different
+#' for those associated the same values of `x`.
+#'
#' @rdname column_aggregate_functions
#' @aliases min_by min_by,Column-method
#' @note min_by since 3.3.0
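The tie-breaking behavior these new R doc notes describe is easiest to see with a concrete aggregate. A hedged PySpark sketch of the same max_by/min_by semantics (illustrative only, not part of this commit):

```python
# Two rows tie on the maximum "earnings", so max_by may return either
# associated "course" -- the non-determinism the new doc note warns about.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("Java", 30000), ("dotNET", 30000), ("R", 10000)],
    ["course", "earnings"],
)

# Either "Java" or "dotNET" is a correct answer; which one you get can vary
# between runs. min_by behaves the same way on ties for the minimum.
df.agg(F.max_by("course", "earnings").alias("top_course")).show()
```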
63 changes: 35 additions & 28 deletions assembly/pom.xml
@@ -74,6 +74,41 @@
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.grpc</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.google.code.gson</groupId>
+          <artifactId>gson</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>failureaccess</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-avro_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-protobuf_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>

<!--
Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
@@ -138,34 +173,6 @@
</dependency>
</dependencies>
</profile>
-    <profile>
-      <id>connect</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-connect_${scala.binary.version}</artifactId>
-          <version>${project.version}</version>
-          <exclusions>
-            <exclusion>
-              <groupId>org.apache.spark</groupId>
-              <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
-            </exclusion>
-          </exclusions>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-avro_${scala.binary.version}</artifactId>
-          <version>${project.version}</version>
-          <scope>provided</scope>
-        </dependency>
-        <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-protobuf_${scala.binary.version}</artifactId>
-          <version>${project.version}</version>
-          <scope>provided</scope>
-        </dependency>
-      </dependencies>
-    </profile>
<profile>
<id>kubernetes</id>
<dependencies>
3 changes: 1 addition & 2 deletions bin/spark-connect-shell
@@ -23,5 +23,4 @@ if [ -z "${SPARK_HOME}" ]; then
source "$(dirname "$0")"/find-spark-home
fi

-# This requires building the spark with `-Pconnect`, e,g, `build/sbt -Pconnect package`
-exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
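With spark-connect now a default assembly dependency (see the assembly/pom.xml change above), the `-Pconnect` build note becomes obsolete, which is why the script drops it. As a hedged illustration of the same idea from Python, assuming a distribution built this way, a local Connect session needs no special profile:

```python
# With the Connect server jars in the default assembly, PySpark can launch an
# in-process local Connect server on demand -- no -Pconnect build required.
# The "local[...]" remote URL triggering an auto-started local server is an
# assumption based on PySpark's documented --remote "local[*]" behavior.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("local[2]").getOrCreate()
print(spark.range(3).count())
spark.stop()
```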
[Diff listing truncated: 2,509 of the 2,517 changed files are not shown above.]