fix(hudi): upgrade to 0.7.0 (spark 3)
lyogev committed Jan 30, 2021
1 parent 561cf6d commit 6b6b2bb
Showing 7 changed files with 25 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -28,7 +28,7 @@ env:
 - SPARK2_VERSION=2.4.6
 - SPARK_VERSION=3.0.1
 - HIVE_VERSION=2.3.7
-- HUDI_VERSION=0.5.3
+- HUDI_VERSION=0.7.0
 - TARGET_CACHE=$HOME/target-cache/${TRAVIS_COMMIT}
 - LC_ALL=en_US.UTF-8
 - LANG=en_US.UTF-8
2 changes: 1 addition & 1 deletion README.md
@@ -530,7 +530,7 @@ Metorikku supports reading/writing with [Apache Hudi](https://github.com/apache/
 Hudi is a very exciting project that basically allows upserts and deletes directly on top of partitioned parquet data.
 
 In order to use Hudi with Metorikku you need to add to your classpath (via ```--jars``` or if running locally with ```-cp```)
-an external JAR from here: https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.5.3/hudi-spark-bundle_2.12-0.5.3.jar
+an external JAR from here: https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar
 
 To run Hudi jobs you also have to make sure you have the following spark configuration (pass with ```--conf``` or ```-D```):
 ```properties
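Putting that README guidance together with the upgraded bundle, a minimal sketch of such an invocation (mirroring the e2e compose files in this commit; the job config path is taken from the Hudi e2e example, and the Kryo serializer setting is an assumption based on Hudi's general requirements rather than something shown in this diff):

```shell
# A hedged sketch of a Hudi-enabled Metorikku run with the 0.7.0 bundle.
# The --jars URL and path-filter conf come from this commit's e2e files;
# spark.serializer=KryoSerializer is an assumption (Hudi generally requires it).
spark-submit \
  --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar \
  --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --class com.yotpo.metorikku.Metorikku \
  metorikku.jar -c examples/hudi/movies.yaml
```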
2 changes: 1 addition & 1 deletion build.sbt
@@ -80,7 +80,7 @@ libraryDependencies ++= Seq(
   "com.redislabs" %% "spark-redis" % "2.5.0" % "provided",
   "org.apache.kafka" %% "kafka" % "2.2.0" % "provided",
   "za.co.absa" %% "abris" % "3.2.1" % "provided" excludeAll(excludeAvro, excludeSpark),
-  "org.apache.hudi" %% "hudi-spark-bundle" % "0.5.3" % "provided",
+  "org.apache.hudi" %% "hudi-spark-bundle" % "0.7.0" % "provided",
   "org.apache.parquet" % "parquet-avro" % "1.10.1" % "provided",
   "com.amazon.deequ" % "deequ" % "1.0.5" excludeAll(excludeSpark, excludeScalanlp),
   "org.apache.avro" % "avro" % "1.8.2" % "provided"
2 changes: 1 addition & 1 deletion docker/hive/Dockerfile
@@ -23,7 +23,7 @@ ENV MYSQL_CONNECTOR_VERSION=5.1.47
 RUN wget -q https://repo1.maven.org/maven2/mysql/mysql-connector-java/$MYSQL_CONNECTOR_VERSION/mysql-connector-java-$MYSQL_CONNECTOR_VERSION.jar \
   && mv mysql-connector-java-$MYSQL_CONNECTOR_VERSION.jar $HIVE_HOME/lib
 
-ARG HUDI_VERSION=0.5.3
+ARG HUDI_VERSION=0.7.0
 RUN wget -q https://repo1.maven.org/maven2/org/apache/hudi/hudi-hive-bundle/$HUDI_VERSION/hudi-hive-bundle-$HUDI_VERSION.jar \
   && mv hudi-hive-bundle-$HUDI_VERSION.jar $HIVE_HOME/lib
 RUN wget -q https://repo1.maven.org/maven2/org/apache/hudi/hudi-hadoop-mr-bundle/$HUDI_VERSION/hudi-hadoop-mr-bundle-$HUDI_VERSION.jar \
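Because HUDI_VERSION is declared with ARG here, the Hive image can be rebuilt against a different Hudi release without editing the Dockerfile; a small sketch (the image tag is illustrative, not one used by this repo):

```shell
# Override the default ARG HUDI_VERSION=0.7.0 at build time (tag name is hypothetical)
docker build --build-arg HUDI_VERSION=0.7.0 -t metorikku/hive:hudi-0.7.0 docker/hive
```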
2 changes: 1 addition & 1 deletion docker/spark/custom-hadoop/Dockerfile
@@ -27,7 +27,7 @@ RUN wget -q https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-
   && rm apache-hive-$HIVE_VERSION-bin.tar.gz
 
 #Hudi for hive
-ENV HUDI_VERSION=0.5.3
+ENV HUDI_VERSION=0.7.0
 RUN wget -q https://repo1.maven.org/maven2/org/apache/hudi/hudi-hive-bundle/$HUDI_VERSION/hudi-hive-bundle-$HUDI_VERSION.jar \
   && mv hudi-hive-bundle-$HUDI_VERSION.jar $HIVE_HOME/lib
 RUN wget -q https://repo1.maven.org/maven2/org/apache/hudi/hudi-hadoop-mr-bundle/$HUDI_VERSION/hudi-hadoop-mr-bundle-$HUDI_VERSION.jar \
12 changes: 6 additions & 6 deletions e2e/cdc/docker-compose.yml
@@ -85,13 +85,13 @@ services:
       - mysql
   # Spark Resources
   spark-master:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     entrypoint:
       - /scripts/entrypoint-master.sh
     logging:
       driver: none
   spark-worker:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     entrypoint:
       - /scripts/entrypoint-worker.sh
     logging:
@@ -108,9 +108,9 @@ services:
       - SCHEMA_REGISTRY_LISTENERS=http://schema-registry:8081
   # Spark job: Read from CDC Kafka topic, Deserialize according to schema registry, Write to Hudi output
   spark-submit:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --repositories http://packages.confluent.io/maven/ --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar,https://repo1.maven.org/maven2/za/co/absa/abris_2.11/3.2.2/abris_2.11-3.2.2.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --packages io.confluent:kafka-schema-registry-client:5.3.0,io.confluent:kafka-avro-serializer:5.3.0 --conf spark.sql.warehouse.dir=/warehouse --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/kafka/kafka_example_cdc.yaml
+      - SUBMIT_COMMAND=spark-submit --repositories http://packages.confluent.io/maven/ --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar,https://repo1.maven.org/maven2/za/co/absa/abris_2.12/3.2.2/abris_2.12-3.2.2.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --packages io.confluent:kafka-schema-registry-client:5.3.0,io.confluent:kafka-avro-serializer:5.3.0 --conf spark.sql.warehouse.dir=/warehouse --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/kafka/kafka_example_cdc.yaml
       - HIVE_METASTORE_URI=hive:9083
     entrypoint:
       - /scripts/entrypoint-submit.sh
@@ -142,9 +142,9 @@ services:
   #   - 9083:9083
   # Hive test: Select from hive table and assert over the result
   hive-tester:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --conf spark.sql.warehouse.dir=/warehouse --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings /test_metrics/hive_test.yaml
+      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --conf spark.sql.warehouse.dir=/warehouse --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings /test_metrics/hive_test.yaml
       - HIVE_METASTORE_URI=hive:9083
     volumes:
       - ./output/:/examples/output/
28 changes: 14 additions & 14 deletions e2e/hudi/docker-compose.yml
@@ -1,9 +1,9 @@
 version: '3'
 services:
   spark-submit:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/hudi/movies.yaml
+      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/hudi/movies.yaml
       - HIVE_METASTORE_URI=hive:9083
     volumes:
       - ./output/:/examples/output/
@@ -13,9 +13,9 @@ services:
       - spark-master
       - spark-worker
   hive-tester:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings examples/hudi/movies_test.yaml
+      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings examples/hudi/movies_test.yaml
       - HIVE_METASTORE_URI=hive:9083
     volumes:
       - ./output/:/examples/output/
@@ -25,9 +25,9 @@ services:
       - spark-master
       - spark-worker
   spark-submit-manual-hive-sync:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/hudi/manual_hive_sync_config.yaml
+      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/hudi/manual_hive_sync_config.yaml
       - HIVE_METASTORE_URI=hive:9083
     volumes:
       - ./output/:/examples/output/
@@ -37,9 +37,9 @@ services:
       - spark-master
       - spark-worker
   hive-tester-manual-hive-sync:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings examples/hudi/manual_hive_sync_test.yaml
+      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings examples/hudi/manual_hive_sync_test.yaml
       - HIVE_METASTORE_URI=hive:9083
     volumes:
       - ./output/:/examples/output/
@@ -49,9 +49,9 @@ services:
       - spark-master
       - spark-worker
   spark-submit-manual-hive-sync-non-partition:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/hudi/manual_hive_sync_no_partitions_config.yaml
+      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.Metorikku metorikku.jar -c examples/hudi/manual_hive_sync_no_partitions_config.yaml
       - HIVE_METASTORE_URI=hive:9083
     volumes:
       - ./output/:/examples/output/
@@ -61,9 +61,9 @@ services:
       - spark-master
       - spark-worker
   hive-tester-manual-hive-sync-no-partition:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     environment:
-      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.11/0.5.3/hudi-spark-bundle_2.11-0.5.3.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings examples/hudi/manual_hive_sync_no_partitions_test.yaml
+      - SUBMIT_COMMAND=spark-submit --jars https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark-bundle_2.12/0.7.0/hudi-spark-bundle_2.12-0.7.0.jar --conf spark.hadoop.mapreduce.input.pathFilter.class=org.apache.hudi.hadoop.HoodieROTablePathFilter --class com.yotpo.metorikku.MetorikkuTester metorikku.jar --test-settings examples/hudi/manual_hive_sync_no_partitions_test.yaml
       - HIVE_METASTORE_URI=hive:9083
     volumes:
       - ./output/:/examples/output/
@@ -73,13 +73,13 @@ services:
       - spark-master
       - spark-worker
   spark-master:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     entrypoint:
       - /scripts/entrypoint-master.sh
     logging:
       driver: none
   spark-worker:
-    image: metorikku/metorikku:spark2_standalone
+    image: metorikku/metorikku:standalone
     entrypoint:
       - /scripts/entrypoint-worker.sh
     volumes:
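For local verification, the whole Hudi e2e stack above can be brought up with docker-compose; a hedged sketch (the --abort-on-container-exit flag is our choice for interactive runs, not necessarily how CI invokes it):

```shell
# Start the standalone Spark master/worker plus the Hudi submit/test services
# defined in the compose file above, stopping when any container exits
docker-compose -f e2e/hudi/docker-compose.yml up --abort-on-container-exit
```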
