From 04e06e34f21c4f4edc6fdde286ddd7e3d8429789 Mon Sep 17 00:00:00 2001 From: chenxu Date: Tue, 6 Aug 2024 11:59:15 +0800 Subject: [PATCH] fix hadoop deps in flink Signed-off-by: chenxu --- .github/workflows/presto-cdc-test.yml | 24 +++++++------------ lakesoul-common/pom.xml | 2 +- lakesoul-flink/pom.xml | 8 ++++++- .../flink/lakesoul/test/AbstractTestBase.java | 2 +- lakesoul-presto/pom.xml | 22 ++++------------- native-io/lakesoul-io-java/pom.xml | 1 - rust/lakesoul-io/src/lakesoul_io_config.rs | 5 ++++ 7 files changed, 27 insertions(+), 37 deletions(-) diff --git a/.github/workflows/presto-cdc-test.yml b/.github/workflows/presto-cdc-test.yml index 5806e668c..9e3a089ad 100644 --- a/.github/workflows/presto-cdc-test.yml +++ b/.github/workflows/presto-cdc-test.yml @@ -131,13 +131,11 @@ jobs: run: | docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d -c org.apache.flink.lakesoul.test.benchmark.LakeSoulSourceToSinkTable -C file:///opt/flink/work-dir/$FLINK_JAR_NAME /opt/flink/work-dir/$FLINK_TEST_JAR_NAME --source.database.name test_cdc --source.table.name default_init --sink.database.name flink_sink --sink.table.name default_init --use.cdc true --hash.bucket.number 2 --job.checkpoint_interval 10000 --server_time_zone UTC --warehouse.path s3://lakesoul-test-bucket/flink-sink/data --flink.checkpoint s3://lakesoul-test-bucket/flink-sink/chk sleep 30s - # - name: Start flink DataGenSource without primary key task-3 - # run: | - # docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d -c org.apache.flink.lakesoul.test.benchmark.LakeSoulDataGenSourceTable -C file:///opt/flink/work-dir/$FLINK_JAR_NAME /opt/flink/work-dir/$FLINK_TEST_JAR_NAME --sink.database.name flink --sink.table.name sink_table --job.checkpoint_interval 10000 --server_time_zone UTC --warehouse.path s3://lakesoul-test-bucket/flink/ --flink.checkpoint s3://lakesoul-test-bucket/flink/chk --sink.parallel 2 --data.size 1000 --write.time 5 - name: Download mysql driver jar run: | cd ./script/benchmark/work-dir if [ ! -e mysql-connector-java-8.0.30.jar ]; then wget -q https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.30/mysql-connector-java-8.0.30.jar; fi + if [ ! -e presto-jdbc-0.282.jar ]; then wget -q https://repo1.maven.org/maven2/com/facebook/presto/presto-jdbc/0.282/presto-jdbc-0.282.jar; fi - name: Create table and insert data run: | cd ./script/benchmark @@ -150,11 +148,11 @@ jobs: - name: "[Check] Mysql cdc data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark - name: "[Check] Presto source to sink data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true - name: Adding columns for tables and deleting some data from tables run: | cd ./script/benchmark @@ -165,11 +163,11 @@ jobs: - name: "[Check] Mysql cdc data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark - name: "[Check] Presto source to sink data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true - name: Updating data in tables run: | cd ./script/benchmark @@ -178,11 +176,11 @@ jobs: - name: "[Check] Mysql cdc data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark - name: "[Check] Presto source to sink data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true - name: Dropping columns and deleting some data in tables run: | cd ./script/benchmark @@ -193,15 +191,11 @@ jobs: - name: "[Check] Mysql cdc data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark - name: "[Check] Presto source to sink data accuracy verification task" run: | cd ./script/benchmark - docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true - # - name: "[Check] Table without primary key data accuracy verification task" - # run: | - # cd ./script/benchmark - # docker run --cpus 2 -m 5000m --net lakesoul-docker-compose-env_default --rm -t -v ${PWD}/work-dir:/opt/spark/work-dir --env lakesoul_home=/opt/spark/work-dir/lakesoul.properties bitnami/spark:3.3.1 spark-submit --driver-memory 4G --executor-memory 4G --conf spark.driver.memoryOverhead=1500m --conf spark.executor.memoryOverhead=1500m --conf spark.dmetasoul.lakesoul.native.io.enable=true --jars /opt/spark/work-dir/$SPARK_JAR_NAME,/opt/spark/work-dir/mysql-connector-java-8.0.30.jar --class org.apache.spark.sql.lakesoul.benchmark.FlinkWriteDataCheck --master local[4] /opt/spark/work-dir/$SPARK_TEST_JAR_NAME --csv.path s3://lakesoul-test-bucket/flink/csv --lakesoul.table.path s3://lakesoul-test-bucket/flink/sink_table --server.time.zone UTC + docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true - name: Print Flink Log if: always() run: | diff --git a/lakesoul-common/pom.xml b/lakesoul-common/pom.xml index 0f99d5804..93515dc8a 100644 --- a/lakesoul-common/pom.xml +++ b/lakesoul-common/pom.xml @@ -341,7 +341,7 @@ SPDX-License-Identifier: Apache-2.0 org.apache.hadoop hadoop-client-api - 3.4.0 + 3.3.2 ${local.scope} diff --git a/lakesoul-flink/pom.xml b/lakesoul-flink/pom.xml index 08bdb24a4..1a9d24282 100644 --- a/lakesoul-flink/pom.xml +++ b/lakesoul-flink/pom.xml @@ -350,7 +350,13 @@ SPDX-License-Identifier: Apache-2.0 org.apache.hadoop hadoop-client-api - 3.4.0 + 3.3.2 + ${local.scope} + + + org.apache.hadoop + hadoop-client-runtime + 3.3.2 ${local.scope} diff --git a/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java b/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java index 684ba423b..86f30e919 100644 --- a/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java +++ b/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java @@ -96,7 +96,7 @@ public final void cleanupRunningJobs() throws Exception { /* * @path: a subdir name under temp dir, e.g. /lakesoul_table - * @return: file://PLATFORM_TMP_DIR/path + * @return: file:///PLATFORM_TMP_DIR/path */ public static String getTempDirUri(String path) { String tmp = System.getProperty("java.io.tmpdir"); diff --git a/lakesoul-presto/pom.xml b/lakesoul-presto/pom.xml index e0fb904a7..3fa205ff8 100644 --- a/lakesoul-presto/pom.xml +++ b/lakesoul-presto/pom.xml @@ -23,7 +23,6 @@ UTF-8 0.282 provided - 8.1.0 @@ -63,13 +62,6 @@ ${presto.version} test - - com.facebook.presto - presto-jdbc - ${presto.version} - ${local.scope} - - org.apache.parquet parquet-column @@ -83,10 +75,10 @@ - com.mysql - mysql-connector-j - compile - ${mysql.version} + org.apache.hadoop + hadoop-client-api + 3.3.2 + ${local.scope} @@ -94,12 +86,6 @@ guava 32.0.0-jre - - org.apache.hadoop - hadoop-client-api - 3.4.0 - ${local.scope} - diff --git a/native-io/lakesoul-io-java/pom.xml b/native-io/lakesoul-io-java/pom.xml index 65c0a1f37..23ac2ca09 100644 --- a/native-io/lakesoul-io-java/pom.xml +++ b/native-io/lakesoul-io-java/pom.xml @@ -32,7 +32,6 @@ SPDX-License-Identifier: Apache-2.0 0.6.1 - com.dmetasoul diff --git a/rust/lakesoul-io/src/lakesoul_io_config.rs b/rust/lakesoul-io/src/lakesoul_io_config.rs index 8f1f7e29a..64c65db52 100644 --- a/rust/lakesoul-io/src/lakesoul_io_config.rs +++ b/rust/lakesoul-io/src/lakesoul_io_config.rs @@ -24,6 +24,7 @@ use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::DataFusionError::{External, ObjectStore}; use datafusion_substrait::substrait::proto::Plan; use derivative::Derivative; +use log::info; use object_store::aws::AmazonS3Builder; use object_store::{ClientOptions, RetryConfig}; use url::{ParseError, Url}; @@ -495,11 +496,13 @@ pub fn create_session_context_with_planner( .cloned(); if let Some(fs) = default_fs { config.default_fs = fs.clone(); + info!("NativeIO register default fs {}", fs); register_object_store(&fs, config, &runtime)?; }; if !config.prefix.is_empty() { let prefix = config.prefix.clone(); + info!("NativeIO register prefix fs {}", prefix); let normalized_prefix = register_object_store(&prefix, config, &runtime)?; config.prefix = normalized_prefix; } @@ -512,6 +515,8 @@ pub fn create_session_context_with_planner( .map(|file_name| register_object_store(&file_name, config, &runtime)) .collect::>>()?; config.files = normalized_filenames; + info!("NativeIO normalized file names: {:?}", config.files); + info!("NativeIO final config: {:?}", config); // create session context let mut state = if let Some(planner) = planner {