diff --git a/.github/workflows/presto-cdc-test.yml b/.github/workflows/presto-cdc-test.yml
index 5806e668c..9e3a089ad 100644
--- a/.github/workflows/presto-cdc-test.yml
+++ b/.github/workflows/presto-cdc-test.yml
@@ -131,13 +131,11 @@ jobs:
run: |
docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d -c org.apache.flink.lakesoul.test.benchmark.LakeSoulSourceToSinkTable -C file:///opt/flink/work-dir/$FLINK_JAR_NAME /opt/flink/work-dir/$FLINK_TEST_JAR_NAME --source.database.name test_cdc --source.table.name default_init --sink.database.name flink_sink --sink.table.name default_init --use.cdc true --hash.bucket.number 2 --job.checkpoint_interval 10000 --server_time_zone UTC --warehouse.path s3://lakesoul-test-bucket/flink-sink/data --flink.checkpoint s3://lakesoul-test-bucket/flink-sink/chk
sleep 30s
- # - name: Start flink DataGenSource without primary key task-3
- # run: |
- # docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d -c org.apache.flink.lakesoul.test.benchmark.LakeSoulDataGenSourceTable -C file:///opt/flink/work-dir/$FLINK_JAR_NAME /opt/flink/work-dir/$FLINK_TEST_JAR_NAME --sink.database.name flink --sink.table.name sink_table --job.checkpoint_interval 10000 --server_time_zone UTC --warehouse.path s3://lakesoul-test-bucket/flink/ --flink.checkpoint s3://lakesoul-test-bucket/flink/chk --sink.parallel 2 --data.size 1000 --write.time 5
- name: Download mysql driver jar
run: |
cd ./script/benchmark/work-dir
if [ ! -e mysql-connector-java-8.0.30.jar ]; then wget -q https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.30/mysql-connector-java-8.0.30.jar; fi
+ if [ ! -e presto-jdbc-0.282.jar ]; then wget -q https://repo1.maven.org/maven2/com/facebook/presto/presto-jdbc/0.282/presto-jdbc-0.282.jar; fi
- name: Create table and insert data
run: |
cd ./script/benchmark
@@ -150,11 +148,11 @@ jobs:
- name: "[Check] Mysql cdc data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- name: Adding columns for tables and deleting some data from tables
run: |
cd ./script/benchmark
@@ -165,11 +163,11 @@ jobs:
- name: "[Check] Mysql cdc data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- name: Updating data in tables
run: |
cd ./script/benchmark
@@ -178,11 +176,11 @@ jobs:
- name: "[Check] Mysql cdc data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- name: Dropping columns and deleting some data in tables
run: |
cd ./script/benchmark
@@ -193,15 +191,11 @@ jobs:
- name: "[Check] Mysql cdc data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
- name: "[Check] Presto source to sink data accuracy verification task"
run: |
cd ./script/benchmark
- docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- # - name: "[Check] Table without primary key data accuracy verification task"
- # run: |
- # cd ./script/benchmark
- # docker run --cpus 2 -m 5000m --net lakesoul-docker-compose-env_default --rm -t -v ${PWD}/work-dir:/opt/spark/work-dir --env lakesoul_home=/opt/spark/work-dir/lakesoul.properties bitnami/spark:3.3.1 spark-submit --driver-memory 4G --executor-memory 4G --conf spark.driver.memoryOverhead=1500m --conf spark.executor.memoryOverhead=1500m --conf spark.dmetasoul.lakesoul.native.io.enable=true --jars /opt/spark/work-dir/$SPARK_JAR_NAME,/opt/spark/work-dir/mysql-connector-java-8.0.30.jar --class org.apache.spark.sql.lakesoul.benchmark.FlinkWriteDataCheck --master local[4] /opt/spark/work-dir/$SPARK_TEST_JAR_NAME --csv.path s3://lakesoul-test-bucket/flink/csv --lakesoul.table.path s3://lakesoul-test-bucket/flink/sink_table --server.time.zone UTC
+ docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
- name: Print Flink Log
if: always()
run: |
diff --git a/lakesoul-common/pom.xml b/lakesoul-common/pom.xml
index 0f99d5804..93515dc8a 100644
--- a/lakesoul-common/pom.xml
+++ b/lakesoul-common/pom.xml
@@ -341,7 +341,7 @@ SPDX-License-Identifier: Apache-2.0
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-client-api</artifactId>
-            <version>3.4.0</version>
+            <version>3.3.2</version>
             <scope>${local.scope}</scope>
diff --git a/lakesoul-flink/pom.xml b/lakesoul-flink/pom.xml
index 08bdb24a4..1a9d24282 100644
--- a/lakesoul-flink/pom.xml
+++ b/lakesoul-flink/pom.xml
@@ -350,7 +350,13 @@ SPDX-License-Identifier: Apache-2.0
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-client-api</artifactId>
-            <version>3.4.0</version>
+            <version>3.3.2</version>
+            <scope>${local.scope}</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client-runtime</artifactId>
+            <version>3.3.2</version>
             <scope>${local.scope}</scope>
diff --git a/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java b/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java
index 684ba423b..86f30e919 100644
--- a/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java
+++ b/lakesoul-flink/src/test/java/org/apache/flink/lakesoul/test/AbstractTestBase.java
@@ -96,7 +96,7 @@ public final void cleanupRunningJobs() throws Exception {
/*
* @path: a subdir name under temp dir, e.g. /lakesoul_table
- * @return: file://PLATFORM_TMP_DIR/path
+ * @return: file:///PLATFORM_TMP_DIR/path
*/
public static String getTempDirUri(String path) {
String tmp = System.getProperty("java.io.tmpdir");
diff --git a/lakesoul-presto/pom.xml b/lakesoul-presto/pom.xml
index e0fb904a7..3fa205ff8 100644
--- a/lakesoul-presto/pom.xml
+++ b/lakesoul-presto/pom.xml
@@ -23,7 +23,6 @@
UTF-8
0.282
provided
-        <mysql.version>8.1.0</mysql.version>
@@ -63,13 +62,6 @@
${presto.version}
test
-        <dependency>
-            <groupId>com.facebook.presto</groupId>
-            <artifactId>presto-jdbc</artifactId>
-            <version>${presto.version}</version>
-            <scope>${local.scope}</scope>
-        </dependency>
-
org.apache.parquet
parquet-column
@@ -83,10 +75,10 @@
-            <groupId>com.mysql</groupId>
-            <artifactId>mysql-connector-j</artifactId>
-            <scope>compile</scope>
-            <version>${mysql.version}</version>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client-api</artifactId>
+            <version>3.3.2</version>
+            <scope>${local.scope}</scope>
@@ -94,12 +86,6 @@
             <artifactId>guava</artifactId>
             <version>32.0.0-jre</version>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-client-api</artifactId>
-            <version>3.4.0</version>
-            <scope>${local.scope}</scope>
-        </dependency>
diff --git a/native-io/lakesoul-io-java/pom.xml b/native-io/lakesoul-io-java/pom.xml
index 65c0a1f37..23ac2ca09 100644
--- a/native-io/lakesoul-io-java/pom.xml
+++ b/native-io/lakesoul-io-java/pom.xml
@@ -32,7 +32,6 @@ SPDX-License-Identifier: Apache-2.0
0.6.1
-
com.dmetasoul
diff --git a/rust/lakesoul-io/src/lakesoul_io_config.rs b/rust/lakesoul-io/src/lakesoul_io_config.rs
index 8f1f7e29a..64c65db52 100644
--- a/rust/lakesoul-io/src/lakesoul_io_config.rs
+++ b/rust/lakesoul-io/src/lakesoul_io_config.rs
@@ -24,6 +24,7 @@ use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion_common::DataFusionError::{External, ObjectStore};
use datafusion_substrait::substrait::proto::Plan;
use derivative::Derivative;
+use log::info;
use object_store::aws::AmazonS3Builder;
use object_store::{ClientOptions, RetryConfig};
use url::{ParseError, Url};
@@ -495,11 +496,13 @@ pub fn create_session_context_with_planner(
.cloned();
if let Some(fs) = default_fs {
config.default_fs = fs.clone();
+ info!("NativeIO register default fs {}", fs);
register_object_store(&fs, config, &runtime)?;
};
if !config.prefix.is_empty() {
let prefix = config.prefix.clone();
+ info!("NativeIO register prefix fs {}", prefix);
let normalized_prefix = register_object_store(&prefix, config, &runtime)?;
config.prefix = normalized_prefix;
}
@@ -512,6 +515,8 @@ pub fn create_session_context_with_planner(
.map(|file_name| register_object_store(&file_name, config, &runtime))
             .collect::<Result<Vec<_>>>()?;
config.files = normalized_filenames;
+ info!("NativeIO normalized file names: {:?}", config.files);
+ info!("NativeIO final config: {:?}", config);
// create session context
let mut state = if let Some(planner) = planner {