Commit 04e06e3
fix hadoop deps in flink
Signed-off-by: chenxu <[email protected]>
dmetasoul01 committed Aug 6, 2024
1 parent 005d4e0 · commit 04e06e3
Showing 7 changed files with 27 additions and 37 deletions.
24 changes: 9 additions & 15 deletions .github/workflows/presto-cdc-test.yml
@@ -131,13 +131,11 @@ jobs:
         run: |
           docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d -c org.apache.flink.lakesoul.test.benchmark.LakeSoulSourceToSinkTable -C file:///opt/flink/work-dir/$FLINK_JAR_NAME /opt/flink/work-dir/$FLINK_TEST_JAR_NAME --source.database.name test_cdc --source.table.name default_init --sink.database.name flink_sink --sink.table.name default_init --use.cdc true --hash.bucket.number 2 --job.checkpoint_interval 10000 --server_time_zone UTC --warehouse.path s3://lakesoul-test-bucket/flink-sink/data --flink.checkpoint s3://lakesoul-test-bucket/flink-sink/chk
           sleep 30s
-      # - name: Start flink DataGenSource without primary key task-3
-      #   run: |
-      #     docker exec -t lakesoul-docker-compose-env-jobmanager-1 flink run -d -c org.apache.flink.lakesoul.test.benchmark.LakeSoulDataGenSourceTable -C file:///opt/flink/work-dir/$FLINK_JAR_NAME /opt/flink/work-dir/$FLINK_TEST_JAR_NAME --sink.database.name flink --sink.table.name sink_table --job.checkpoint_interval 10000 --server_time_zone UTC --warehouse.path s3://lakesoul-test-bucket/flink/ --flink.checkpoint s3://lakesoul-test-bucket/flink/chk --sink.parallel 2 --data.size 1000 --write.time 5
       - name: Download mysql driver jar
         run: |
           cd ./script/benchmark/work-dir
           if [ ! -e mysql-connector-java-8.0.30.jar ]; then wget -q https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.30/mysql-connector-java-8.0.30.jar; fi
+          if [ ! -e presto-jdbc-0.282.jar ]; then wget -q https://repo1.maven.org/maven2/com/facebook/presto/presto-jdbc/0.282/presto-jdbc-0.282.jar; fi
       - name: Create table and insert data
         run: |
           cd ./script/benchmark
@@ -150,11 +148,11 @@ jobs:
       - name: "[Check] Mysql cdc data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
       - name: "[Check] Presto source to sink data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
       - name: Adding columns for tables and deleting some data from tables
         run: |
           cd ./script/benchmark
@@ -165,11 +163,11 @@ jobs:
       - name: "[Check] Mysql cdc data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
       - name: "[Check] Presto source to sink data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
       - name: Updating data in tables
         run: |
           cd ./script/benchmark
@@ -178,11 +176,11 @@ jobs:
       - name: "[Check] Mysql cdc data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
       - name: "[Check] Presto source to sink data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
       - name: Dropping columns and deleting some data in tables
         run: |
           cd ./script/benchmark
@@ -193,15 +191,11 @@ jobs:
       - name: "[Check] Mysql cdc data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark
       - name: "[Check] Presto source to sink data accuracy verification task"
         run: |
           cd ./script/benchmark
-          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
-      # - name: "[Check] Table without primary key data accuracy verification task"
-      #   run: |
-      #     cd ./script/benchmark
-      #     docker run --cpus 2 -m 5000m --net lakesoul-docker-compose-env_default --rm -t -v ${PWD}/work-dir:/opt/spark/work-dir --env lakesoul_home=/opt/spark/work-dir/lakesoul.properties bitnami/spark:3.3.1 spark-submit --driver-memory 4G --executor-memory 4G --conf spark.driver.memoryOverhead=1500m --conf spark.executor.memoryOverhead=1500m --conf spark.dmetasoul.lakesoul.native.io.enable=true --jars /opt/spark/work-dir/$SPARK_JAR_NAME,/opt/spark/work-dir/mysql-connector-java-8.0.30.jar --class org.apache.spark.sql.lakesoul.benchmark.FlinkWriteDataCheck --master local[4] /opt/spark/work-dir/$SPARK_TEST_JAR_NAME --csv.path s3://lakesoul-test-bucket/flink/csv --lakesoul.table.path s3://lakesoul-test-bucket/flink/sink_table --server.time.zone UTC
+          docker run --cpus 2 -m 5000m --net container:presto --rm -t -v ${PWD}/work-dir:/root openjdk:11 java -cp /root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar com.facebook.presto.benchmark.Benchmark --cdc.contract false --single.table.contract true
       - name: Print Flink Log
         if: always()
         run: |
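The recurring change in this workflow is the benchmark classpath: with presto-jdbc and the MySQL driver no longer bundled into the LakeSoul Presto jar (see the lakesoul-presto/pom.xml section below), the verification steps download both jars and list them on the classpath explicitly. A minimal sketch of the pattern, using only names that appear in the workflow above:

```bash
# Sketch of the classpath pattern used by the verification steps above.
# PRESTO_TEST_JAR_NAME / PRESTO_JAR_NAME are workflow env vars; the two
# extra jars are fetched by the "Download mysql driver jar" step.
CP="/root/$PRESTO_TEST_JAR_NAME:/root/$PRESTO_JAR_NAME"
CP="$CP:/root/mysql-connector-java-8.0.30.jar:/root/presto-jdbc-0.282.jar"
java -cp "$CP" com.facebook.presto.benchmark.Benchmark
```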
2 changes: 1 addition & 1 deletion lakesoul-common/pom.xml
@@ -341,7 +341,7 @@ SPDX-License-Identifier: Apache-2.0
         <dependency>
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-client-api</artifactId>
-            <version>3.4.0</version>
+            <version>3.3.2</version>
             <scope>${local.scope}</scope>
         </dependency>
     </dependencies>
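To confirm which Hadoop version actually lands on the classpath after this pin, the resolved tree can be inspected with a standard Maven command (the module path comes from this diff):

```bash
# Show only the org.apache.hadoop artifacts resolved for lakesoul-common.
mvn -pl lakesoul-common dependency:tree -Dincludes=org.apache.hadoop
```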
8 changes: 7 additions & 1 deletion lakesoul-flink/pom.xml
@@ -350,7 +350,13 @@ SPDX-License-Identifier: Apache-2.0
         <dependency>
             <groupId>org.apache.hadoop</groupId>
             <artifactId>hadoop-client-api</artifactId>
-            <version>3.4.0</version>
+            <version>3.3.2</version>
             <scope>${local.scope}</scope>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client-runtime</artifactId>
+            <version>3.3.2</version>
+            <scope>${local.scope}</scope>
+        </dependency>
     </dependencies>
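The pairing matters: hadoop-client-api carries the compile-time API classes, while hadoop-client-runtime carries the shaded implementation and third-party classes those APIs need when they run. A hedged illustration (the class names are standard Hadoop API; the failure mode in the comments is an assumption about what adding the runtime jar fixes, not something stated in this commit):

```java
// Compiles against hadoop-client-api alone, but at run time the Hadoop
// classes reach into shaded third-party code that ships in
// hadoop-client-runtime; without that jar on the classpath this would
// likely fail with a NoClassDefFoundError.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopClientPairing {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolves fs.defaultFS (the local filesystem by default).
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.exists(new Path("/tmp")));
    }
}
```

Keeping both artifacts on the same version (3.3.2 here) avoids mixing API and runtime classes from different Hadoop releases.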
@@ -96,7 +96,7 @@ public final void cleanupRunningJobs() throws Exception {

     /*
      * @path: a subdir name under temp dir, e.g. /lakesoul_table
-     * @return: file://PLATFORM_TMP_DIR/path
+     * @return: file:///PLATFORM_TMP_DIR/path
      */
     public static String getTempDirUri(String path) {
         String tmp = System.getProperty("java.io.tmpdir");
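The comment fix is about URI shape: a local absolute path needs the empty-authority form file:///..., because in file://host/path the segment right after the double slash is parsed as a host. A small sketch of how such a URI can be produced (the helper name mirrors the method above; the body is a hypothetical re-implementation for illustration, not the project's actual code):

```java
import java.nio.file.Paths;

public class TempDirUriSketch {
    // Hypothetical re-implementation for illustration only.
    static String getTempDirUri(String path) {
        String tmp = System.getProperty("java.io.tmpdir");
        // Path.toUri() emits the empty-authority form: file:///...
        return Paths.get(tmp, path).toUri().toString();
    }

    public static void main(String[] args) {
        // Prints e.g. file:///tmp/lakesoul_table on Linux.
        System.out.println(getTempDirUri("lakesoul_table"));
    }
}
```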
22 changes: 4 additions & 18 deletions lakesoul-presto/pom.xml
@@ -23,7 +23,6 @@
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
         <presto.version>0.282</presto.version>
         <local.scope>provided</local.scope>
-        <mysql.version>8.1.0</mysql.version>
     </properties>

     <dependencies>
@@ -63,13 +62,6 @@
             <version>${presto.version}</version>
             <scope>test</scope>
         </dependency>
-        <dependency>
-            <groupId>com.facebook.presto</groupId>
-            <artifactId>presto-jdbc</artifactId>
-            <version>${presto.version}</version>
-            <scope>${local.scope}</scope>
-        </dependency>
-
         <dependency>
             <groupId>org.apache.parquet</groupId>
             <artifactId>parquet-column</artifactId>
@@ -83,23 +75,17 @@
         </dependency>

         <dependency>
-            <groupId>com.mysql</groupId>
-            <artifactId>mysql-connector-j</artifactId>
-            <scope>compile</scope>
-            <version>${mysql.version}</version>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client-api</artifactId>
+            <version>3.3.2</version>
+            <scope>${local.scope}</scope>
         </dependency>

         <dependency>
             <groupId>com.google.guava</groupId>
             <artifactId>guava</artifactId>
             <version>32.0.0-jre</version>
         </dependency>
-        <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-client-api</artifactId>
-            <version>3.4.0</version>
-            <scope>${local.scope}</scope>
-        </dependency>
     </dependencies>

     <build>
1 change: 0 additions & 1 deletion native-io/lakesoul-io-java/pom.xml
@@ -32,7 +32,6 @@ SPDX-License-Identifier: Apache-2.0
         <protobuf.plugin.version>0.6.1</protobuf.plugin.version>
     </properties>

-
     <dependencies>
         <dependency>
             <groupId>com.dmetasoul</groupId>
5 changes: 5 additions & 0 deletions rust/lakesoul-io/src/lakesoul_io_config.rs
@@ -24,6 +24,7 @@ use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_common::DataFusionError::{External, ObjectStore};
 use datafusion_substrait::substrait::proto::Plan;
 use derivative::Derivative;
+use log::info;
 use object_store::aws::AmazonS3Builder;
 use object_store::{ClientOptions, RetryConfig};
 use url::{ParseError, Url};
@@ -495,11 +496,13 @@ pub fn create_session_context_with_planner(
         .cloned();
     if let Some(fs) = default_fs {
         config.default_fs = fs.clone();
+        info!("NativeIO register default fs {}", fs);
         register_object_store(&fs, config, &runtime)?;
     };

     if !config.prefix.is_empty() {
         let prefix = config.prefix.clone();
+        info!("NativeIO register prefix fs {}", prefix);
         let normalized_prefix = register_object_store(&prefix, config, &runtime)?;
         config.prefix = normalized_prefix;
     }
@@ -512,6 +515,8 @@ pub fn create_session_context_with_planner(
         .map(|file_name| register_object_store(&file_name, config, &runtime))
         .collect::<Result<Vec<String>>>()?;
     config.files = normalized_filenames;
+    info!("NativeIO normalized file names: {:?}", config.files);
+    info!("NativeIO final config: {:?}", config);

     // create session context
     let mut state = if let Some(planner) = planner {
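The new info! calls go through the log facade, which emits nothing unless the embedding application installs a logger implementation. A minimal sketch, assuming the env_logger crate (the project may wire up a different logger):

```rust
// Sketch: enable the `log` facade so info! lines like the ones added above
// are actually emitted. env_logger is an assumption here, not necessarily
// what LakeSoul itself uses.
use log::info;

fn main() {
    // Honor RUST_LOG if set, otherwise default to the `info` level.
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
    info!("NativeIO register default fs {}", "s3://lakesoul-test-bucket");
}
```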
