From 2906c6102a5d984e46105db1a3b8e821721a6cde Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Mon, 11 Mar 2024 15:00:56 +0800 Subject: [PATCH 01/13] support mongodb cdc Signed-off-by: ChenYunHey <1908166778@qq.com> --- lakesoul-flink/pom.xml | 18 +- .../apache/flink/lakesoul/entry/JdbcCDC.java | 39 ++- .../sink/committer/LakeSoulSinkCommitter.java | 3 + .../LakeSoulSinkGlobalCommitter.java | 15 +- .../lakesoul/types/BinarySourceRecord.java | 8 +- .../lakesoul/types/LakeSoulRecordConvert.java | 278 +++++++++++++----- .../flink/lakesoul/types/ParseDocument.java | 78 +++++ .../flink/lakesoul/types/StructConvert.java | 106 +++++++ 8 files changed, 464 insertions(+), 81 deletions(-) create mode 100644 lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java create mode 100644 lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java diff --git a/lakesoul-flink/pom.xml b/lakesoul-flink/pom.xml index 89a57f4e7..6462cf692 100644 --- a/lakesoul-flink/pom.xml +++ b/lakesoul-flink/pom.xml @@ -23,11 +23,17 @@ SPDX-License-Identifier: Apache-2.0 2.12 2.12.10 2.17.2 - 2.4.2 + 3.0.0 provided + + org.hamcrest + hamcrest + 2.1 + test + com.dmetasoul lakesoul-common @@ -141,6 +147,16 @@ SPDX-License-Identifier: Apache-2.0 flink-sql-connector-oracle-cdc ${cdc.version} + + com.ververica + flink-sql-connector-mongodb-cdc + ${cdc.version} + + + org.mongodb + bson + 4.3.4 + org.apache.doris flink-doris-connector-1.17 diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java index 00c0471da..aa5a6317f 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java @@ -11,6 +11,8 @@ import com.ververica.cdc.connectors.mysql.source.MySqlSourceBuilder; import com.ververica.cdc.connectors.oracle.source.OracleSourceBuilder; import com.ververica.cdc.connectors.postgres.source.PostgresSourceBuilder; +import com.ververica.cdc.connectors.mongodb.source.MongoDBSource; +import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema; import org.apache.flink.api.common.restartstrategy.RestartStrategies; import org.apache.flink.api.common.time.Time; import com.ververica.cdc.connectors.sqlserver.source.SqlServerSourceBuilder; @@ -62,7 +64,7 @@ public static void main(String[] args) throws Exception { host = parameter.get(SOURCE_DB_HOST.key()); port = parameter.getInt(SOURCE_DB_PORT.key(), MysqlDBManager.DEFAULT_MYSQL_PORT); //Postgres Oracle - if (dbType.equalsIgnoreCase("oracle") || dbType.equalsIgnoreCase("postgres")) { + if (dbType.equalsIgnoreCase("oracle") || dbType.equalsIgnoreCase("postgres") || dbType.equalsIgnoreCase("mongodb")) { schemaList = parameter.get(SOURCE_DB_SCHEMA_LIST.key()).split(","); String[] tables = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); tableList = new String[tables.length]; @@ -92,6 +94,7 @@ public static void main(String[] args) throws Exception { conf.set(SOURCE_DB_PORT, port); conf.set(WAREHOUSE_PATH, databasePrefixPath); conf.set(SERVER_TIME_ZONE, serverTimezone); + conf.set(SOURCE_DB_TYPE,dbType); // parameters for mutil tables dml sink conf.set(LakeSoulSinkOptions.USE_CDC, true); @@ -136,9 +139,12 @@ public static void main(String[] args) throws Exception { if (dbType.equalsIgnoreCase("oracle")) { oracleCdc(lakeSoulRecordConvert, conf, env); } - if (dbType.equalsIgnoreCase("sqlserver")){ + if 
(dbType.equalsIgnoreCase("sqlserver")) { sqlserverCdc(lakeSoulRecordConvert, conf, env); } + if (dbType.equalsIgnoreCase("mongodb")) { + mongoCdc(lakeSoulRecordConvert, conf, env); + } } @@ -276,4 +282,33 @@ public static void sqlserverCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Con DataStreamSink dmlSink = builder.buildLakeSoulDMLSink(stream); env.execute("LakeSoul CDC Sink From sqlserver Database " + dbName); } + + private static void mongoCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Configuration conf, StreamExecutionEnvironment env) throws Exception { + MongoDBSource mongoSource = + MongoDBSource.builder() + .hosts(host) + .databaseList(schemaList) // 设置捕获的数据库,支持正则表达式 + .collectionList(tableList) //设置捕获的集合,支持正则表达式 + .startupOptions(StartupOptions.initial()) + .scanFullChangelog(true) + .batchSize(splitSize) + .username(userName) + .password(passWord) + .deserializer(new BinaryDebeziumDeserializationSchema(lakeSoulRecordConvert, conf.getString(WAREHOUSE_PATH))) + .build(); + NameSpaceManager manager = new NameSpaceManager(); + manager.importOrSyncLakeSoulNamespace(dbName); + LakeSoulMultiTableSinkStreamBuilder.Context context = new LakeSoulMultiTableSinkStreamBuilder.Context(); + context.env = env; + context.conf = conf; + LakeSoulMultiTableSinkStreamBuilder + builder = + new LakeSoulMultiTableSinkStreamBuilder(mongoSource, context, lakeSoulRecordConvert); + DataStreamSource source = builder.buildMultiTableSource("mongodb Source"); + + DataStream stream = builder.buildHashPartitionedCDCStream(source); + DataStreamSink dmlSink = builder.buildLakeSoulDMLSink(stream); + env.execute("LakeSoul CDC Sink From mongo Database " + dbName); + + } } \ No newline at end of file diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkCommitter.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkCommitter.java index 0ae7f13a7..ed7eb2bba 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkCommitter.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkCommitter.java @@ -99,6 +99,9 @@ public List commit(List commit( LOG.info("Committing: {}", globalCommittable); int index = 0; + String dbType = this.conf.getString(SOURCE_DB_TYPE,""); + for (Map.Entry, List> entry : globalCommittable.getGroupedCommitables() .entrySet()) { @@ -118,6 +122,9 @@ public List commit( List lakeSoulMultiTableSinkCommittable = entry.getValue(); String tableName = identity.tableId.table(); String tableNamespace = identity.tableId.schema(); + if (tableNamespace==null){ + tableNamespace = identity.tableId.catalog(); + } boolean isCdc = identity.useCDC; Schema msgSchema = FlinkUtil.toArrowSchema(identity.rowType, isCdc ? 
Optional.of( identity.cdcColumn) : @@ -154,7 +161,7 @@ public List commit( !new HashSet<>(partitionKeys.rangeKeys).containsAll(identity.partitionKeyList)) { throw new IOException("Change of partition key column of table " + tableName + " is forbidden"); } - StructType origSchema = null; + StructType origSchema ; if (TableInfoDao.isArrowKindSchema(tableInfo.getTableSchema())) { Schema arrowSchema = Schema.fromJSON(tableInfo.getTableSchema()); origSchema = ArrowUtils.fromArrowSchema(arrowSchema); @@ -193,7 +200,11 @@ public List commit( msgSchema, identity.useCDC, identity.cdcColumn); - dbManager.updateTableSchema(tableInfo.getTableId(), msgSchema.toJson()); + if (dbType.equals("mongodb")){ + dbManager.updateTableSchema(tableInfo.getTableId(), mergeStructType.json()); + }else { + dbManager.updateTableSchema(tableInfo.getTableId(), msgSchema.toJson()); + } if (JSONObject.parseObject(tableInfo.getProperties()).containsKey(DBConfig.TableInfoProperty.DROPPED_COLUMN)) { dbManager.removeLogicallyDropColumn(tableInfo.getTableId()); } diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java index 94f0f9a30..b327f6c3c 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java @@ -100,8 +100,12 @@ public static BinarySourceRecord fromMysqlSourceRecord(SourceRecord sourceRecord } long sortField = (binlogFileIndex << 32) + binlogPosition; LakeSoulRowDataWrapper data = convert.toLakeSoulDataType(valueSchema, value, tableId, tsMs, sortField); - String tablePath = new Path(new Path(basePath, tableId.schema()), tableId.table()).toString(); - + String tablePath; + if (tableId.schema()==null){ + tablePath = new Path(new Path(basePath, tableId.catalog()), tableId.table()).toString(); + }else { + tablePath = new Path(new Path(basePath, tableId.schema()), tableId.table()).toString(); + } return new BinarySourceRecord(sourceRecord.topic(), primaryKeys, tableId, FlinkUtil.makeQualifiedPath(tablePath).toString(), Collections.emptyList(), false, data, null); } diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java index 31877780a..829cc6eb6 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java @@ -4,10 +4,13 @@ package org.apache.flink.lakesoul.types; +import com.amazonaws.services.dynamodbv2.xspec.S; +import com.ververica.cdc.connectors.mongodb.internal.MongoDBEnvelope; import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Decimal; import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Field; import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Schema; import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Struct; +import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.errors.DataException; import com.ververica.cdc.debezium.utils.TemporalConversions; import io.debezium.data.Enum; import io.debezium.data.EnumSet; @@ -33,6 +36,8 @@ import org.apache.flink.table.data.*; import org.apache.flink.table.data.binary.BinaryRowData; import org.apache.flink.table.data.writer.BinaryRowWriter; +import 
org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer; +import org.apache.flink.table.runtime.typeutils.RowDataSerializer; import org.apache.flink.table.types.logical.*; import org.apache.flink.types.RowKind; @@ -49,6 +54,7 @@ import static org.apache.flink.lakesoul.tool.LakeSoulSinkOptions.CDC_CHANGE_COLUMN_DEFAULT; import static org.apache.flink.lakesoul.tool.LakeSoulSinkOptions.SORT_FIELD; import static org.apache.flink.lakesoul.tool.LakeSoulSinkOptions.USE_CDC; +import static org.apache.flink.lakesoul.types.ParseDocument.convertBSONToStruct; public class LakeSoulRecordConvert implements Serializable { @@ -60,6 +66,7 @@ public class LakeSoulRecordConvert implements Serializable { List partitionFields; + public LakeSoulRecordConvert(Configuration conf, String serverTimeZone) { this(conf, serverTimeZone, Collections.emptyList()); } @@ -111,46 +118,98 @@ private boolean partitionFieldsChanged(RowType beforeType, RowData beforeData, R } public LakeSoulRowDataWrapper toLakeSoulDataType(Schema sch, Struct value, TableId tableId, long tsMs, long sortField) throws Exception { - Envelope.Operation op = getOperation(sch, value); - Schema valueSchema = value.schema(); LakeSoulRowDataWrapper.Builder builder = LakeSoulRowDataWrapper.newBuilder().setTableId(tableId) .setUseCDC(useCDC).setCDCColumn(cdcColumn); - if (op == Envelope.Operation.CREATE || op == Envelope.Operation.READ) { - Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema(); - Struct after = value.getStruct(Envelope.FieldName.AFTER); - RowData insert = convert(after, afterSchema, RowKind.INSERT, sortField); - RowType rt = toFlinkRowType(afterSchema); - insert.setRowKind(RowKind.INSERT); - builder.setOperation("insert").setAfterRowData(insert).setAfterType(rt); - } else if (op == Envelope.Operation.DELETE) { - Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema(); - Struct before = value.getStruct(Envelope.FieldName.BEFORE); - RowData delete = convert(before, beforeSchema, RowKind.DELETE, sortField); - RowType rt = toFlinkRowType(beforeSchema); - builder.setOperation("delete").setBeforeRowData(delete).setBeforeRowType(rt); - delete.setRowKind(RowKind.DELETE); + boolean isMongoDDL = true; + try { + value.getWithoutDefault(MongoDBEnvelope.FULL_DOCUMENT_FIELD); + } catch (DataException e) { + isMongoDDL = false; + } + if (isMongoDDL) { + String op = value.getString(MongoDBEnvelope.OPERATION_TYPE_FIELD); + if (op.equals("insert")) { + String fullDocument = value.getString(MongoDBEnvelope.FULL_DOCUMENT_FIELD); + Struct bsonStruct = convertBSONToStruct(fullDocument); + Schema documentSchema = bsonStruct.schema(); + RowData insert = convert(bsonStruct, documentSchema, RowKind.INSERT, sortField); + RowType mongoRt = toFlinkRowType(documentSchema,true); + insert.setRowKind(RowKind.INSERT); + builder.setOperation("insert").setAfterRowData(insert).setAfterType(mongoRt); + } else if (op.equals("delete")) { + String fullDocumentValue = value.getString("fullDocumentBeforeChange"); + Struct before = convertBSONToStruct(fullDocumentValue); + Schema beforSchema = before.schema(); + RowData delete = convert(before,beforSchema,RowKind.DELETE,sortField); + RowType rt = toFlinkRowType(beforSchema, true); + builder.setOperation("delete").setBeforeRowData(delete).setBeforeRowType(rt); + delete.setRowKind(RowKind.DELETE); + } else { + String fullDocumentBeforChange = value.getString("fullDocumentBeforeChange"); + Struct before = convertBSONToStruct(fullDocumentBeforChange); + Schema beforeSchema = 
before.schema(); + RowData beforeData = convert(before, beforeSchema, RowKind.UPDATE_BEFORE, sortField); + beforeData.setRowKind(RowKind.UPDATE_BEFORE); + RowType beforeRT = toFlinkRowType(beforeSchema, true); + String fullDocument = value.getString(MongoDBEnvelope.FULL_DOCUMENT_FIELD); + Struct after = convertBSONToStruct(fullDocument); + Schema afterSchema = after.schema(); + RowData afterData = convert(after, afterSchema, RowKind.UPDATE_AFTER, sortField); + afterData.setRowKind(RowKind.UPDATE_AFTER); + RowType afterRT = toFlinkRowType(afterSchema, true); + if (partitionFieldsChanged(beforeRT, beforeData, afterRT, afterData)) { + // partition fields changed. we need to emit both before and after RowData + builder.setOperation("update").setBeforeRowData(beforeData).setBeforeRowType(beforeRT) + .setAfterRowData(afterData).setAfterType(afterRT); + } else { + // otherwise we only need to keep the after RowData + builder.setOperation("update") + .setAfterRowData(afterData).setAfterType(afterRT); + } + } } else { - Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema(); - Struct before = value.getStruct(Envelope.FieldName.BEFORE); - RowData beforeData = convert(before, beforeSchema, RowKind.UPDATE_BEFORE, sortField); - RowType beforeRT = toFlinkRowType(beforeSchema); - beforeData.setRowKind(RowKind.UPDATE_BEFORE); - Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema(); - Struct after = value.getStruct(Envelope.FieldName.AFTER); - RowData afterData = convert(after, afterSchema, RowKind.UPDATE_AFTER, sortField); - RowType afterRT = toFlinkRowType(afterSchema); - afterData.setRowKind(RowKind.UPDATE_AFTER); - if (partitionFieldsChanged(beforeRT, beforeData, afterRT, afterData)) { - // partition fields changed. we need to emit both before and after RowData - builder.setOperation("update").setBeforeRowData(beforeData).setBeforeRowType(beforeRT) - .setAfterRowData(afterData).setAfterType(afterRT); + Envelope.Operation op = getOperation(sch, value); + Schema valueSchema = value.schema(); + if (op == Envelope.Operation.CREATE || op == Envelope.Operation.READ) { + Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema(); + Struct after = value.getStruct(Envelope.FieldName.AFTER); + RowData insert = convert(after, afterSchema, RowKind.INSERT, sortField); + boolean afterNullable = afterSchema.isOptional(); + RowType rt = toFlinkRowType(afterSchema,afterNullable); + insert.setRowKind(RowKind.INSERT); + builder.setOperation("insert").setAfterRowData(insert).setAfterType(rt); + } else if (op == Envelope.Operation.DELETE) { + Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema(); + Struct before = value.getStruct(Envelope.FieldName.BEFORE); + RowData delete = convert(before, beforeSchema, RowKind.DELETE, sortField); + boolean nullable = beforeSchema.isOptional(); + RowType rt = toFlinkRowType(beforeSchema,nullable); + builder.setOperation("delete").setBeforeRowData(null).setBeforeRowType(rt); + delete.setRowKind(RowKind.DELETE); } else { - // otherwise we only need to keep the after RowData - builder.setOperation("update") - .setAfterRowData(afterData).setAfterType(afterRT); + Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema(); + Struct before = value.getStruct(Envelope.FieldName.BEFORE); + RowData beforeData = convert(before, beforeSchema, RowKind.UPDATE_BEFORE, sortField); + boolean beforNullable = beforeSchema.isOptional(); + RowType beforeRT = toFlinkRowType(beforeSchema,beforNullable); + 
beforeData.setRowKind(RowKind.UPDATE_BEFORE); + Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema(); + Struct after = value.getStruct(Envelope.FieldName.AFTER); + RowData afterData = convert(after, afterSchema, RowKind.UPDATE_AFTER, sortField); + boolean afterNullable = afterSchema.isOptional(); + RowType afterRT = toFlinkRowType(afterSchema,afterNullable); + afterData.setRowKind(RowKind.UPDATE_AFTER); + if (partitionFieldsChanged(beforeRT, beforeData, afterRT, afterData)) { + // partition fields changed. we need to emit both before and after RowData + builder.setOperation("update").setBeforeRowData(beforeData).setBeforeRowType(beforeRT) + .setAfterRowData(afterData).setAfterType(afterRT); + } else { + // otherwise we only need to keep the after RowData + builder.setOperation("update") + .setAfterRowData(afterData).setAfterType(afterRT); + } } } - return builder.setTsMs(tsMs).build(); } @@ -169,7 +228,7 @@ public RowType toFlinkRowTypeCDC(RowType rowType) { return RowType.of(colTypes, colNames); } - public RowType toFlinkRowType(Schema schema) { + public RowType toFlinkRowType(Schema schema, boolean nullable) { int arity = schema.fields().size() + 1; if (useCDC) ++arity; String[] colNames = new String[arity]; @@ -178,7 +237,7 @@ public RowType toFlinkRowType(Schema schema) { for (int i = 0; i < (useCDC ? arity - 2 : arity - 1); i++) { Field item = fieldNames.get(i); colNames[i] = item.name(); - colTypes[i] = convertToLogical(item.schema()); + colTypes[i] = convertToLogical(item.schema(),nullable); } // colNames[useCDC ? arity - 3 : arity - 2] = BINLOG_FILE_INDEX; // colTypes[useCDC ? arity - 3 : arity - 2] = new BigIntType(); @@ -191,16 +250,30 @@ public RowType toFlinkRowType(Schema schema) { return RowType.of(colTypes, colNames); } - public LogicalType convertToLogical(Schema fieldSchema) { + public LogicalType convertToLogical(Schema fieldSchema, boolean nullable) { if (isPrimitiveType(fieldSchema)) { - return primitiveLogicalType(fieldSchema); + return primitiveLogicalType(fieldSchema, nullable); } else { - return otherLogicalType(fieldSchema); + return otherLogicalType(fieldSchema, nullable); } } - private LogicalType primitiveLogicalType(Schema fieldSchema) { - boolean nullable = fieldSchema.isOptional(); + public List getRowFields(Schema schema) { + List rowFields = new ArrayList<>(); + + for (Field field : schema.fields()) { + String fieldName = field.name(); + Schema fieldype = field.schema(); + LogicalType logicalType = convertToLogical(fieldype ,true); + RowType.RowField rowField = new RowType.RowField(fieldName, logicalType); + rowFields.add(rowField); + } + return rowFields; + } + + private LogicalType primitiveLogicalType(Schema fieldSchema,boolean nullable) { +// boolean nullable = fieldSchema.isOptional(); +// if (isMongoDDl) nullable = true; switch (fieldSchema.type()) { case BOOLEAN: return new BooleanType(nullable); @@ -216,6 +289,9 @@ private LogicalType primitiveLogicalType(Schema fieldSchema) { return new DoubleType(nullable); case STRING: return new VarCharType(nullable, Integer.MAX_VALUE); + case STRUCT: + List rowFields = getRowFields(fieldSchema); + return new RowType(nullable, rowFields); case BYTES: Map paras = fieldSchema.parameters(); int byteLen = Integer.MAX_VALUE; @@ -229,8 +305,8 @@ private LogicalType primitiveLogicalType(Schema fieldSchema) { } } - private LogicalType otherLogicalType(Schema fieldSchema) { - boolean nullable = fieldSchema.isOptional(); + private LogicalType otherLogicalType(Schema fieldSchema, boolean nullable) { + 
//boolean nullable = fieldSchema.isOptional(); switch (fieldSchema.name()) { case Enum.LOGICAL_NAME: case Json.LOGICAL_NAME: @@ -361,11 +437,47 @@ public RowData convert(Struct struct, Schema schema, RowKind rowKind, long sortF return row; } + public RowData convertDocumentStruct(Struct struct) { + Schema schema = struct.schema(); + int arity = schema.fields().size(); + List fieldNames = schema.fields(); + BinaryRowData row = new BinaryRowData(arity); + BinaryRowWriter writer = new BinaryRowWriter(row); + + for (int i = 0; i < schema.fields().size(); i++) { + Field field = fieldNames.get(i); + String fieldName = field.name(); + Schema fieldSchema = schema.field(fieldName).schema(); + Object fieldValue = struct.getWithoutDefault(fieldName); + sqlSchemaAndFieldWrite(writer, i,fieldValue, fieldSchema, serverTimeZone); + } + writer.complete(); + return row; + } + + + private void convertNestedStruct(BinaryRowWriter writer, int index, Struct nestedStruct, Schema nestedSchema) { + int nestedArity = nestedSchema.fields().size(); + List nestedFields = nestedSchema.fields(); + BinaryRowData nestedRow = new BinaryRowData(nestedArity); + BinaryRowWriter nestedWriter = new BinaryRowWriter(nestedRow); + for (int i = 0; i < nestedArity; i++) { + Field field = nestedFields.get(i); + String nestedFieldName = field.name(); + Schema nestedFieldType = nestedSchema.field(nestedFieldName).schema(); + Object nestedFieldValue = nestedStruct.getWithoutDefault(nestedFieldName); + sqlSchemaAndFieldWrite(nestedWriter, i, nestedFieldValue,nestedFieldType, serverTimeZone); + } + nestedWriter.complete(); + RowDataSerializer rowDataSerializer = new RowDataSerializer(); + writer.writeRow(index, nestedRow, rowDataSerializer); + } + private boolean isPrimitiveType(Schema fieldSchema) { return fieldSchema.name() == null; } - private void sqlSchemaAndFieldWrite(BinaryRowWriter writer, int index, Object fieldValue, Schema fieldSchema, ZoneId serverTimeZone) { + public void sqlSchemaAndFieldWrite(BinaryRowWriter writer, int index, Object fieldValue, Schema fieldSchema, ZoneId serverTimeZone) { if (isPrimitiveType(fieldSchema)) { primitiveTypeWrite(writer, index, fieldValue, fieldSchema); } else { @@ -374,32 +486,41 @@ private void sqlSchemaAndFieldWrite(BinaryRowWriter writer, int index, Object fi } private void primitiveTypeWrite(BinaryRowWriter writer, int index, Object fieldValue, Schema fieldSchema) { - switch (fieldSchema.type()) { - case BOOLEAN: - writeBoolean(writer, index, fieldValue); - break; - case INT8: - case INT16: - case INT32: - writeInt(writer, index, fieldValue); - break; - case INT64: - writeLong(writer, index, fieldValue); - break; - case FLOAT32: - writeFloat(writer, index, fieldValue); - break; - case FLOAT64: - writeDouble(writer, index, fieldValue); - break; - case STRING: - writeString(writer, index, fieldValue); - break; - case BYTES: - writeBinary(writer, index, fieldValue); - break; - default: - throw new UnsupportedOperationException("LakeSoul doesn't support type: " + fieldSchema.type()); + if (fieldValue == null){ + writer.setNullAt(index); + }else if (fieldSchema.type().getName().equals("struct")){ + convertNestedStruct(writer, index, (Struct) fieldValue,fieldSchema); + }else { + switch (fieldSchema.type()) { + case BOOLEAN: + writeBoolean(writer, index, fieldValue); + break; + case INT8: + case INT16: + case INT32: + writeInt(writer, index, fieldValue); + break; + case INT64: + writeLong(writer, index, fieldValue); + break; + case FLOAT32: + writeFloat(writer, index, fieldValue); + break; + 
case FLOAT64: + writeDouble(writer, index, fieldValue); + break; + case STRING: + writeString(writer, index, fieldValue); + break; + case BYTES: + writeBinary(writer, index, fieldValue); + break; +// case STRUCT: +// writeRow(writer, index, fieldValue); +// break; + default: + throw new UnsupportedOperationException("LakeSoul doesn't support type: " + fieldSchema.type()); + } } } @@ -491,10 +612,9 @@ public Object convertToDecimal(Object dbzObj, Schema schema) { } } Map paras = schema.parameters(); - if (paras==null){ + if (paras == null) { return DecimalData.fromBigDecimal(bigDecimal, 38, 30); - } - else { + } else { return DecimalData.fromBigDecimal(bigDecimal, Integer.parseInt(paras.get("connect.decimal.precision")), Integer.parseInt(paras.get("scale"))); } } @@ -653,6 +773,16 @@ public void writeString(BinaryRowWriter writer, int index, Object dbzObj) { writer.writeString(index, StringData.fromString(dbzObj.toString())); } + public void writeRow(BinaryRowWriter writer, int index, Object dbzObj) { + RowData rowData = null; + if (dbzObj instanceof Struct) { + Struct struct = (Struct) dbzObj; + rowData = convertDocumentStruct(struct); + } + RowDataSerializer rowDataSerializer = new RowDataSerializer(); + writer.writeRow(index, rowData, rowDataSerializer); + } + public void writeBinary(BinaryRowWriter writer, int index, Object dbzObj) { if (dbzObj instanceof byte[]) { writer.writeBinary(index, (byte[]) dbzObj); @@ -666,4 +796,4 @@ public void writeBinary(BinaryRowWriter writer, int index, Object dbzObj) { "Unsupported BYTES value type: " + dbzObj.getClass().getSimpleName()); } } -} +} \ No newline at end of file diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java new file mode 100644 index 000000000..79e8d86b5 --- /dev/null +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java @@ -0,0 +1,78 @@ +// SPDX-FileCopyrightText: 2023 LakeSoul Contributors +// +// SPDX-License-Identifier: Apache-2.0 + +package org.apache.flink.lakesoul.types; + +import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Field; +import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Schema; +import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.SchemaBuilder; +import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Struct; +import org.bson.Document; + +import java.util.Map; + +public class ParseDocument { + public static Struct convertBSONToStruct(String value) { + Document bsonDocument = Document.parse(value); + // 创建根 Struct 的 Schema + SchemaBuilder structSchemaBuilder = SchemaBuilder.struct(); + Struct struct = new Struct(buildSchema(bsonDocument, structSchemaBuilder)); + // 填充 Struct 的值 + fillStructValues(bsonDocument, struct); + + return struct; + } + + private static Schema buildSchema(Document bsonDocument, SchemaBuilder structSchemaBuilder) { + for (Map.Entry entry : bsonDocument.entrySet()) { + String fieldName = entry.getKey(); + Object value = entry.getValue(); + if (value instanceof Document) { + // 处理嵌套的 Document + SchemaBuilder nestedStructSchemaBuilder = SchemaBuilder.struct(); + structSchemaBuilder.field(fieldName, buildSchema((Document) value, nestedStructSchemaBuilder)); + } else { + // 处理普通字段 + structSchemaBuilder.field(fieldName, getSchemaForValue(value)); + } + } + return structSchemaBuilder.build(); + } + + private static void fillStructValues(Document bsonDocument, Struct 
struct) {
+        for (Map.Entry entry : bsonDocument.entrySet()) {
+            String fieldName = entry.getKey();
+            Object value = entry.getValue();
+            if (value instanceof Document) {
+                // handle a nested Document
+                Struct nestedStruct = new Struct(struct.schema().field(fieldName).schema());
+                fillStructValues((Document) value, nestedStruct);
+                struct.put(fieldName, nestedStruct);
+            } else {
+                // handle plain fields
+                struct.put(fieldName, value);
+            }
+        }
+    }
+
+    private static Schema getSchemaForValue(Object value) {
+        // return the Schema corresponding to the value's type
+        if (value instanceof String) {
+            return Schema.STRING_SCHEMA;
+        } else if (value instanceof Integer) {
+            return Schema.INT32_SCHEMA;
+        } else if (value instanceof Long) {
+            return Schema.INT64_SCHEMA;
+        } else if (value instanceof Double) {
+            return Schema.FLOAT64_SCHEMA;
+        } else if (value instanceof Boolean) {
+            return Schema.BOOLEAN_SCHEMA;
+        } else if (value instanceof Byte) {
+            return Schema.BYTES_SCHEMA;
+        } else {
+            // handle other types; more types can be added here as needed
+            return Schema.STRING_SCHEMA;
+        }
+    }
+}
diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java
new file mode 100644
index 000000000..1917d8367
--- /dev/null
+++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java
@@ -0,0 +1,106 @@
+// SPDX-FileCopyrightText: 2023 LakeSoul Contributors
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package org.apache.flink.lakesoul.types;
+
+import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Field;
+import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Schema;
+import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Struct;
+import org.apache.flink.table.data.*;
+import org.apache.flink.table.data.binary.BinaryRowData;
+import org.apache.flink.table.data.writer.BinaryRowWriter;
+import org.apache.flink.table.runtime.typeutils.RowDataSerializer;
+
+import java.util.List;
+public class StructConvert {
+    public RowData convert(Struct struct) {
+        // get the Struct's schema and field count
+        Schema schema = struct.schema();
+        int arity = schema.fields().size() + 1; // +1 for extra event sortField
+        List fieldNames = schema.fields();
+        // create the BinaryRowData
+        BinaryRowData row = new BinaryRowData(arity);
+        BinaryRowWriter writer = new BinaryRowWriter(row);
+        // iterate over the Struct's fields
+        for (int i = 0; i < schema.fields().size(); i++) {
+            Field field = fieldNames.get(i);
+            String fieldName = field.name();
+            Schema fieldSchema = schema.field(fieldName).schema();
+            // get the value of the Struct field
+            Object fieldValue = struct.getWithoutDefault(fieldName);
+            // write the field value into the BinaryRowData
+            writeField(writer, i, fieldSchema, fieldValue);
+        }
+        // finish the BinaryRowWriter
+        writer.complete();
+        return row;
+    }
+
+    public RowData convert1(Struct struct){
+        Schema schema = struct.schema();
+        int arity = schema.fields().size()+1;
+        List fieldNames = schema.fields();
+        BinaryRowData row = new BinaryRowData(arity);
+        BinaryRowWriter writer = new BinaryRowWriter(row);
+        Field field = fieldNames.get(0);
+        String fieldName = field.name();
+        Schema fieldSchema = schema.field(fieldName).schema();
+        Object fieldValue = struct.getWithoutDefault(fieldName);
+        RowDataSerializer rowDataSerializer = new RowDataSerializer();
+        writer.writeRow(0,(RowData) fieldValue ,rowDataSerializer);
+        writer.complete();
+        return row;
+    }
+    // recursively handle nested Struct types
+    private void writeField(BinaryRowWriter writer, int index, Schema schema, Object fieldValue) {
+
+        if (fieldValue == null) {
+            // if the field value is null, write null
+            
writer.setNullAt(index); + } else if (schema.type().getName().equals("struct")) { + // 如果字段类型是StructType,递归处理 + convertNestedStruct(writer, index, (Struct) fieldValue, schema); + } else { + // 根据字段类型写入值 + // 这里根据实际情况,可能需要根据字段类型进行不同的处理 + switch (schema.type()) { + case INT8: + case INT16: + case INT32: + writer.writeInt(index, (Integer) fieldValue); + break; + case STRING: + writer.writeString(index, StringData.fromString(fieldValue.toString())); + break; + default: + writer.writeString(index, StringData.fromString(fieldValue.toString())); + } + + } + } + // 递归处理嵌套的Struct类型 + private void convertNestedStruct(BinaryRowWriter writer, int index, Struct nestedStruct, Schema nestedSchema) { + // 获取嵌套Struct的schema和字段个数 + int nestedArity = nestedSchema.fields().size(); + List nestedFields = nestedSchema.fields(); + // 创建嵌套Struct的BinaryRowData + BinaryRowData nestedRow = new BinaryRowData(nestedArity); + BinaryRowWriter nestedWriter = new BinaryRowWriter(nestedRow); + // 遍历嵌套Struct的字段 + for (int i = 0; i < nestedArity; i++) { + Field field = nestedFields.get(i); + String nestedFieldName = field.name(); + Schema nestedFieldType = nestedSchema.field(nestedFieldName).schema(); + // 获取嵌套Struct字段的值 + Object nestedFieldValue = nestedStruct.getWithoutDefault(nestedFieldName); + // 将嵌套Struct字段值写入BinaryRowData + writeField(nestedWriter, i, nestedFieldType, nestedFieldValue); + } + // 关闭嵌套Struct的BinaryRowWriter + nestedWriter.complete(); + RowDataSerializer rowDataSerializer = new RowDataSerializer(); + writer.writeRow(index, nestedRow, rowDataSerializer); + writer.complete(); + } +} \ No newline at end of file From fb1eb116251da1bac5a3ba668ba96450eb41e029 Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Mon, 18 Mar 2024 14:54:49 +0800 Subject: [PATCH 02/13] support mongodb sync Signed-off-by: ChenYunHey <1908166778@qq.com> --- lakesoul-flink/pom.xml | 15 ++ .../apache/flink/lakesoul/entry/JdbcCDC.java | 17 +- .../flink/lakesoul/entry/SyncDatabase.java | 161 ++++++++++++++++-- .../LakeSoulSinkGlobalCommitter.java | 3 + .../tool/LakeSoulSinkDatabasesOptions.java | 20 ++- .../lakesoul/tool/LakeSoulSinkOptions.java | 11 ++ .../lakesoul/types/LakeSoulRecordConvert.java | 53 +++++- .../flink/lakesoul/types/ParseDocument.java | 41 ++++- 8 files changed, 292 insertions(+), 29 deletions(-) diff --git a/lakesoul-flink/pom.xml b/lakesoul-flink/pom.xml index 6462cf692..5218fc67d 100644 --- a/lakesoul-flink/pom.xml +++ b/lakesoul-flink/pom.xml @@ -147,11 +147,26 @@ SPDX-License-Identifier: Apache-2.0 flink-sql-connector-oracle-cdc ${cdc.version} + + org.mongodb + mongo-java-driver + 3.12.14 + com.ververica flink-sql-connector-mongodb-cdc ${cdc.version} + + org.apache.flink + flink-connector-mongodb + 1.0.1-1.17 + + + org.mongodb + mongodb-driver-sync + 3.10.1 + org.mongodb bson diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java index aa5a6317f..d1c9cb1e5 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java @@ -54,6 +54,8 @@ public class JdbcCDC { private static String[] tableList; private static String serverTimezone; private static String pluginName; + private static int batchSize; + private static String mongoDatabase; public static void main(String[] args) throws Exception { ParameterTool parameter = ParameterTool.fromArgs(args); @@ -64,7 +66,7 @@ public static void 
main(String[] args) throws Exception { host = parameter.get(SOURCE_DB_HOST.key()); port = parameter.getInt(SOURCE_DB_PORT.key(), MysqlDBManager.DEFAULT_MYSQL_PORT); //Postgres Oracle - if (dbType.equalsIgnoreCase("oracle") || dbType.equalsIgnoreCase("postgres") || dbType.equalsIgnoreCase("mongodb")) { + if (dbType.equalsIgnoreCase("oracle") || dbType.equalsIgnoreCase("postgres") ) { schemaList = parameter.get(SOURCE_DB_SCHEMA_LIST.key()).split(","); String[] tables = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); tableList = new String[tables.length]; @@ -73,7 +75,12 @@ public static void main(String[] args) throws Exception { } splitSize = parameter.getInt(SOURCE_DB_SPLIT_SIZE.key(), SOURCE_DB_SPLIT_SIZE.defaultValue()); } - if (dbType.equalsIgnoreCase("sqlserver")){ + if (dbType.equalsIgnoreCase("sqlserver") ){ + tableList = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); + } + if ( dbType.equalsIgnoreCase("mongodb")){ + mongoDatabase = parameter.get(MONGO_DB_DATABASE.key()); + batchSize = parameter.getInt(BATCH_SIZE.key(), BATCH_SIZE.defaultValue()); tableList = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); } pluginName = parameter.get(PLUGIN_NAME.key(), PLUGIN_NAME.defaultValue()); @@ -287,11 +294,11 @@ private static void mongoCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Config MongoDBSource mongoSource = MongoDBSource.builder() .hosts(host) - .databaseList(schemaList) // 设置捕获的数据库,支持正则表达式 + .databaseList(mongoDatabase) // 设置捕获的数据库,支持正则表达式 .collectionList(tableList) //设置捕获的集合,支持正则表达式 .startupOptions(StartupOptions.initial()) .scanFullChangelog(true) - .batchSize(splitSize) + .batchSize(batchSize) .username(userName) .password(passWord) .deserializer(new BinaryDebeziumDeserializationSchema(lakeSoulRecordConvert, conf.getString(WAREHOUSE_PATH))) @@ -305,7 +312,7 @@ private static void mongoCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Config builder = new LakeSoulMultiTableSinkStreamBuilder(mongoSource, context, lakeSoulRecordConvert); DataStreamSource source = builder.buildMultiTableSource("mongodb Source"); - + source.print(); DataStream stream = builder.buildHashPartitionedCDCStream(source); DataStreamSink dmlSink = builder.buildLakeSoulDMLSink(stream); env.execute("LakeSoul CDC Sink From mongo Database " + dbName); diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java index aaf13d0c3..725707caf 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java @@ -5,18 +5,27 @@ package org.apache.flink.lakesoul.entry; -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONObject; -import com.amazonaws.services.dynamodbv2.xspec.S; import com.dmetasoul.lakesoul.meta.DBManager; import com.dmetasoul.lakesoul.meta.DBUtil; import com.dmetasoul.lakesoul.meta.entity.TableInfo; +import com.mongodb.MongoClientURI; +import com.mongodb.MongoClient; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.InsertOneModel; +import com.mongodb.client.model.WriteModel; import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.utils.ParameterTool; +import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.connector.mongodb.sink.MongoSink; +import 
org.apache.flink.connector.mongodb.sink.writer.context.MongoSinkContext; +import org.apache.flink.connector.mongodb.sink.writer.serializer.MongoSerializationSchema; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.RestOptions; import org.apache.flink.lakesoul.metadata.LakeSoulCatalog; import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.table.api.Table; import org.apache.flink.streaming.api.environment.CheckpointConfig; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.TableResult; @@ -24,25 +33,38 @@ import org.apache.flink.table.catalog.Catalog; import org.apache.flink.table.types.DataType; import org.apache.flink.table.types.logical.*; +import org.apache.flink.types.Row; +import org.bson.*; +import org.bson.types.Decimal128; + +import java.io.Serializable; import java.sql.*; -import java.util.List; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.Date; import static org.apache.flink.lakesoul.tool.LakeSoulSinkDatabasesOptions.*; public class SyncDatabase { - public static void main(String[] args) throws SQLException { + static Table coll; + public static void main(String[] args) throws Exception { ParameterTool parameter = ParameterTool.fromArgs(args); String sourceDatabase = parameter.get(SOURCE_DB_DB_NAME.key()); String sourceTableName = parameter.get(SOURCE_DB_LAKESOUL_TABLE.key()).toLowerCase(); - String targetSyncName = parameter.get(TARGET_DATABASE_TYPE.key()); + String dbType = parameter.get(TARGET_DATABASE_TYPE.key()); String targetDatabase = parameter.get(TARGET_DB_DB_NAME.key()); String targetTableName = parameter.get(TARGET_DB_TABLE_NAME.key()).toLowerCase(); String url = parameter.get(TARGET_DB_URL.key()); - String username = parameter.get(TARGET_DB_USER.key()); - String password = parameter.get(TARGET_DB_PASSWORD.key()); + String username = null; + String password = null; + if (!dbType.equals("mongodb")){ + username = parameter.get(TARGET_DB_USER.key()); + password = parameter.get(TARGET_DB_PASSWORD.key()); + } int sinkParallelism = parameter.getInt(SINK_PARALLELISM.key(), SINK_PARALLELISM.defaultValue()); boolean useBatch = parameter.getBoolean(BATHC_STREAM_SINK.key(), BATHC_STREAM_SINK.defaultValue()); //int replicationNum = parameter.getInt(DORIS_REPLICATION_NUM.key(), DORIS_REPLICATION_NUM.defaultValue()); @@ -54,7 +76,7 @@ public static void main(String[] args) throws SQLException { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf); env.setParallelism(sinkParallelism); - switch (targetSyncName) { + switch (dbType) { case "mysql": xsyncToMysql(env, useBatch, url, sourceDatabase, username, password, targetDatabase, sourceTableName, targetTableName); break; @@ -64,8 +86,14 @@ public static void main(String[] args) throws SQLException { case "doris": xsyncToDoris(env, useBatch, url, sourceDatabase, username, password, targetDatabase, sourceTableName, targetTableName, fenodes); break; + case "mongodb": + String uri = parameter.get(MONGO_DB_URI.key()); + int batchSize = parameter.getInt(BATCH_SIZE.key(), BATCH_SIZE.defaultValue()); + int batchIntervalMs = parameter.getInt(BATCH_INTERVAL_MS.key(), BATCH_INTERVAL_MS.defaultValue()); + xsyncToMongodb(env,useBatch,uri,sourceDatabase,targetDatabase,sourceTableName,targetTableName,batchSize,sinkParallelism,batchIntervalMs); + break; default: 
- throw new RuntimeException("not supported the database: " + targetSyncName); + throw new RuntimeException("not supported the database: " + dbType); } } @@ -296,4 +324,117 @@ public static void xsyncToDoris(StreamExecutionEnvironment env, boolean batchXyn tEnvs.executeSql(sql); tEnvs.executeSql("insert into "+targetTableName+" select * from lakeSoul.`"+sourceDatabase+"`."+sourceTableName); } + + public static void xsyncToMongodb(StreamExecutionEnvironment env, + boolean batchXync, + String uri , + String sourceDatabase, + String targetDatabase, + String sourceTableName, + String targetCollection, + int batchSize, + int sinkParallelism, + int batchInservalMs) throws Exception { + createMongoColl(targetDatabase,targetCollection,uri); + if (batchXync) { + env.setRuntimeMode(RuntimeExecutionMode.BATCH); + } else { + env.setRuntimeMode(RuntimeExecutionMode.STREAMING); + env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); + env.getCheckpointConfig().setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); + } + StreamTableEnvironment tEnvs = StreamTableEnvironment.create(env); + Catalog lakesoulCatalog = new LakeSoulCatalog(); + tEnvs.registerCatalog("lakeSoul",lakesoulCatalog); + coll = tEnvs.sqlQuery("select * from lakeSoul.`"+sourceDatabase+"`.`"+sourceTableName+"`"); + tEnvs.registerTable("mongodbTbl",coll); + Table table = tEnvs.sqlQuery("select * from mongodbTbl"); + DataStream> rowDataStream = tEnvs.toRetractStream(table, Row.class); + + MongoClientURI mongUri = new MongoClientURI(uri); + MongoClient mongoClient = new MongoClient(mongUri); + MongoDatabase database = mongoClient.getDatabase(targetDatabase); + database.createCollection(targetCollection); + MongoSink> sink = MongoSink.>builder() + .setUri(uri) + .setDatabase(targetDatabase) + .setCollection(targetCollection) + .setBatchSize(batchSize) + .setBatchIntervalMs(batchInservalMs) + .setMaxRetries(3) + .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) + .setSerializationSchema(new MyMongoSerializationSchema()) + .build(); + rowDataStream.sinkTo(sink).setParallelism(sinkParallelism); + env.execute(); + mongoClient.close(); + } + public static void createMongoColl(String database,String collName,String uri){ + MongoClientURI clientURI = new MongoClientURI(uri); + MongoClient mongoClient = new MongoClient(clientURI); + MongoDatabase mongoDatabase = mongoClient.getDatabase(database); + mongoDatabase.createCollection(collName); + mongoClient.close(); + } + public static class MyMongoSerializationSchema implements MongoSerializationSchema>, Serializable { + @Override + public WriteModel serialize(Tuple2 record, MongoSinkContext context) { + Row row = record.f1; // Extract the Row object from the Tuple2 + BsonDocument document = new BsonDocument(); + int fieldCount = row.getArity(); + for (int i = 0; i < fieldCount; i++) { + String fieldName = coll.getSchema().getFieldNames()[i]; + Object fieldValue = row.getField(i); + if (fieldValue != null) { + try { + document.append(fieldName, convertTonBsonValue(fieldValue)); + } catch (ParseException e) { + throw new RuntimeException(e); + } + } + } + return new InsertOneModel<>(document); + } + + public static BsonValue convertTonBsonValue(Object value) throws ParseException { + if (value == null) { + return new BsonNull(); + } else if (value instanceof Integer) { + return new BsonInt32((Integer) value); + } else if (value instanceof Long) { + return new BsonInt64((Long) value); + } else if (value instanceof String) { + return new 
BsonString((String) value); + } else if (value instanceof Boolean) { + return new BsonBoolean( (Boolean) value); + } else if (value instanceof Double) { + return new BsonDouble( (Double) value); + } else if (value instanceof DecimalType) { + return new BsonDecimal128((Decimal128) value); + } else if (value instanceof Date) { + return new BsonDateTime( (long) value); + } else if (value instanceof Object[]) { + Object[] array = (Object[]) value; + BsonArray bsonArray = new BsonArray(); + for (Object element : array) { + bsonArray.add(convertTonBsonValue(element)); + } + return bsonArray; + } else if (isDateTimeString(value)) { + Date date = parseDateTime(value.toString()); + return new BsonDateTime(date.getTime()); + } else { + throw new IllegalArgumentException("Unsupported data type: " + value.getClass()); + } + } + private static boolean isDateTimeString(Object value) { + return value.toString().matches("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z$"); + } + private static Date parseDateTime(String value) throws ParseException { + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + return dateFormat.parse(value); + } + + } } \ No newline at end of file diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java index de3c129dc..ecd8c3573 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java @@ -201,6 +201,9 @@ public List commit( identity.useCDC, identity.cdcColumn); if (dbType.equals("mongodb")){ + System.out.println(msgSchema.toJson()); + System.out.println(origSchema.json()); + System.out.println(mergeStructType.json()); dbManager.updateTableSchema(tableInfo.getTableId(), mergeStructType.json()); }else { dbManager.updateTableSchema(tableInfo.getTableId(), msgSchema.toJson()); diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java index 6b9aa5fd3..278e2a6c5 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java @@ -35,7 +35,23 @@ public class LakeSoulSinkDatabasesOptions extends LakeSoulSinkOptions { .noDefaultValue() .withDescription("source database user_name"); + public static final ConfigOption MONGO_DB_URI = ConfigOptions + .key("mongodb.uri") + .stringType() + .noDefaultValue() + .withDescription("mongodb uri"); + public static final ConfigOption BATCH_SIZE = ConfigOptions + .key("batchSize") + .intType() + .defaultValue(1000) + .withDescription("Sets the maximum number of actions to buffer for each batch request"); + + public static final ConfigOption BATCH_INTERVAL_MS = ConfigOptions + .key("batchIntervalMs") + .intType() + .defaultValue(1000) + .withDescription("Sets the batch flush interval, in milliseconds"); public static final ConfigOption TARGET_DB_PASSWORD = ConfigOptions .key("target_db.password") .stringType() @@ -49,21 +65,18 @@ public class LakeSoulSinkDatabasesOptions extends LakeSoulSinkOptions { .noDefaultValue() 
.withDescription("mysql,postgres,doris"); - public static final ConfigOption TARGET_DB_DB_NAME = ConfigOptions .key("target_db.db_name") .stringType() .noDefaultValue() .withDescription("target ddatabase name"); - public static final ConfigOption SOURCE_DB_LAKESOUL_TABLE = ConfigOptions .key("source_db.table_name") .stringType() .noDefaultValue() .withDescription("lakesoul table"); - public static final ConfigOption TARGET_DB_TABLE_NAME = ConfigOptions .key("target_db.table_name") .stringType() @@ -84,7 +97,6 @@ public class LakeSoulSinkDatabasesOptions extends LakeSoulSinkOptions { .defaultValue(1) .withDescription("parallelism settings for out-of-the-lake"); - public static final ConfigOption BATHC_STREAM_SINK = ConfigOptions .key("use_batch") .booleanType() diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkOptions.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkOptions.java index 87051d84e..2d4e6fe4c 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkOptions.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkOptions.java @@ -141,6 +141,17 @@ public class LakeSoulSinkOptions { .stringType() .noDefaultValue() .withDescription("source database schemaList"); + public static final ConfigOption MONGO_DB_DATABASE = ConfigOptions + .key("mongodb_database") + .stringType() + .noDefaultValue() + .withDescription("mongodb database"); + + public static final ConfigOption BATCH_SIZE = ConfigOptions + .key("batchSize") + .intType() + .defaultValue(1024) + .withDescription("The cursor batch size"); public static final ConfigOption SOURCE_DB_SCHEMA_TABLES = ConfigOptions .key("source_db.schema_tables") diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java index 829cc6eb6..a8a27015e 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java @@ -36,6 +36,7 @@ import org.apache.flink.table.data.*; import org.apache.flink.table.data.binary.BinaryRowData; import org.apache.flink.table.data.writer.BinaryRowWriter; +import org.apache.flink.table.runtime.typeutils.ArrayDataSerializer; import org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer; import org.apache.flink.table.runtime.typeutils.RowDataSerializer; import org.apache.flink.table.types.logical.*; @@ -237,7 +238,7 @@ public RowType toFlinkRowType(Schema schema, boolean nullable) { for (int i = 0; i < (useCDC ? arity - 2 : arity - 1); i++) { Field item = fieldNames.get(i); colNames[i] = item.name(); - colTypes[i] = convertToLogical(item.schema(),nullable); + colTypes[i] = convertToLogical(item.schema(), !item.name().equals("_id") && nullable); } // colNames[useCDC ? arity - 3 : arity - 2] = BINLOG_FILE_INDEX; // colTypes[useCDC ? 
arity - 3 : arity - 2] = new BigIntType(); @@ -292,6 +293,8 @@ private LogicalType primitiveLogicalType(Schema fieldSchema,boolean nullable) { case STRUCT: List rowFields = getRowFields(fieldSchema); return new RowType(nullable, rowFields); + case ARRAY: + return new ArrayType(nullable, Objects.requireNonNull(primitiveLogicalType(fieldSchema.valueSchema(), nullable))); case BYTES: Map paras = fieldSchema.parameters(); int byteLen = Integer.MAX_VALUE; @@ -325,13 +328,18 @@ private LogicalType otherLogicalType(Schema fieldSchema, boolean nullable) { return new LocalZonedTimestampType(nullable, 9); case Decimal.LOGICAL_NAME: Map paras = fieldSchema.parameters(); - return new DecimalType(nullable, Integer.parseInt(paras.get("connect.decimal.precision")), Integer.parseInt(paras.get("scale"))); + if (paras.get("connect.decimal.precision") == null){ + return new DecimalType(nullable, 38, 30); + }else { + return new DecimalType(nullable, Integer.parseInt(paras.get("connect.decimal.precision")), Integer.parseInt(paras.get("scale"))); + } case Date.SCHEMA_NAME: return new DateType(nullable); case Year.SCHEMA_NAME: return new IntType(nullable); case ZonedTime.SCHEMA_NAME: case ZonedTimestamp.SCHEMA_NAME: + case com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Timestamp.LOGICAL_NAME: return new LocalZonedTimestampType(nullable, LocalZonedTimestampType.DEFAULT_PRECISION); case Geometry.LOGICAL_NAME: case VariableScaleDecimal.LOGICAL_NAME: @@ -515,6 +523,9 @@ private void primitiveTypeWrite(BinaryRowWriter writer, int index, Object fieldV case BYTES: writeBinary(writer, index, fieldValue); break; + case ARRAY: + writeArray(writer,index,fieldValue, fieldSchema); + break; // case STRUCT: // writeRow(writer, index, fieldValue); // break; @@ -540,6 +551,7 @@ private void otherTypeWrite(BinaryRowWriter writer, int index, case Timestamp.SCHEMA_NAME: case MicroTimestamp.SCHEMA_NAME: case NanoTimestamp.SCHEMA_NAME: + case com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Timestamp.LOGICAL_NAME: writeTimeStamp(writer, index, fieldValue, fieldSchema, serverTimeZone); break; case Decimal.LOGICAL_NAME: @@ -561,6 +573,7 @@ private void otherTypeWrite(BinaryRowWriter writer, int index, case MicroDuration.SCHEMA_NAME: writeLong(writer, index, fieldValue); break; + // Geometry and Point can not support now // case Geometry.LOGICAL_NAME: // Object object = convertToGeometry(fieldValue, fieldSchema); @@ -612,7 +625,7 @@ public Object convertToDecimal(Object dbzObj, Schema schema) { } } Map paras = schema.parameters(); - if (paras == null) { + if (paras.get("connect.decimal.precision") == null) { return DecimalData.fromBigDecimal(bigDecimal, 38, 30); } else { return DecimalData.fromBigDecimal(bigDecimal, Integer.parseInt(paras.get("connect.decimal.precision")), Integer.parseInt(paras.get("scale"))); @@ -711,6 +724,40 @@ private Object convertToPoint(Object dbzObj, Schema schema) { "Unsupported Struct value type: " + dbzObj.getClass().getSimpleName()); } } + public void writeArray(BinaryRowWriter writer, int index, Object dbzObj, Schema schema) { + if (dbzObj instanceof ArrayList) { + ArrayList arrayList = (ArrayList) dbzObj; + ArrayData arrayData = null; + ArrayDataSerializer arrayDataSerializer = null; + switch (schema.valueSchema().type()){ + case STRING: + StringData[] stringDataArray = new StringData[arrayList.size()]; + for (int i = 0;i < arrayList.size(); i++){ + Object element = arrayList.get(i); + stringDataArray[i] = StringData.fromString(element.toString()); + } + arrayData = new 
GenericArrayData(stringDataArray); + arrayDataSerializer = new ArrayDataSerializer(new VarCharType(Integer.MAX_VALUE)); + break; + case INT8: + case INT16: + case INT32: + Object[] array = arrayList.toArray(); + arrayData = new GenericArrayData(array); + arrayDataSerializer = + new ArrayDataSerializer(new IntType()); + break; + case FLOAT32: + case FLOAT64: + array = arrayList.toArray(); + arrayDataSerializer = + new ArrayDataSerializer(new DoubleType()); + arrayData = new GenericArrayData(array); + break; + } + writer.writeArray(index, arrayData, arrayDataSerializer); + } + } public void writeTimeStamp(BinaryRowWriter writer, int index, Object dbzObj, Schema schema, ZoneId serverTimeZone) { TimestampData data = (TimestampData) convertToTimeStamp(dbzObj, schema, serverTimeZone); diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java index 79e8d86b5..c2899a1fc 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java @@ -4,13 +4,14 @@ package org.apache.flink.lakesoul.types; -import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Field; -import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Schema; -import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.SchemaBuilder; -import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Struct; +import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.*; import org.bson.Document; +import org.bson.types.Decimal128; +import java.math.BigDecimal; +import java.util.Date; import java.util.Map; +import java.util.List; public class ParseDocument { public static Struct convertBSONToStruct(String value) { @@ -32,6 +33,11 @@ private static Schema buildSchema(Document bsonDocument, SchemaBuilder structSch // 处理嵌套的 Document SchemaBuilder nestedStructSchemaBuilder = SchemaBuilder.struct(); structSchemaBuilder.field(fieldName, buildSchema((Document) value, nestedStructSchemaBuilder)); + } else if (value instanceof List) { + // 处理ArrayList类型 + List arrayList = (List) value; + Schema arraySchema = getSchemaForArrayList(arrayList); + structSchemaBuilder.field(fieldName, arraySchema); } else { // 处理普通字段 structSchemaBuilder.field(fieldName, getSchemaForValue(value)); @@ -40,6 +46,16 @@ private static Schema buildSchema(Document bsonDocument, SchemaBuilder structSch return structSchemaBuilder.build(); } + private static Schema getSchemaForArrayList(List arrayList) { + // 假设 ArrayList 中的元素都是整数,您可以根据实际情况调整 + Schema elementSchema = null; + if (!arrayList.isEmpty()) { + Object firstElement = arrayList.get(0); + elementSchema = getSchemaForValue(firstElement); + } + return SchemaBuilder.array(elementSchema).build(); + } + private static void fillStructValues(Document bsonDocument, Struct struct) { for (Map.Entry entry : bsonDocument.entrySet()) { String fieldName = entry.getKey(); @@ -49,9 +65,15 @@ private static void fillStructValues(Document bsonDocument, Struct struct) { Struct nestedStruct = new Struct(struct.schema().field(fieldName).schema()); fillStructValues((Document) value, nestedStruct); struct.put(fieldName, nestedStruct); + } else if (value instanceof List) { + // 处理ArrayList类型 + List arrayList = (List) value; + struct.put(fieldName, arrayList); + } else if (value instanceof Decimal128) { + BigDecimal decimalValue = new 
BigDecimal(value.toString()); + struct.put(fieldName, decimalValue); } else { - // 处理普通字段 - struct.put(fieldName, value); + struct.put(fieldName,value); } } } @@ -68,9 +90,14 @@ private static Schema getSchemaForValue(Object value) { return Schema.FLOAT64_SCHEMA; } else if (value instanceof Boolean) { return Schema.BOOLEAN_SCHEMA; + } else if (value instanceof Decimal128) { + BigDecimal decimalValue = new BigDecimal(value.toString()); + return Decimal.schema(decimalValue.scale()); } else if (value instanceof Byte) { return Schema.BYTES_SCHEMA; - } else { + } else if (value instanceof Date) { + return Timestamp.SCHEMA; + } else { // 处理其他类型,可以根据实际情况添加更多类型 return Schema.STRING_SCHEMA; } From 363ead8f644840b658c316edfa79111d55bcfc8f Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Wed, 27 Mar 2024 14:35:40 +0800 Subject: [PATCH 03/13] fix some bugs Signed-off-by: ChenYunHey <1908166778@qq.com> --- lakesoul-flink/pom.xml | 19 +- .../apache/flink/lakesoul/entry/JdbcCDC.java | 3 +- .../flink/lakesoul/entry/MongoSinkUtils.java | 171 ++++++++++++++ .../flink/lakesoul/entry/SyncDatabase.java | 223 ++++++------------ .../LakeSoulSinkGlobalCommitter.java | 22 +- .../tool/LakeSoulSinkDatabasesOptions.java | 2 + .../lakesoul/types/BinarySourceRecord.java | 3 + .../lakesoul/types/LakeSoulRecordConvert.java | 10 +- .../flink/lakesoul/types/ParseDocument.java | 12 + .../flink/lakesoul/types/StructConvert.java | 27 --- 10 files changed, 292 insertions(+), 200 deletions(-) create mode 100644 lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java diff --git a/lakesoul-flink/pom.xml b/lakesoul-flink/pom.xml index 5218fc67d..6519d0b17 100644 --- a/lakesoul-flink/pom.xml +++ b/lakesoul-flink/pom.xml @@ -127,11 +127,7 @@ SPDX-License-Identifier: Apache-2.0 test test-jar - - com.ververica - flink-sql-connector-postgres-cdc - ${cdc.version} - + com.ververica flink-sql-connector-sqlserver-cdc @@ -148,9 +144,9 @@ SPDX-License-Identifier: Apache-2.0 ${cdc.version} - org.mongodb - mongo-java-driver - 3.12.14 + com.ververica + flink-sql-connector-postgres-cdc + ${cdc.version} com.ververica @@ -162,15 +158,10 @@ SPDX-License-Identifier: Apache-2.0 flink-connector-mongodb 1.0.1-1.17 - - org.mongodb - mongodb-driver-sync - 3.10.1 - org.mongodb bson - 4.3.4 + 4.3.4 org.apache.doris diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java index d1c9cb1e5..fd365ece6 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java @@ -12,7 +12,7 @@ import com.ververica.cdc.connectors.oracle.source.OracleSourceBuilder; import com.ververica.cdc.connectors.postgres.source.PostgresSourceBuilder; import com.ververica.cdc.connectors.mongodb.source.MongoDBSource; -import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema; + import org.apache.flink.api.common.restartstrategy.RestartStrategies; import org.apache.flink.api.common.time.Time; import com.ververica.cdc.connectors.sqlserver.source.SqlServerSourceBuilder; @@ -218,6 +218,7 @@ private static void postgresCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Con DataStreamSource source = builder.buildMultiTableSource("Postgres Source"); DataStream stream = builder.buildHashPartitionedCDCStream(source); + stream.print(); DataStreamSink dmlSink = builder.buildLakeSoulDMLSink(stream); env.execute("LakeSoul CDC Sink From Postgres 
Database " + dbName); } diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java new file mode 100644 index 000000000..7e4b1f06a --- /dev/null +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java @@ -0,0 +1,171 @@ +// SPDX-FileCopyrightText: 2023 LakeSoul Contributors +// +// SPDX-License-Identifier: Apache-2.0 + +package org.apache.flink.lakesoul.entry; + +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; +import com.mongodb.client.MongoClient; +import com.mongodb.client.MongoClients; +import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.InsertOneModel; +import com.mongodb.client.model.WriteModel; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.connector.mongodb.sink.writer.context.MongoSinkContext; +import org.apache.flink.connector.mongodb.sink.writer.serializer.MongoSerializationSchema; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.BinaryType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.types.Row; +import org.bson.*; +import org.bson.types.Decimal128; + +import java.io.Serializable; +import java.math.BigDecimal; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; + +public class MongoSinkUtils { + + static Table coll; + static List structNameFiledList; + + public static void createMongoColl(String database, String collName, String uri) { + MongoClient mongoClient = MongoClients.create(uri); + MongoDatabase mongoDatabase = mongoClient.getDatabase(database); + mongoDatabase.createCollection(collName); + mongoClient.close(); + } + + public static class MyMongoSerializationSchema implements MongoSerializationSchema>, Serializable { + @Override + public WriteModel serialize(Tuple2 record, MongoSinkContext context) { + Row row = record.f1; // Extract the Row object from the Tuple2 + BsonDocument document = new BsonDocument(); + int fieldCount = row.getArity(); + DataType[] fieldDataTypes = coll.getSchema().getFieldDataTypes(); + for (int i = 0; i < fieldCount; i++) { + String fieldName = coll.getSchema().getFieldNames()[i]; + Object fieldValue = row.getField(i); + if (fieldValue instanceof Row) { + DataType dataType = fieldDataTypes[i]; + RowType rowType = (RowType) dataType.getLogicalType(); + structNameFiledList = traverseRow(rowType); + } + if (fieldValue != null) { + try { + document.append(fieldName, convertTonBsonValue(fieldValue, fieldName)); + } catch (ParseException e) { + throw new RuntimeException(e); + } + } + } + return new InsertOneModel<>(document); + } + + + public static BsonValue convertTonBsonValue(Object value, String fieldName) throws ParseException { + if (value == null) { + return new BsonNull(); + } else if (value instanceof Integer) { + return new BsonInt32((Integer) value); + } else if (value instanceof Long) { + return new BsonInt64((Long) value); + } else if (value instanceof String) { + return new BsonString((String) value); + } else if (value instanceof Boolean) { + return new BsonBoolean((Boolean) value); + } else if (value instanceof Double) { + return new BsonDouble((Double) value); + } else if (value instanceof BigDecimal) { + return new BsonDecimal128(new Decimal128((BigDecimal) value)); + } else if (value 
instanceof Date) { + return new BsonDateTime((long) value); + } else if (value instanceof BinaryType) { + return new BsonBinary((byte[]) value); + } else if (value instanceof byte[]) { + return new BsonBinary((byte[]) value); + } else if (value instanceof Object[]) { + Object[] array = (Object[]) value; + BsonArray bsonArray = new BsonArray(); + for (Object element : array) { + bsonArray.add(convertTonBsonValue(element, fieldName)); + } + return bsonArray; + } else if (isDateTimeString(value)) { + Date date = parseDateTime(value.toString()); + return new BsonDateTime(date.getTime()); + } else if (value instanceof Row) { + Row row = (Row) value; + BsonDocument bsonDocument = new BsonDocument(); + for (int i = 0; i < row.getArity(); i++) { + Object fieldValue = row.getField(i); + List stringList = new ArrayList<>(structNameFiledList); + String name = structNameFiledList.get(0); + stringList.remove(0); + structNameFiledList = stringList; + bsonDocument.append(name, convertTonBsonValue(fieldValue, name)); + } + return bsonDocument; + } else { + throw new IllegalArgumentException("Unsupported data type: " + value.getClass()); + } + } + + public static List traverseRow(RowType rowType) { + List nameList = new ArrayList<>(); + traverseField(rowType, nameList); + return nameList; + } + + + private static void traverseField(RowType rowType, List nameList) { + for (int i = 0; i < rowType.getFieldCount(); i++) { + String fieldName = rowType.getFieldNames().get(i); + LogicalType fieldType = rowType.getTypeAt(i); + nameList.add(fieldName); + if (fieldType instanceof RowType) { + traverseField((RowType) fieldType, nameList); + } + } + } + + public static List findDirectNestedNames(JSONObject jsonObject, String targetFieldName, int currentLevel, int targetLevel) { + List nestedNames = new ArrayList<>(); + findDirectNestedNamesHelper(jsonObject, targetFieldName, currentLevel, targetLevel, nestedNames); + return nestedNames; + } + + public static void findDirectNestedNamesHelper(JSONObject jsonObject, String targetFieldName, int currentLevel, int targetLevel, List nestedNames) { + if (currentLevel == targetLevel) { + if (jsonObject.getString("name").equals(targetFieldName)) { + JSONArray children = jsonObject.getJSONArray("children"); + for (Object obj : children) { + JSONObject child = (JSONObject) obj; + nestedNames.add(child.getString("name")); + } + } + } else { + JSONArray children = jsonObject.getJSONArray("children"); + for (Object obj : children) { + JSONObject child = (JSONObject) obj; + findDirectNestedNamesHelper(child, targetFieldName, currentLevel + 1, targetLevel, nestedNames); + } + } + } + + public static boolean isDateTimeString(Object value) { + return value.toString().matches("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z$"); + } + + public static Date parseDateTime(String value) throws ParseException { + SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + return dateFormat.parse(value); + } + } +} \ No newline at end of file diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java index 725707caf..14b67bbaa 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java @@ -2,24 +2,16 @@ // // SPDX-License-Identifier: Apache-2.0 - package 
org.apache.flink.lakesoul.entry; import com.dmetasoul.lakesoul.meta.DBManager; import com.dmetasoul.lakesoul.meta.DBUtil; import com.dmetasoul.lakesoul.meta.entity.TableInfo; -import com.mongodb.MongoClientURI; -import com.mongodb.MongoClient; -import com.mongodb.client.MongoDatabase; -import com.mongodb.client.model.InsertOneModel; -import com.mongodb.client.model.WriteModel; import org.apache.flink.api.common.RuntimeExecutionMode; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.connector.base.DeliveryGuarantee; import org.apache.flink.connector.mongodb.sink.MongoSink; -import org.apache.flink.connector.mongodb.sink.writer.context.MongoSinkContext; -import org.apache.flink.connector.mongodb.sink.writer.serializer.MongoSerializationSchema; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.RestOptions; import org.apache.flink.lakesoul.metadata.LakeSoulCatalog; @@ -34,39 +26,41 @@ import org.apache.flink.table.types.DataType; import org.apache.flink.table.types.logical.*; import org.apache.flink.types.Row; -import org.bson.*; -import org.bson.types.Decimal128; - -import java.io.Serializable; import java.sql.*; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.*; -import java.util.Date; +import static org.apache.flink.lakesoul.entry.MongoSinkUtils.*; import static org.apache.flink.lakesoul.tool.LakeSoulSinkDatabasesOptions.*; public class SyncDatabase { - static Table coll; + static String targetTableName; + static String dbType; + static String sourceDatabase; + static String sourceTableName; + static String targetDatabase; + static String url; + static String username; + static String password; + static boolean useBatch; + static int sinkParallelism; + public static void main(String[] args) throws Exception { ParameterTool parameter = ParameterTool.fromArgs(args); - String sourceDatabase = parameter.get(SOURCE_DB_DB_NAME.key()); - String sourceTableName = parameter.get(SOURCE_DB_LAKESOUL_TABLE.key()).toLowerCase(); - String dbType = parameter.get(TARGET_DATABASE_TYPE.key()); - String targetDatabase = parameter.get(TARGET_DB_DB_NAME.key()); - String targetTableName = parameter.get(TARGET_DB_TABLE_NAME.key()).toLowerCase(); - String url = parameter.get(TARGET_DB_URL.key()); - String username = null; - String password = null; - if (!dbType.equals("mongodb")){ + sourceDatabase = parameter.get(SOURCE_DB_DB_NAME.key()); + sourceTableName = parameter.get(SOURCE_DB_LAKESOUL_TABLE.key()).toLowerCase(); + dbType = parameter.get(TARGET_DATABASE_TYPE.key()); + targetDatabase = parameter.get(TARGET_DB_DB_NAME.key()); + targetTableName = parameter.get(TARGET_DB_TABLE_NAME.key()).toLowerCase(); + url = parameter.get(TARGET_DB_URL.key()); + if (!dbType.equals("mongodb")) { username = parameter.get(TARGET_DB_USER.key()); password = parameter.get(TARGET_DB_PASSWORD.key()); } - int sinkParallelism = parameter.getInt(SINK_PARALLELISM.key(), SINK_PARALLELISM.defaultValue()); - boolean useBatch = parameter.getBoolean(BATHC_STREAM_SINK.key(), BATHC_STREAM_SINK.defaultValue()); + sinkParallelism = parameter.getInt(SINK_PARALLELISM.key(), SINK_PARALLELISM.defaultValue()); + useBatch = parameter.getBoolean(BATHC_STREAM_SINK.key(), BATHC_STREAM_SINK.defaultValue()); //int replicationNum = parameter.getInt(DORIS_REPLICATION_NUM.key(), DORIS_REPLICATION_NUM.defaultValue()); String fenodes = parameter.get(DORIS_FENODES.key(), DORIS_FENODES.defaultValue()); @@ -78,19 
+72,19 @@ public static void main(String[] args) throws Exception { switch (dbType) { case "mysql": - xsyncToMysql(env, useBatch, url, sourceDatabase, username, password, targetDatabase, sourceTableName, targetTableName); + xsyncToMysql(env); break; case "postgres": - xsyncToPg(env, useBatch, url, sourceDatabase, username, password, targetDatabase, sourceTableName, targetTableName); + xsyncToPg(env); break; case "doris": - xsyncToDoris(env, useBatch, url, sourceDatabase, username, password, targetDatabase, sourceTableName, targetTableName, fenodes); + xsyncToDoris(env, fenodes); break; case "mongodb": String uri = parameter.get(MONGO_DB_URI.key()); int batchSize = parameter.getInt(BATCH_SIZE.key(), BATCH_SIZE.defaultValue()); int batchIntervalMs = parameter.getInt(BATCH_INTERVAL_MS.key(), BATCH_INTERVAL_MS.defaultValue()); - xsyncToMongodb(env,useBatch,uri,sourceDatabase,targetDatabase,sourceTableName,targetTableName,batchSize,sinkParallelism,batchIntervalMs); + xsyncToMongodb(env, uri, batchSize, batchIntervalMs); break; default: throw new RuntimeException("not supported the database: " + dbType); @@ -109,7 +103,7 @@ public static String pgAndMsqlCreateTableSql(String[] stringFieldTypes, String[] createTableQuery.append(", "); } } - if (pk!=null){ + if (pk != null) { createTableQuery.append(" ,PRIMARY KEY(").append(pk); createTableQuery.append(")"); } @@ -119,11 +113,10 @@ public static String pgAndMsqlCreateTableSql(String[] stringFieldTypes, String[] public static String[] getMysqlFieldsTypes(DataType[] fieldTypes, String[] fieldNames, String pk) { String[] stringFieldTypes = new String[fieldTypes.length]; - for (int i = 0; i < fieldTypes.length; i++) { if (fieldTypes[i].getLogicalType() instanceof VarCharType) { String mysqlType = "TEXT"; - if (pk!=null){ + if (pk != null) { if (pk.contains(fieldNames[i])) { mysqlType = "VARCHAR(100)"; } @@ -137,6 +130,8 @@ public static String[] getMysqlFieldsTypes(DataType[] fieldTypes, String[] field stringFieldTypes[i] = "TIMESTAMP"; } else if (fieldTypes[i].getLogicalType() instanceof BooleanType) { stringFieldTypes[i] = "BOOLEAN"; + } else if (fieldTypes[i].getLogicalType() instanceof VarBinaryType) { + stringFieldTypes[i] = "BLOB"; } else { stringFieldTypes[i] = fieldTypes[i].toString(); } @@ -150,7 +145,7 @@ public static String[] getPgFieldsTypes(DataType[] fieldTypes, String[] fieldNam for (int i = 0; i < fieldTypes.length; i++) { if (fieldTypes[i].getLogicalType() instanceof VarCharType) { String mysqlType = "TEXT"; - if (pk!=null){ + if (pk != null) { if (pk.contains(fieldNames[i])) { mysqlType = "VARCHAR(100)"; } @@ -164,6 +159,8 @@ public static String[] getPgFieldsTypes(DataType[] fieldTypes, String[] fieldNam stringFieldTypes[i] = "BYTEA"; } else if (fieldTypes[i].getLogicalType() instanceof LocalZonedTimestampType | fieldTypes[i].getLogicalType() instanceof TimestampType) { stringFieldTypes[i] = "TIMESTAMP"; + } else if (fieldTypes[i].getLogicalType() instanceof VarBinaryType) { + stringFieldTypes[i] = "BYTEA"; } else { stringFieldTypes[i] = fieldTypes[i].toString(); } @@ -174,13 +171,13 @@ public static String[] getPgFieldsTypes(DataType[] fieldTypes, String[] fieldNam public static String[] getDorisFieldTypes(DataType[] fieldTypes) { String[] stringFieldTypes = new String[fieldTypes.length]; for (int i = 0; i < fieldTypes.length; i++) { - if (fieldTypes[i].getLogicalType() instanceof TimestampType){ + if (fieldTypes[i].getLogicalType() instanceof TimestampType) { stringFieldTypes[i] = "DATETIME"; - } - else if 
(fieldTypes[i].getLogicalType() instanceof VarCharType){ + } else if (fieldTypes[i].getLogicalType() instanceof VarCharType) { stringFieldTypes[i] = "VARCHAR"; - - } else { + } else if (fieldTypes[i].getLogicalType() instanceof LocalZonedTimestampType | fieldTypes[i].getLogicalType() instanceof TimestampType) { + stringFieldTypes[i] = "TIMESTAMP"; + } else { stringFieldTypes[i] = fieldTypes[i].toString(); } } @@ -192,19 +189,20 @@ public static String getTablePk(String sourceDataBae, String sourceTableName) { TableInfo tableInfo = dbManager.getTableInfoByNameAndNamespace(sourceTableName, sourceDataBae); String partitions = tableInfo.getPartitions(); DBUtil.TablePartitionKeys keys = DBUtil.parseTableInfoPartitions(partitions); + List primaryKeys = keys.primaryKeys; StringBuilder stringBuilder = new StringBuilder(); for (int i = 0; i < primaryKeys.size(); i++) { stringBuilder.append(primaryKeys.get(i)); - if (i> rowDataStream = tEnvs.toRetractStream(table, Row.class); - - MongoClientURI mongUri = new MongoClientURI(uri); - MongoClient mongoClient = new MongoClient(mongUri); - MongoDatabase database = mongoClient.getDatabase(targetDatabase); - database.createCollection(targetCollection); MongoSink> sink = MongoSink.>builder() .setUri(uri) .setDatabase(targetDatabase) - .setCollection(targetCollection) + .setCollection(targetTableName) .setBatchSize(batchSize) .setBatchIntervalMs(batchInservalMs) .setMaxRetries(3) @@ -367,74 +357,5 @@ public static void xsyncToMongodb(StreamExecutionEnvironment env, .build(); rowDataStream.sinkTo(sink).setParallelism(sinkParallelism); env.execute(); - mongoClient.close(); - } - public static void createMongoColl(String database,String collName,String uri){ - MongoClientURI clientURI = new MongoClientURI(uri); - MongoClient mongoClient = new MongoClient(clientURI); - MongoDatabase mongoDatabase = mongoClient.getDatabase(database); - mongoDatabase.createCollection(collName); - mongoClient.close(); - } - public static class MyMongoSerializationSchema implements MongoSerializationSchema>, Serializable { - @Override - public WriteModel serialize(Tuple2 record, MongoSinkContext context) { - Row row = record.f1; // Extract the Row object from the Tuple2 - BsonDocument document = new BsonDocument(); - int fieldCount = row.getArity(); - for (int i = 0; i < fieldCount; i++) { - String fieldName = coll.getSchema().getFieldNames()[i]; - Object fieldValue = row.getField(i); - if (fieldValue != null) { - try { - document.append(fieldName, convertTonBsonValue(fieldValue)); - } catch (ParseException e) { - throw new RuntimeException(e); - } - } - } - return new InsertOneModel<>(document); - } - - public static BsonValue convertTonBsonValue(Object value) throws ParseException { - if (value == null) { - return new BsonNull(); - } else if (value instanceof Integer) { - return new BsonInt32((Integer) value); - } else if (value instanceof Long) { - return new BsonInt64((Long) value); - } else if (value instanceof String) { - return new BsonString((String) value); - } else if (value instanceof Boolean) { - return new BsonBoolean( (Boolean) value); - } else if (value instanceof Double) { - return new BsonDouble( (Double) value); - } else if (value instanceof DecimalType) { - return new BsonDecimal128((Decimal128) value); - } else if (value instanceof Date) { - return new BsonDateTime( (long) value); - } else if (value instanceof Object[]) { - Object[] array = (Object[]) value; - BsonArray bsonArray = new BsonArray(); - for (Object element : array) { - 
bsonArray.add(convertTonBsonValue(element)); - } - return bsonArray; - } else if (isDateTimeString(value)) { - Date date = parseDateTime(value.toString()); - return new BsonDateTime(date.getTime()); - } else { - throw new IllegalArgumentException("Unsupported data type: " + value.getClass()); - } - } - private static boolean isDateTimeString(Object value) { - return value.toString().matches("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{3}Z$"); - } - private static Date parseDateTime(String value) throws ParseException { - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); - dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); - return dateFormat.parse(value); - } - } } \ No newline at end of file diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java index ecd8c3573..32cf835c0 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java @@ -164,7 +164,9 @@ public List commit( StructType origSchema ; if (TableInfoDao.isArrowKindSchema(tableInfo.getTableSchema())) { Schema arrowSchema = Schema.fromJSON(tableInfo.getTableSchema()); + System.out.println(arrowSchema.toJson()); origSchema = ArrowUtils.fromArrowSchema(arrowSchema); + System.out.println(origSchema.json()); } else { origSchema = (StructType) StructType.fromJson(tableInfo.getTableSchema()); } @@ -177,7 +179,20 @@ public List commit( String equalOrCanCast = equalOrCanCastTuple3._1(); boolean schemaChanged = (boolean) equalOrCanCastTuple3._2(); StructType mergeStructType = equalOrCanCastTuple3._3(); - if (equalOrCanCast.equals(DataTypeCastUtils.CAN_CAST())) { + + System.out.println(origSchema.json()); + System.out.println(msgSchema.toJson()); + System.out.println(mergeStructType.json()); + System.out.println(ArrowUtils.toArrowSchema(mergeStructType, "UTC").toJson()); + boolean schemaChangeFound = false; + if (dbType.equals("mongodb")){ + if (mergeStructType.length() > origSchema.size()){ + schemaChangeFound = schemaChanged; + } + }else { + schemaChangeFound = equalOrCanCast.equals(DataTypeCastUtils.CAN_CAST()); + } + if (schemaChangeFound) { LOG.warn("Schema change found, origin schema = {}, changed schema = {}", origSchema.json(), msgSchema.toJson()); @@ -201,10 +216,7 @@ public List commit( identity.useCDC, identity.cdcColumn); if (dbType.equals("mongodb")){ - System.out.println(msgSchema.toJson()); - System.out.println(origSchema.json()); - System.out.println(mergeStructType.json()); - dbManager.updateTableSchema(tableInfo.getTableId(), mergeStructType.json()); + dbManager.updateTableSchema(tableInfo.getTableId(), ArrowUtils.toArrowSchema(mergeStructType,"UTC").toJson()); }else { dbManager.updateTableSchema(tableInfo.getTableId(), msgSchema.toJson()); } diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java index 278e2a6c5..157a42061 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/tool/LakeSoulSinkDatabasesOptions.java @@ -108,4 +108,6 @@ public class LakeSoulSinkDatabasesOptions extends LakeSoulSinkOptions { 
.key("doris.fenodes") .stringType() .defaultValue("127.0.0.1:8030"); + + } \ No newline at end of file diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java index b327f6c3c..cb7bba789 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java @@ -13,6 +13,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.core.fs.Path; import org.apache.flink.lakesoul.tool.FlinkUtil; +import org.apache.flink.table.data.TimestampData; import java.util.ArrayList; import java.util.Collections; @@ -100,6 +101,8 @@ public static BinarySourceRecord fromMysqlSourceRecord(SourceRecord sourceRecord } long sortField = (binlogFileIndex << 32) + binlogPosition; LakeSoulRowDataWrapper data = convert.toLakeSoulDataType(valueSchema, value, tableId, tsMs, sortField); +// TimestampData timestamp = data.getAfter().getTimestamp(1, 18); +// System.out.println(timestamp); String tablePath; if (tableId.schema()==null){ tablePath = new Path(new Path(basePath, tableId.catalog()), tableId.table()).toString(); diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java index a8a27015e..666e1aee6 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java @@ -552,7 +552,7 @@ private void otherTypeWrite(BinaryRowWriter writer, int index, case MicroTimestamp.SCHEMA_NAME: case NanoTimestamp.SCHEMA_NAME: case com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Timestamp.LOGICAL_NAME: - writeTimeStamp(writer, index, fieldValue, fieldSchema, serverTimeZone); + writeTimeStamp(writer, index, fieldValue, fieldSchema,serverTimeZone); break; case Decimal.LOGICAL_NAME: writeDecimal(writer, index, fieldValue, fieldSchema); @@ -625,7 +625,7 @@ public Object convertToDecimal(Object dbzObj, Schema schema) { } } Map paras = schema.parameters(); - if (paras.get("connect.decimal.precision") == null) { + if ( paras==null || paras.get("connect.decimal.precision") == null) { return DecimalData.fromBigDecimal(bigDecimal, 38, 30); } else { return DecimalData.fromBigDecimal(bigDecimal, Integer.parseInt(paras.get("connect.decimal.precision")), Integer.parseInt(paras.get("scale"))); @@ -700,8 +700,14 @@ public Object convertToTimeStamp(Object dbzObj, Schema schema, ZoneId serverTime return TimestampData.fromInstant(zonedDateTime.toInstant()); } return null; + } else if (dbzObj instanceof java.util.Date) { + java.util.Date date = (java.util.Date)dbzObj; + long timestamp = date.toInstant().toEpochMilli(); + Instant instant = TimestampData.fromEpochMillis( timestamp).toInstant(); + return TimestampData.fromInstant(instant); } // fallback to zoned timestamp + LocalDateTime localDateTime = TemporalConversions.toLocalDateTime(dbzObj, ZoneId.of("UTC")); return TimestampData.fromLocalDateTime(localDateTime); diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java index c2899a1fc..f619c2357 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java +++ 
b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java @@ -5,7 +5,9 @@ package org.apache.flink.lakesoul.types; import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.*; +import org.bson.BsonTimestamp; import org.bson.Document; +import org.bson.types.Binary; import org.bson.types.Decimal128; import java.math.BigDecimal; @@ -72,6 +74,12 @@ private static void fillStructValues(Document bsonDocument, Struct struct) { } else if (value instanceof Decimal128) { BigDecimal decimalValue = new BigDecimal(value.toString()); struct.put(fieldName, decimalValue); + } else if (value instanceof Binary) { + Binary binaryData = (Binary) value; + struct.put(fieldName,binaryData.getData()); + } else if (value instanceof BsonTimestamp) { + BsonTimestamp bsonTimestamp = (BsonTimestamp) value; + struct.put(fieldName,bsonTimestamp.getValue()); } else { struct.put(fieldName,value); } @@ -95,8 +103,12 @@ private static Schema getSchemaForValue(Object value) { return Decimal.schema(decimalValue.scale()); } else if (value instanceof Byte) { return Schema.BYTES_SCHEMA; + } else if (value instanceof Binary) { + return Schema.BYTES_SCHEMA; } else if (value instanceof Date) { return Timestamp.SCHEMA; + } else if (value instanceof BsonTimestamp) { + return Schema.INT64_SCHEMA; } else { // 处理其他类型,可以根据实际情况添加更多类型 return Schema.STRING_SCHEMA; diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java index 1917d8367..649d1db21 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java @@ -15,43 +15,22 @@ import java.util.List; public class StructConvert { public RowData convert(Struct struct) { - // 获取Struct的schema和字段个数 Schema schema = struct.schema(); int arity = schema.fields().size() + 1; // +1 for extra event sortField List fieldNames = schema.fields(); - // 创建BinaryRowData BinaryRowData row = new BinaryRowData(arity); BinaryRowWriter writer = new BinaryRowWriter(row); - // 遍历Struct的字段 for (int i = 0; i < schema.fields().size(); i++) { Field field = fieldNames.get(i); String fieldName = field.name(); Schema fieldSchema = schema.field(fieldName).schema(); - // 获取Struct字段的值 Object fieldValue = struct.getWithoutDefault(fieldName); - // 将字段值写入BinaryRowData writeField(writer, i, fieldSchema, fieldValue); } - // 关闭BinaryRowWriter writer.complete(); return row; } - public RowData convert1(Struct struct){ - Schema schema = struct.schema(); - int arity = schema.fields().size()+1; - List fieldNames = schema.fields(); - BinaryRowData row = new BinaryRowData(arity); - BinaryRowWriter writer = new BinaryRowWriter(row); - Field field = fieldNames.get(0); - String fieldName = field.name();; - Schema fieldSchema = schema.field(fieldName).schema(); - Object fieldValue = struct.getWithoutDefault(fieldName); - RowDataSerializer rowDataSerializer = new RowDataSerializer(); - writer.writeRow(0,(RowData) fieldValue ,rowDataSerializer); - writer.complete(); - return row; - } // 递归处理嵌套的Struct类型 private void writeField(BinaryRowWriter writer, int index, Schema schema, Object fieldValue) { @@ -81,23 +60,17 @@ private void writeField(BinaryRowWriter writer, int index, Schema schema, Object } // 递归处理嵌套的Struct类型 private void convertNestedStruct(BinaryRowWriter writer, int index, Struct nestedStruct, Schema nestedSchema) { - // 获取嵌套Struct的schema和字段个数 int nestedArity = 
nestedSchema.fields().size(); List nestedFields = nestedSchema.fields(); - // 创建嵌套Struct的BinaryRowData BinaryRowData nestedRow = new BinaryRowData(nestedArity); BinaryRowWriter nestedWriter = new BinaryRowWriter(nestedRow); - // 遍历嵌套Struct的字段 for (int i = 0; i < nestedArity; i++) { Field field = nestedFields.get(i); String nestedFieldName = field.name(); Schema nestedFieldType = nestedSchema.field(nestedFieldName).schema(); - // 获取嵌套Struct字段的值 Object nestedFieldValue = nestedStruct.getWithoutDefault(nestedFieldName); - // 将嵌套Struct字段值写入BinaryRowData writeField(nestedWriter, i, nestedFieldType, nestedFieldValue); } - // 关闭嵌套Struct的BinaryRowWriter nestedWriter.complete(); RowDataSerializer rowDataSerializer = new RowDataSerializer(); writer.writeRow(index, nestedRow, rowDataSerializer); From d025d1a77c71a07b21cc4253de5b8be18a87277a Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Wed, 27 Mar 2024 15:58:07 +0800 Subject: [PATCH 04/13] lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java index 32cf835c0..0320655de 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java @@ -180,10 +180,6 @@ public List commit( boolean schemaChanged = (boolean) equalOrCanCastTuple3._2(); StructType mergeStructType = equalOrCanCastTuple3._3(); - System.out.println(origSchema.json()); - System.out.println(msgSchema.toJson()); - System.out.println(mergeStructType.json()); - System.out.println(ArrowUtils.toArrowSchema(mergeStructType, "UTC").toJson()); boolean schemaChangeFound = false; if (dbType.equals("mongodb")){ if (mergeStructType.length() > origSchema.size()){ From 10f63f644e52d74182e668d93e12754f0c6c0197 Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Wed, 27 Mar 2024 16:24:50 +0800 Subject: [PATCH 05/13] fix some bugs Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../flink/lakesoul/entry/SyncDatabase.java | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java index 38596cd8c..14b67bbaa 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/SyncDatabase.java @@ -114,7 +114,6 @@ public static String pgAndMsqlCreateTableSql(String[] stringFieldTypes, String[] public static String[] getMysqlFieldsTypes(DataType[] fieldTypes, String[] fieldNames, String pk) { String[] stringFieldTypes = new String[fieldTypes.length]; for (int i = 0; i < fieldTypes.length; i++) { - String typeName = fieldTypes[i].getLogicalType().toString(); if (fieldTypes[i].getLogicalType() instanceof VarCharType) { String mysqlType = "TEXT"; if (pk != null) { @@ -127,9 +126,7 @@ public static String[] getMysqlFieldsTypes(DataType[] fieldTypes, String[] field stringFieldTypes[i] = "FLOAT"; } else if (fieldTypes[i].getLogicalType() 
instanceof BinaryType) { stringFieldTypes[i] = "BINARY"; - } else if (fieldTypes[i].getLogicalType() instanceof LocalZonedTimestampType || fieldTypes[i].getLogicalType() instanceof TimestampType) { - stringFieldTypes[i] = "TIMESTAMP"; - } else if (fieldTypes[i].getLogicalType().toString().equals("TIMESTAMP_LTZ(6)")) { + } else if (fieldTypes[i].getLogicalType() instanceof LocalZonedTimestampType | fieldTypes[i].getLogicalType() instanceof TimestampType) { stringFieldTypes[i] = "TIMESTAMP"; } else if (fieldTypes[i].getLogicalType() instanceof BooleanType) { stringFieldTypes[i] = "BOOLEAN"; @@ -263,21 +260,19 @@ public static void xsyncToMysql(StreamExecutionEnvironment env) throws SQLExcept DataType[] fieldDataTypes = schemaResult.getTableSchema().getFieldDataTypes(); String[] fieldNames = schemaResult.getTableSchema().getFieldNames(); String tablePk = getTablePk(sourceDatabase, sourceTableName); - String[] mysqlFieldTypes = getMysqlFieldsTypes(fieldDataTypes, fieldNames, tablePk); - //String[] stringFieldsTypes = getMysqlFieldsTypes(fieldDataTypes, fieldNames, tablePk); - String createTableSql = pgAndMsqlCreateTableSql(mysqlFieldTypes, fieldNames, targetTableName, tablePk); + String[] stringFieldsTypes = getMysqlFieldsTypes(fieldDataTypes, fieldNames, tablePk); + String createTableSql = pgAndMsqlCreateTableSql(stringFieldsTypes, fieldNames, targetTableName, tablePk); Connection conn = DriverManager.getConnection(jdbcUrl, username, password); Statement statement = conn.createStatement(); // Create the target table in MySQL statement.executeUpdate(createTableSql.toString()); StringBuilder coulmns = new StringBuilder(); - - for (int i = 0; i < mysqlFieldTypes.length; i++) { - if (mysqlFieldTypes[i].equals("VARBINARY(40)")) { + for (int i = 0; i < fieldDataTypes.length; i++) { + if (stringFieldsTypes[i].equals("BLOB")) { coulmns.append("`").append(fieldNames[i]).append("` ").append("BYTES"); } else { - coulmns.append("`").append(fieldNames[i]).append("` ").append(mysqlFieldTypes[i]); + coulmns.append("`").append(fieldNames[i]).append("` ").append(stringFieldsTypes[i]); } if (i < fieldDataTypes.length - 1) { coulmns.append(","); @@ -291,7 +286,6 @@ public static void xsyncToMysql(StreamExecutionEnvironment env) throws SQLExcept } else { sql = String.format("create table %s(%s) with ('connector' = '%s', 'url' = '%s', 'table-name' = '%s', 'username' = '%s', 'password' = '%s' , 'sink.parallelism' = '%s')", targetTableName, coulmns, "jdbc", jdbcUrl, targetTableName, username, password, sinkParallelism); - } tEnvs.executeSql(sql); tEnvs.executeSql("insert into " + targetTableName + " select * from lakeSoul.`" + sourceDatabase + "`." + sourceTableName); @@ -330,7 +324,6 @@ public static void xsyncToDoris(StreamExecutionEnvironment env, String fenodes) targetTableName, coulmns, "doris", jdbcUrl, fenodes, targetDatabase + "." + targetTableName, username, password); tEnvs.executeSql(sql); tEnvs.executeSql("insert into " + targetTableName + " select * from lakeSoul.`" + sourceDatabase + "`." 
+ sourceTableName); - } public static void xsyncToMongodb(StreamExecutionEnvironment env, From 0079162c075e55feba804a8fbfd4768b6913b99d Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Wed, 27 Mar 2024 18:29:30 +0800 Subject: [PATCH 06/13] add mongodb cdc docs Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../flink/lakesoul/entry/MongoSinkUtils.java | 1 - .../docs/03-Usage Docs/05-flink-cdc-sync.md | 30 +++++++- .../03-Usage Docs/14-export-to-databases.md | 24 +++++- .../03-Usage Docs/05-flink-cdc-sync.md | 11 ++- .../03-Usage Docs/14-export-to-databases.md | 74 ++++++++++++------- 5 files changed, 107 insertions(+), 33 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java index 7e4b1f06a..ca281ca4f 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java @@ -122,7 +122,6 @@ public static List traverseRow(RowType rowType) { return nameList; } - private static void traverseField(RowType rowType, List nameList) { for (int i = 0; i < rowType.getFieldCount(); i++) { String fieldName = rowType.getFieldNames().get(i); diff --git a/website/docs/03-Usage Docs/05-flink-cdc-sync.md b/website/docs/03-Usage Docs/05-flink-cdc-sync.md index 8bf30c7b3..f8fccd907 100644 --- a/website/docs/03-Usage Docs/05-flink-cdc-sync.md +++ b/website/docs/03-Usage Docs/05-flink-cdc-sync.md @@ -63,8 +63,8 @@ Description of required parameters: | -c | The task runs the main function entry class | org.apache.flink.lakesoul.entry.MysqlCdc | | Main package | Task running jar | lakesoul-flink-flink-1.17-VAR::VERSION.jar | | --source_db.type | source database type | mysql postgres oracle | -| --source_db.host | The address of the source database | | -| --source_db.port | source database port | | +| --source_db.host | The address of the source database. When syncing from MongoDB, the host must include the port (host:port). | | +| --source_db.port | Source database port. Not required when syncing from MongoDB (the port is part of --source_db.host). | | | --source_db.user | source database username | | | --source_db.password | Password for source database | | | --source.parallelism | The parallelism of a single table read task affects the data reading speed. The larger the value, the greater the pressure on source database. 
| The parallelism can be adjusted according to the write QPS of source database | @@ -89,6 +89,8 @@ For MySQL, the following additional parameters need to be configured | --source_db.exclude_tables| optional | A list of data table names that do not need to be synchronized, separated by commas, the default is empty | --source_db.exclude_tables test_1,test_2 | | --server_time_zone=Asia/Shanghai | optional | MySQL server time zone, Flink side defaults to "Asia/Shanghai" | Refer to [JDK ZoneID 文档](https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html) | + + Synchronous mysql job example For Mysql configuration, please refer to https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/mysql-cdc.html @@ -172,6 +174,30 @@ For Postgresql configuration,please refer to https://ververica.github.io/flink- --flink.checkpoint s3://bucket/lakesoul/flink/checkpoints \ --flink.savepoint s3://bucket/lakesoul/flink/savepoints ``` + +For MongoDB, the following additional parameters need to be configured + +| Parameter | Required | Meaning Description | Parameter Filling Format | +|-------------|----------|-----------------------|----------------------| +| --batchSize | optional | The size of each fetched batch, default 1024 | --batchSize 1024 | + +```bash +./bin/flink run -c org.apache.flink.lakesoul.entry.JdbcCDC \ + lakesoul-flink-2.4.0-flink-1.17-SNAPSHOT.jar \ + --source_db.db_name "cdc" \ + --source_db.user "flinkuser" \ + --source.parallelism 1 \ + --mongodb_database cdc \ + --server_time_zone Asia/Shanghai \ + --source_db.db_type "mongodb" \ + --source_db.password "flinkpw" \ + --source_db.host "172.18.0.2:27017" \ + --source_db.schema_tables "cdc.bincoll" \ + --sink.parallelism 1 \ + --job.checkpoint_interval 1000 \ + --flink.checkpoint "file:/tmp/data/lakesoul/227/" \ + --warehouse_path "file:/home/cyh/data/lakesoul/mongo" +``` ## LakeSoul Flink CDC Sink job execution process In the initialization phase after the LakeSoul Flink job starts, it will first read all the tables in the configured MySQL DB (excluding tables that do not need to be synchronized). For each table, first determine whether it exists in LakeSoul. If it does not exist, a LakeSoul table is automatically created, and its schema is consistent with the corresponding table in MySQL. diff --git a/website/docs/03-Usage Docs/14-export-to-databases.md b/website/docs/03-Usage Docs/14-export-to-databases.md index 12daea2c3..dcda3d2fb 100644 --- a/website/docs/03-Usage Docs/14-export-to-databases.md +++ b/website/docs/03-Usage Docs/14-export-to-databases.md @@ -29,6 +29,12 @@ Synchronize table to doris,additional configuration parameters are required |-----------------|----------|----------------------------------------------------------------------------------------------------| | --doris.fenodes | optional | Doris FE http address, multiple addresses are separated by commas,
the default is 127.0.0.1:8030 | +Synchronize table to MongoDB, additional configuration parameters are required + +| Parameter | Required | Meaning Description | +|---------------|----------|-------------------------------------------------------------| +| --mongodb.uri | required | MongoDB URI, such as mongodb://user:password@127.0.0.1:27017 | + ## Examples Synchronize table to MySQL task @@ -41,7 +47,7 @@ Synchronize table to MySQL task --target_db.user root \ --target_db.password 123456 \ --target_db.table_name t1 \ - --source_db.db_name + --source_db.db_name \ --source_db.table_name t1 \ --sink_parallelism 1 \ --use_batch true @@ -78,7 +84,21 @@ Synchronize table to doris task --doris.fenodes 127.0.0.1:8030 \ --use_batch false ``` - +Synchronize table to mongodb task +```bash +./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ + lakesoul-flink-2.4.0-flink-1.17-SNAPSHOT.jar \ + --mongodb.uri "mongodb://user:password@127.0.0.1:27017" \ + --source_db.db_name cdc \ + --target_db.db_name cdc \ + --target_db.db_type mongodb \ + --target_db.table_name bincoll \ + --source_db.table_name bincoll \ + --sink_parallelism 2 \ + --use_batch true \ + --batchSize 1 +``` +For streaming sync out of the lake, set batchSize to 1 or 0 ## Instructions for use 1. For data exported to both PostgreSQL and MySQL, users have the option to manually create tables according to users' requirements or table will be automatically created by the program. If users have to specific data type, it is recommended to create the tables in target databases in advacne. 2. If the exported table is partitioned, users must manually create the target table; otherwise, the synchronized table will lack partition fields. diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md index ca6549799..02c19f0e2 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md @@ -65,8 +65,8 @@ export LAKESOUL_PG_PASSWORD=root | -c | 任务运行main函数入口类 | org.apache.flink.lakesoul.entry.JdbcCDC | | 主程序包 | 任务运行jar包 | lakesoul-flink-flink-1.17-VAR::VERSION.jar | | --source_db.type | 源数据库类型 | mysql postgres oracle | -| --source_db.host | 源数据库的地址 | | -| --source_db.port | 源数据库的端口 | | +| --source_db.host | 源数据库的地址,mongodb入湖则需要带上port | | +| --source_db.port | 源数据库的端口,mongodb入湖不需要这个参数 | | | --source_db.user | 源数据库的用户名 | | | --source_db.password | 源数据库的密码 | | | --source.parallelism | 单表读取任务并行度,影响数据读取速度,值越大对 MySQL 压力越大 | 可以根据 MySQL 的写入 QPS 来调整并行度 | @@ -91,6 +91,13 @@ export LAKESOUL_PG_PASSWORD=root |----------------------|------|------------------------------|------------------------------------------| | --source_db.exclude_tables| 否 | 不需要同步的数据表名列表,表名之间用逗号分隔,默认为空 | --source_db.exclude_tables test_1,test_2 | | --server_time_zone=Asia/Shanghai | 否 | MySQL 服务端时区,Flink 端默认为 "Asia/Shanghai" | 参考 [JDK ZoneID 文档](https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html) | +对于mongodb需要额外配置以下参数 + +| 参数 | 是否必须 | 含义说明 | 参数填写格式 | +|-------------|------|--------------------|-------------| +| --batchSize | 否 | 每批次获取数据的大小,默认值1024 | --batchSize 1024 | + + 同步mysql作业示例 对于Mysql数据库配置,可参考https://ververica.github.io/flink-cdc-connectors/release-2.4/content/connectors/mysql-cdc.html diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage 
Docs/14-export-to-databases.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/14-export-to-databases.md index bc760a77d..6d0ea8dc2 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/14-export-to-databases.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/14-export-to-databases.md @@ -6,22 +6,22 @@ SPDX-FileCopyrightText: 2023 LakeSoul Contributors SPDX-License-Identifier: Apache-2.0 --> ## 支持出湖的目标库 -LakeSoul 至 2.5.0 开始,支持单表数据以批同步出湖,流同步出湖,现支持 LakeSoul 表导出到 MySQL,Apache Doris,PostgreSQL 以及兼容这些数据库协议的数据库。 +LakeSoul 至 2.5.1 开始,支持单表数据以批同步出湖,流同步出湖,现支持 LakeSoul 表导出到 MySQL,Apache Doris,PostgreSQL,MongoDB 以及兼容这些数据库协议的数据库。 ## 参数配置 -| 参数名称 | 是否必须 | 含义 | -|------------------------|------|--------------------------------| -| --target_db.url | 是 | 目标数据库的url,‘/’结尾 | -| --target_db.db_type | 是 | 目标数据库的类型(doris,mysql,postgres) | -| --target_db.db_name | 是 | 目标数据库库名字 | -| --target_db.user | 是 | 目标数据库用户名 | -| --target_db.password | 是 | 用户密码 | -| --target_db.table_name | 是 | 目标数据库的表名 | -| --source_db.db_name | 是 | lakesoul库名 | -| --source_db.table_name | 是 | lakesoul表名 | -| --sink_parallelism | 否 | 同步作业的并行度,默认1 | -| --use_batch | 否 | true表示批同步,false表示流同步,默认采用批同步 | +| 参数名称 | 是否必须 | 含义 | +|------------------------|------|----------------------------------------| +| --target_db.url | 是 | 目标数据库的url,‘/’结尾,mongodb不需要配此参数 | +| --target_db.db_type | 是 | 目标数据库的类型(doris,mysql,postgres,mongodb) | +| --target_db.db_name | 是 | 目标数据库库名字 | +| --target_db.user | 是 | 目标数据库用户名 | +| --target_db.password | 是 | 用户密码 | +| --target_db.table_name | 是 | 目标数据库的表名 | +| --source_db.db_name | 是 | lakesoul库名 | +| --source_db.table_name | 是 | lakesoul表名 | +| --sink_parallelism | 否 | 同步作业的并行度,默认1 | +| --use_batch | 否 | true表示批同步,false表示流同步,默认采用批同步 | 对于到doris的出湖,需要额外配置: @@ -29,6 +29,14 @@ LakeSoul 至 2.5.0 开始,支持单表数据以批同步出湖,流同步出 |-----------------------|------|---------------------------------------------------| | --doris.fenodes | 否 | Doris FE http 地址,多个地址之间使用逗号隔开,默认为
127.0.0.1:8030 | +对于到mongodb的出湖,需要额外配置 + +| 参数名称 | 是否必须 | 含义 | +|---------------|------|-----------------------------------------------------| +| --mongodb.uri | 是 | mongodb uri:mongodb://user:password@127.0.0.1:27017 | + + + ## 启动示例 出湖mysql任务启动 @@ -64,20 +72,34 @@ LakeSoul 至 2.5.0 开始,支持单表数据以批同步出湖,流同步出 出湖到doris任务启动 ```bash ./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ -lakesoul-flink-flink-1.17-VAR::VERSION.jar \ ---target_db.url "jdbc:mysql://172.17.0.2:9030/" \ ---source_db.db_name test \ ---target_db.db_name test \ ---target_db.user root \ ---target_db.password 123456 \ ---target_db.db_type doris \ ---target_db.table_name tb \ ---source_db.table_name tb \ ---sink_parallelism 1 \ ---doris.fenodes 127.0.0.1:8030 \ ---use_batch false + lakesoul-flink-flink-1.17-VAR::VERSION.jar \ + --target_db.url "jdbc:mysql://172.17.0.2:9030/" \ + --source_db.db_name test \ + --target_db.db_name test \ + --target_db.user root \ + --target_db.password 123456 \ + --target_db.db_type doris \ + --target_db.table_name tb \ + --source_db.table_name tb \ + --sink_parallelism 1 \ + --doris.fenodes 127.0.0.1:8030 \ + --use_batch false ``` - +出湖到mongodb任务启动 +```bash +./bin/flink run -c org.apache.flink.lakesoul.entry.SyncDatabase \ + lakesoul-flink-2.4.0-flink-1.17-SNAPSHOT.jar \ + --mongodb.uri "mongodb://flinkuser:flinkpw@127.0.0.1:27017" \ + --source_db.db_name cdc \ + --target_db.db_name cdc \ + --target_db.db_type mongodb \ + --target_db.table_name bincoll \ + --source_db.table_name bincoll \ + --sink_parallelism 2 \ + --use_batch true \ + --batchSize 1 +``` +当流式出湖时,batchSize的值请设置为1或者0 ## 使用事项 1. 出湖到 PostgreSQL 和 MySql,可以支持用户根据需求手动建表,也支持程序自动建表,如果用户对数据的类型要求自定义控制(例如 varchar),那么建议用户提前在目标库建表 2. 如果出湖的 LakeSoul 表是分区表,那么需要用户手动在目标库建表,否则同步后的表无分区字段 From 4a323f590f64b16c64ae019b4a1dbf7359fada99 Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Thu, 28 Mar 2024 11:01:29 +0800 Subject: [PATCH 07/13] Add some documentation Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../org/apache/flink/lakesoul/entry/JdbcCDC.java | 4 ++-- .../flink/lakesoul/entry/MongoSinkUtils.java | 8 ++++---- .../apache/flink/lakesoul/types/ParseDocument.java | 10 ---------- website/docs/03-Usage Docs/05-flink-cdc-sync.md | 13 +++++++++++++ .../current/03-Usage Docs/05-flink-cdc-sync.md | 14 ++++++++++++++ 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java index fd365ece6..475889679 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java @@ -295,8 +295,8 @@ private static void mongoCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Config MongoDBSource mongoSource = MongoDBSource.builder() .hosts(host) - .databaseList(mongoDatabase) // 设置捕获的数据库,支持正则表达式 - .collectionList(tableList) //设置捕获的集合,支持正则表达式 + .databaseList(mongoDatabase) + .collectionList(tableList) .startupOptions(StartupOptions.initial()) .scanFullChangelog(true) .batchSize(batchSize) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java index ca281ca4f..b37392516 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/MongoSinkUtils.java @@ -58,7 +58,7 @@ 
public WriteModel serialize(Tuple2 record, MongoSink } if (fieldValue != null) { try { - document.append(fieldName, convertTonBsonValue(fieldValue, fieldName)); + document.append(fieldName, convertTonBsonValue(fieldValue)); } catch (ParseException e) { throw new RuntimeException(e); } @@ -68,7 +68,7 @@ public WriteModel serialize(Tuple2 record, MongoSink } - public static BsonValue convertTonBsonValue(Object value, String fieldName) throws ParseException { + public static BsonValue convertTonBsonValue(Object value) throws ParseException { if (value == null) { return new BsonNull(); } else if (value instanceof Integer) { @@ -93,7 +93,7 @@ public static BsonValue convertTonBsonValue(Object value, String fieldName) thro Object[] array = (Object[]) value; BsonArray bsonArray = new BsonArray(); for (Object element : array) { - bsonArray.add(convertTonBsonValue(element, fieldName)); + bsonArray.add(convertTonBsonValue(element)); } return bsonArray; } else if (isDateTimeString(value)) { @@ -108,7 +108,7 @@ public static BsonValue convertTonBsonValue(Object value, String fieldName) thro String name = structNameFiledList.get(0); stringList.remove(0); structNameFiledList = stringList; - bsonDocument.append(name, convertTonBsonValue(fieldValue, name)); + bsonDocument.append(name, convertTonBsonValue(fieldValue)); } return bsonDocument; } else { diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java index f619c2357..4704ecf04 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/ParseDocument.java @@ -18,10 +18,8 @@ public class ParseDocument { public static Struct convertBSONToStruct(String value) { Document bsonDocument = Document.parse(value); - // 创建根 Struct 的 Schema SchemaBuilder structSchemaBuilder = SchemaBuilder.struct(); Struct struct = new Struct(buildSchema(bsonDocument, structSchemaBuilder)); - // 填充 Struct 的值 fillStructValues(bsonDocument, struct); return struct; @@ -32,16 +30,13 @@ private static Schema buildSchema(Document bsonDocument, SchemaBuilder structSch String fieldName = entry.getKey(); Object value = entry.getValue(); if (value instanceof Document) { - // 处理嵌套的 Document SchemaBuilder nestedStructSchemaBuilder = SchemaBuilder.struct(); structSchemaBuilder.field(fieldName, buildSchema((Document) value, nestedStructSchemaBuilder)); } else if (value instanceof List) { - // 处理ArrayList类型 List arrayList = (List) value; Schema arraySchema = getSchemaForArrayList(arrayList); structSchemaBuilder.field(fieldName, arraySchema); } else { - // 处理普通字段 structSchemaBuilder.field(fieldName, getSchemaForValue(value)); } } @@ -49,7 +44,6 @@ private static Schema buildSchema(Document bsonDocument, SchemaBuilder structSch } private static Schema getSchemaForArrayList(List arrayList) { - // 假设 ArrayList 中的元素都是整数,您可以根据实际情况调整 Schema elementSchema = null; if (!arrayList.isEmpty()) { Object firstElement = arrayList.get(0); @@ -63,12 +57,10 @@ private static void fillStructValues(Document bsonDocument, Struct struct) { String fieldName = entry.getKey(); Object value = entry.getValue(); if (value instanceof Document) { - // 处理嵌套的 Document Struct nestedStruct = new Struct(struct.schema().field(fieldName).schema()); fillStructValues((Document) value, nestedStruct); struct.put(fieldName, nestedStruct); } else if (value instanceof List) { - // 处理ArrayList类型 List arrayList = (List) value; 
struct.put(fieldName, arrayList); } else if (value instanceof Decimal128) { @@ -87,7 +79,6 @@ private static void fillStructValues(Document bsonDocument, Struct struct) { } private static Schema getSchemaForValue(Object value) { - // 根据值的类型返回对应的 Schema if (value instanceof String) { return Schema.STRING_SCHEMA; } else if (value instanceof Integer) { @@ -110,7 +101,6 @@ private static Schema getSchemaForValue(Object value) { } else if (value instanceof BsonTimestamp) { return Schema.INT64_SCHEMA; } else { - // 处理其他类型,可以根据实际情况添加更多类型 return Schema.STRING_SCHEMA; } } diff --git a/website/docs/03-Usage Docs/05-flink-cdc-sync.md b/website/docs/03-Usage Docs/05-flink-cdc-sync.md index f8fccd907..b3eec2e24 100644 --- a/website/docs/03-Usage Docs/05-flink-cdc-sync.md +++ b/website/docs/03-Usage Docs/05-flink-cdc-sync.md @@ -286,6 +286,19 @@ Type mapping relationship between Oracle and LakeSoul | CHAR(n)
NCHAR(n)
NVARCHAR2(n)
VARCHAR(n)
VARCHAR2(n)
CLOB
NCLOB
XMLType
SYS.XMLTYPE | org.apache.spark.sql.types.DataTypes.StringType | | BLOB | org.apache.spark.sql.types.DataTypes.BinaryType | +Type mapping relationship between MongoDB and LakeSoul + +| STRING | org.apache.spark.sql.types.DataTypes.StringType | +|-------------|----------------------------------------| +| DOUBLE | org.apache.spark.sql.types.DataTypes.DoubleType | +| INTEGER | org.apache.spark.sql.types.DataTypes.IntegerType | +| BOOLEAN | org.apache.spark.sql.types.DataTypes.BooleanType | +| DATE | org.apache.spark.sql.types.DataTypes.DateType | +| ARRAYS | org.apache.spark.sql.types.ArrayType | +| BINARY DATA | org.apache.spark.sql.types.DataTypes.BinaryType | +| LONG | org.apache.spark.sql.types.DataTypes.LongType | +| STRUCT | org.apache.spark.sql.types.StructType | +| DECIMAL | org.apache.spark.sql.types.DecimalType(M,D) | ## Precautions diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md index 02c19f0e2..bef0b7c9c 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/03-Usage Docs/05-flink-cdc-sync.md @@ -272,6 +272,20 @@ Oracle与LakeSoul的映射关系 | CHAR(n)
NCHAR(n)
NVARCHAR2(n)
VARCHAR(n)
VARCHAR2(n)
CLOB
NCLOB
XMLType
SYS.XMLTYPE | org.apache.spark.sql.types.DataTypes.StringType | | BLOB | org.apache.spark.sql.types.DataTypes.BinaryType | +MongoDB与LakeSoul的类型映射关系 + +| STRING | org.apache.spark.sql.types.DataTypes.StringType | +|-------------|----------------------------------------| +| DOUBLE | org.apache.spark.sql.types.DataTypes.DoubleType | +| INTEGER | org.apache.spark.sql.types.DataTypes.IntegerType | +| BOOLEAN | org.apache.spark.sql.types.DataTypes.BooleanType | +| DATE | org.apache.spark.sql.types.DataTypes.DateType | +| ARRAYS | org.apache.spark.sql.types.ArrayType | +| BINARY DATA | org.apache.spark.sql.types.DataTypes.BinaryType | +| LONG | org.apache.spark.sql.types.DataTypes.LongType | +| STRUCT | org.apache.spark.sql.types.StructType | +| DECIMAL | org.apache.spark.sql.types.DecimalType(M,D) | + ## 注意事项 1. 源数据库中的表必须存在主键,无主键表目前暂不支持; From d4d3e5729dd78bbac3102d50c2f4cf6367cd454d Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Thu, 28 Mar 2024 11:32:18 +0800 Subject: [PATCH 08/13] format the code Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../java/org/apache/flink/lakesoul/entry/JdbcCDC.java | 8 +++----- .../sink/committer/LakeSoulSinkGlobalCommitter.java | 2 -- .../org/apache/flink/lakesoul/types/StructConvert.java | 7 +------ 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java index 475889679..f246acdf0 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java @@ -79,7 +79,7 @@ public static void main(String[] args) throws Exception { tableList = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); } if ( dbType.equalsIgnoreCase("mongodb")){ - mongoDatabase = parameter.get(MONGO_DB_DATABASE.key()); + //mongoDatabase = parameter.get(MONGO_DB_DATABASE.key()); batchSize = parameter.getInt(BATCH_SIZE.key(), BATCH_SIZE.defaultValue()); tableList = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); } @@ -218,7 +218,6 @@ private static void postgresCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Con DataStreamSource source = builder.buildMultiTableSource("Postgres Source"); DataStream stream = builder.buildHashPartitionedCDCStream(source); - stream.print(); DataStreamSink dmlSink = builder.buildLakeSoulDMLSink(stream); env.execute("LakeSoul CDC Sink From Postgres Database " + dbName); } @@ -283,7 +282,6 @@ public static void sqlserverCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Con LakeSoulMultiTableSinkStreamBuilder builder = new LakeSoulMultiTableSinkStreamBuilder(sqlServerSource, context, lakeSoulRecordConvert); - DataStreamSource source = builder.buildMultiTableSource("Sqlserver Source"); DataStream stream = builder.buildHashPartitionedCDCStream(source); @@ -295,7 +293,7 @@ private static void mongoCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Config MongoDBSource mongoSource = MongoDBSource.builder() .hosts(host) - .databaseList(mongoDatabase) + .databaseList(dbName) .collectionList(tableList) .startupOptions(StartupOptions.initial()) .scanFullChangelog(true) @@ -313,7 +311,7 @@ private static void mongoCdc(LakeSoulRecordConvert lakeSoulRecordConvert, Config builder = new LakeSoulMultiTableSinkStreamBuilder(mongoSource, context, lakeSoulRecordConvert); DataStreamSource source = builder.buildMultiTableSource("mongodb Source"); - source.print(); + DataStream stream =
builder.buildHashPartitionedCDCStream(source); DataStreamSink dmlSink = builder.buildLakeSoulDMLSink(stream); env.execute("LakeSoul CDC Sink From mongo Database " + dbName); diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java index 0320655de..52b6f1e95 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/sink/committer/LakeSoulSinkGlobalCommitter.java @@ -164,9 +164,7 @@ public List commit( StructType origSchema ; if (TableInfoDao.isArrowKindSchema(tableInfo.getTableSchema())) { Schema arrowSchema = Schema.fromJSON(tableInfo.getTableSchema()); - System.out.println(arrowSchema.toJson()); origSchema = ArrowUtils.fromArrowSchema(arrowSchema); - System.out.println(origSchema.json()); } else { origSchema = (StructType) StructType.fromJson(tableInfo.getTableSchema()); } diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java index 649d1db21..e9577b64f 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/StructConvert.java @@ -31,18 +31,13 @@ public RowData convert(Struct struct) { return row; } - // 递归处理嵌套的Struct类型 private void writeField(BinaryRowWriter writer, int index, Schema schema, Object fieldValue) { if (fieldValue == null) { - // 如果字段值为null,则写入null writer.setNullAt(index); } else if (schema.type().getName().equals("struct")) { - // 如果字段类型是StructType,递归处理 convertNestedStruct(writer, index, (Struct) fieldValue, schema); } else { - // 根据字段类型写入值 - // 这里根据实际情况,可能需要根据字段类型进行不同的处理 switch (schema.type()) { case INT8: case INT16: @@ -58,7 +53,7 @@ private void writeField(BinaryRowWriter writer, int index, Schema schema, Object } } - // 递归处理嵌套的Struct类型 + private void convertNestedStruct(BinaryRowWriter writer, int index, Struct nestedStruct, Schema nestedSchema) { int nestedArity = nestedSchema.fields().size(); List nestedFields = nestedSchema.fields(); From 6498ff878103755680122c5e88bcbc6f40d3f8f3 Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Thu, 28 Mar 2024 14:48:04 +0800 Subject: [PATCH 09/13] Fix field attribute issues Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../apache/flink/lakesoul/entry/JdbcCDC.java | 2 -- .../lakesoul/types/LakeSoulRecordConvert.java | 26 ++++++++++--------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java index f246acdf0..4ded3508a 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/entry/JdbcCDC.java @@ -55,7 +55,6 @@ public class JdbcCDC { private static String serverTimezone; private static String pluginName; private static int batchSize; - private static String mongoDatabase; public static void main(String[] args) throws Exception { ParameterTool parameter = ParameterTool.fromArgs(args); @@ -79,7 +78,6 @@ public static void main(String[] args) throws Exception { tableList = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); } if ( 
dbType.equalsIgnoreCase("mongodb")){ - //mongoDatabase = parameter.get(MONGO_DB_DATABASE.key()); batchSize = parameter.getInt(BATCH_SIZE.key(), BATCH_SIZE.defaultValue()); tableList = parameter.get(SOURCE_DB_SCHEMA_TABLES.key()).split(","); } diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java index 666e1aee6..fbd120059 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java @@ -175,30 +175,30 @@ public LakeSoulRowDataWrapper toLakeSoulDataType(Schema sch, Struct value, Table Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema(); Struct after = value.getStruct(Envelope.FieldName.AFTER); RowData insert = convert(after, afterSchema, RowKind.INSERT, sortField); - boolean afterNullable = afterSchema.isOptional(); - RowType rt = toFlinkRowType(afterSchema,afterNullable); + //boolean afterNullable = afterSchema.isOptional(); + RowType rt = toFlinkRowType(afterSchema,false); insert.setRowKind(RowKind.INSERT); builder.setOperation("insert").setAfterRowData(insert).setAfterType(rt); } else if (op == Envelope.Operation.DELETE) { Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema(); Struct before = value.getStruct(Envelope.FieldName.BEFORE); RowData delete = convert(before, beforeSchema, RowKind.DELETE, sortField); - boolean nullable = beforeSchema.isOptional(); - RowType rt = toFlinkRowType(beforeSchema,nullable); +// boolean nullable = beforeSchema.isOptional(); + RowType rt = toFlinkRowType(beforeSchema,false); builder.setOperation("delete").setBeforeRowData(null).setBeforeRowType(rt); delete.setRowKind(RowKind.DELETE); } else { Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema(); Struct before = value.getStruct(Envelope.FieldName.BEFORE); RowData beforeData = convert(before, beforeSchema, RowKind.UPDATE_BEFORE, sortField); - boolean beforNullable = beforeSchema.isOptional(); - RowType beforeRT = toFlinkRowType(beforeSchema,beforNullable); + //boolean beforNullable = beforeSchema.isOptional(); + RowType beforeRT = toFlinkRowType(beforeSchema,false); beforeData.setRowKind(RowKind.UPDATE_BEFORE); Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema(); Struct after = value.getStruct(Envelope.FieldName.AFTER); RowData afterData = convert(after, afterSchema, RowKind.UPDATE_AFTER, sortField); - boolean afterNullable = afterSchema.isOptional(); - RowType afterRT = toFlinkRowType(afterSchema,afterNullable); + //boolean afterNullable = afterSchema.isOptional(); + RowType afterRT = toFlinkRowType(afterSchema,false); afterData.setRowKind(RowKind.UPDATE_AFTER); if (partitionFieldsChanged(beforeRT, beforeData, afterRT, afterData)) { // partition fields changed. we need to emit both before and after RowData @@ -229,7 +229,7 @@ public RowType toFlinkRowTypeCDC(RowType rowType) { return RowType.of(colTypes, colNames); } - public RowType toFlinkRowType(Schema schema, boolean nullable) { + public RowType toFlinkRowType(Schema schema, boolean isMongoDDL) { int arity = schema.fields().size() + 1; if (useCDC) ++arity; String[] colNames = new String[arity]; @@ -238,7 +238,11 @@ public RowType toFlinkRowType(Schema schema, boolean nullable) { for (int i = 0; i < (useCDC ? 
arity - 2 : arity - 1); i++) { Field item = fieldNames.get(i); colNames[i] = item.name(); - colTypes[i] = convertToLogical(item.schema(), !item.name().equals("_id") && nullable); + if (isMongoDDL){ + colTypes[i] = convertToLogical(item.schema(), !item.name().equals("_id")); + }else { + colTypes[i] = convertToLogical(item.schema(), item.schema().isOptional()); + } } // colNames[useCDC ? arity - 3 : arity - 2] = BINLOG_FILE_INDEX; // colTypes[useCDC ? arity - 3 : arity - 2] = new BigIntType(); @@ -273,8 +277,6 @@ public List getRowFields(Schema schema) { } private LogicalType primitiveLogicalType(Schema fieldSchema,boolean nullable) { -// boolean nullable = fieldSchema.isOptional(); -// if (isMongoDDl) nullable = true; switch (fieldSchema.type()) { case BOOLEAN: return new BooleanType(nullable); From 69ab76bdd1c6b5e3b2e0f02123e3936e31e09735 Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Thu, 28 Mar 2024 18:00:44 +0800 Subject: [PATCH 10/13] test Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../apache/spark/sql/lakesoul/benchmark/Benchmark.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala b/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala index 4d681c0f1..2e853b5d2 100644 --- a/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala +++ b/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala @@ -155,6 +155,12 @@ object Benchmark { val diff1 = jdbcDF.rdd.subtract(lakesoulDF.rdd) val diff2 = lakesoulDF.rdd.subtract(jdbcDF.rdd) + println("*******************jdbcDF count"+jdbcDF.count()+"***************") + println("*******************jdbcDF count"+lakesoulDF.count()+"***************") + println("*********diff1.count:"+diff1.count()+"*******") + println("*********diff1.count:"+diff2.count()+"*******") + println("jdbcDF schema"+jdbcDF.printSchema()) + println("lakesoulDF schema"+lakesoulDF.printSchema()) val result = diff1.count() == 0 && diff2.count() == 0 if (!result) { @@ -177,4 +183,4 @@ object Benchmark { .withColumn("col_20", col("col_20").cast("string")) .withColumn("col_23", col("col_23").cast("string")) } -} +} \ No newline at end of file From f0c51a96825359a6655e9f248fabad6a2ebcb0a1 Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Thu, 28 Mar 2024 18:43:43 +0800 Subject: [PATCH 11/13] test Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../spark/sql/lakesoul/benchmark/Benchmark.scala | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala b/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala index 2e853b5d2..cdac08413 100644 --- a/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala +++ b/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala @@ -155,19 +155,13 @@ object Benchmark { val diff1 = jdbcDF.rdd.subtract(lakesoulDF.rdd) val diff2 = lakesoulDF.rdd.subtract(jdbcDF.rdd) - println("*******************jdbcDF count"+jdbcDF.count()+"***************") - println("*******************jdbcDF count"+lakesoulDF.count()+"***************") - println("*********diff1.count:"+diff1.count()+"*******") - println("*********diff1.count:"+diff2.count()+"*******") - println("jdbcDF schema"+jdbcDF.printSchema()) - println("lakesoulDF 
schema"+lakesoulDF.printSchema()) val result = diff1.count() == 0 && diff2.count() == 0 if (!result) { println(printLine + table + " result: " + result + printLine) - println("*************diff1**************") + println("*************diff1**************"+"dfff1.count:"+diff1.count()+"schema"+jdbcDF.printSchema()+"jdbcDF count:"+jdbcDF.count()) spark.createDataFrame(diff1, lakesoulDF.schema).show() - println("*************diff2**************") + println("*************diff2**************"+"diff2.count:"+diff2.count()+"schema"+lakesoulDF.printSchema()+"lakesoulDF count:"+lakesoulDF.count()) spark.createDataFrame(diff2, lakesoulDF.schema).show() println(table + " data verification ERROR!!!") System.exit(1) From 6254383130da0488232b9e2cd97a47824f42e9fb Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Fri, 29 Mar 2024 18:17:36 +0800 Subject: [PATCH 12/13] fix data delete event Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../flink/lakesoul/types/LakeSoulRecordConvert.java | 11 ++++++++++- .../spark/sql/lakesoul/benchmark/Benchmark.scala | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java index fbd120059..b58f548b6 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java @@ -185,8 +185,9 @@ public LakeSoulRowDataWrapper toLakeSoulDataType(Schema sch, Struct value, Table RowData delete = convert(before, beforeSchema, RowKind.DELETE, sortField); // boolean nullable = beforeSchema.isOptional(); RowType rt = toFlinkRowType(beforeSchema,false); - builder.setOperation("delete").setBeforeRowData(null).setBeforeRowType(rt); delete.setRowKind(RowKind.DELETE); + builder.setOperation("delete").setBeforeRowData(delete).setBeforeRowType(rt); + } else { Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema(); Struct before = value.getStruct(Envelope.FieldName.BEFORE); @@ -370,6 +371,14 @@ public long computeBinarySourceRecordPrimaryKeyHash(BinarySourceRecord sourceRec RowData rowData = Objects.equals(data.getOp(), "delete") ? 
data.getBefore() : data.getAfter(); List pks = sourceRecord.getPrimaryKeys(); long hash = 42; + if (Objects.equals(data.getOp(),"delete")){ + for (String pk : pks) { + int typeIndex = rowType.getFieldIndex(pk); + LogicalType type = rowType.getTypeAt(typeIndex); + Object fieldOrNull = RowData.createFieldGetter(type, typeIndex).getFieldOrNull(rowData); + //hash = LakeSoulKeyGen.getHash(type, fieldOrNull, hash); + } + } for (String pk : pks) { int typeIndex = rowType.getFieldIndex(pk); LogicalType type = rowType.getTypeAt(typeIndex); diff --git a/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala b/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala index cdac08413..28512ade0 100644 --- a/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala +++ b/lakesoul-spark/src/test/scala/org/apache/spark/sql/lakesoul/benchmark/Benchmark.scala @@ -159,9 +159,9 @@ object Benchmark { val result = diff1.count() == 0 && diff2.count() == 0 if (!result) { println(printLine + table + " result: " + result + printLine) - println("*************diff1**************"+"dfff1.count:"+diff1.count()+"schema"+jdbcDF.printSchema()+"jdbcDF count:"+jdbcDF.count()) + println("*************diff1**************") spark.createDataFrame(diff1, lakesoulDF.schema).show() - println("*************diff2**************"+"diff2.count:"+diff2.count()+"schema"+lakesoulDF.printSchema()+"lakesoulDF count:"+lakesoulDF.count()) + println("*************diff2**************") spark.createDataFrame(diff2, lakesoulDF.schema).show() println(table + " data verification ERROR!!!") System.exit(1) From 06b100d7747ee910d1d76dad61c7f1155e45494c Mon Sep 17 00:00:00 2001 From: ChenYunHey <1908166778@qq.com> Date: Fri, 29 Mar 2024 21:14:40 +0800 Subject: [PATCH 13/13] Remove irrelevant code Signed-off-by: ChenYunHey <1908166778@qq.com> --- .../apache/flink/lakesoul/types/BinarySourceRecord.java | 2 -- .../flink/lakesoul/types/LakeSoulRecordConvert.java | 8 -------- 2 files changed, 10 deletions(-) diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java index cb7bba789..0db4354d5 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java @@ -101,8 +101,6 @@ public static BinarySourceRecord fromMysqlSourceRecord(SourceRecord sourceRecord } long sortField = (binlogFileIndex << 32) + binlogPosition; LakeSoulRowDataWrapper data = convert.toLakeSoulDataType(valueSchema, value, tableId, tsMs, sortField); -// TimestampData timestamp = data.getAfter().getTimestamp(1, 18); -// System.out.println(timestamp); String tablePath; if (tableId.schema()==null){ tablePath = new Path(new Path(basePath, tableId.catalog()), tableId.table()).toString(); diff --git a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java index b58f548b6..66d961a46 100644 --- a/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java +++ b/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/LakeSoulRecordConvert.java @@ -371,14 +371,6 @@ public long computeBinarySourceRecordPrimaryKeyHash(BinarySourceRecord sourceRec RowData rowData = Objects.equals(data.getOp(), "delete") ? 
data.getBefore() : data.getAfter(); List pks = sourceRecord.getPrimaryKeys(); long hash = 42; - if (Objects.equals(data.getOp(),"delete")){ - for (String pk : pks) { - int typeIndex = rowType.getFieldIndex(pk); - LogicalType type = rowType.getTypeAt(typeIndex); - Object fieldOrNull = RowData.createFieldGetter(type, typeIndex).getFieldOrNull(rowData); - //hash = LakeSoulKeyGen.getHash(type, fieldOrNull, hash); - } - } for (String pk : pks) { int typeIndex = rowType.getFieldIndex(pk); LogicalType type = rowType.getTypeAt(typeIndex);