From b88a88f8cde19a78b39d5999726c0dd8e2d2f884 Mon Sep 17 00:00:00 2001 From: wangbo Date: Thu, 25 Apr 2024 10:11:56 +0800 Subject: [PATCH 001/163] Fix remote scan pool (#33976) --- .../runtime/workload_group/workload_group.cpp | 31 ++++----- be/src/vec/exec/scan/scanner_scheduler.cpp | 24 +++++-- be/src/vec/exec/scan/scanner_scheduler.h | 26 +++++--- .../resource/workloadgroup/WorkloadGroup.java | 11 ++++ .../workload_manager_p0/test_curd_wlg.out | 24 +++++++ .../workload_manager_p0/test_curd_wlg.groovy | 64 +++++++++++++++++++ 6 files changed, 145 insertions(+), 35 deletions(-) diff --git a/be/src/runtime/workload_group/workload_group.cpp b/be/src/runtime/workload_group/workload_group.cpp index 673263f1a17d2e..c82346f040ec82 100644 --- a/be/src/runtime/workload_group/workload_group.cpp +++ b/be/src/runtime/workload_group/workload_group.cpp @@ -295,7 +295,8 @@ Status WorkloadGroupInfo::parse_topic_info(const TWorkloadGroupInfo& tworkload_g } // 10 max remote scan thread num - workload_group_info->max_remote_scan_thread_num = config::doris_scanner_thread_pool_thread_num; + workload_group_info->max_remote_scan_thread_num = + vectorized::ScannerScheduler::get_remote_scan_thread_num(); if (tworkload_group_info.__isset.max_remote_scan_thread_num && tworkload_group_info.max_remote_scan_thread_num > 0) { workload_group_info->max_remote_scan_thread_num = @@ -303,7 +304,8 @@ Status WorkloadGroupInfo::parse_topic_info(const TWorkloadGroupInfo& tworkload_g } // 11 min remote scan thread num - workload_group_info->min_remote_scan_thread_num = config::doris_scanner_thread_pool_thread_num; + workload_group_info->min_remote_scan_thread_num = + vectorized::ScannerScheduler::get_remote_scan_thread_num(); if (tworkload_group_info.__isset.min_remote_scan_thread_num && tworkload_group_info.min_remote_scan_thread_num > 0) { workload_group_info->min_remote_scan_thread_num = @@ -384,23 +386,18 @@ void WorkloadGroup::upsert_task_scheduler(WorkloadGroupInfo* tg_info, ExecEnv* e } } if (scan_thread_num > 0 && _scan_task_sched) { - _scan_task_sched->reset_thread_num(scan_thread_num); + _scan_task_sched->reset_thread_num(scan_thread_num, scan_thread_num); } if (_remote_scan_task_sched == nullptr) { - int remote_max_thread_num = - config::doris_max_remote_scanner_thread_pool_thread_num != -1 - ? 
config::doris_max_remote_scanner_thread_pool_thread_num - : std::max(512, CpuInfo::num_cores() * 10); - remote_max_thread_num = - std::max(remote_max_thread_num, config::doris_scanner_thread_pool_thread_num); - + int remote_max_thread_num = vectorized::ScannerScheduler::get_remote_scan_thread_num(); + int remote_scan_thread_queue_size = + vectorized::ScannerScheduler::get_remote_scan_thread_queue_size(); std::unique_ptr remote_scan_scheduler = std::make_unique("RScan_" + tg_name, cg_cpu_ctl_ptr); - Status ret = - remote_scan_scheduler->start(remote_max_thread_num, remote_max_thread_num, - config::doris_remote_scanner_thread_pool_queue_size); + Status ret = remote_scan_scheduler->start(remote_max_thread_num, remote_max_thread_num, + remote_scan_thread_queue_size); if (ret.ok()) { _remote_scan_task_sched = std::move(remote_scan_scheduler); } else { @@ -408,11 +405,9 @@ void WorkloadGroup::upsert_task_scheduler(WorkloadGroupInfo* tg_info, ExecEnv* e << tg_id; } } - if (max_remote_scan_thread_num > 0 && _remote_scan_task_sched) { - _remote_scan_task_sched->reset_max_thread_num(max_remote_scan_thread_num); - } - if (min_remote_scan_thread_num > 0 && _remote_scan_task_sched) { - _remote_scan_task_sched->reset_min_thread_num(min_remote_scan_thread_num); + if (max_remote_scan_thread_num >= min_remote_scan_thread_num && _remote_scan_task_sched) { + _remote_scan_task_sched->reset_thread_num(max_remote_scan_thread_num, + min_remote_scan_thread_num); } if (_non_pipe_thread_pool == nullptr) { diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index 571df35e55ed85..eba62dcf19a1ea 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -97,14 +97,10 @@ Status ScannerScheduler::init(ExecEnv* env) { config::doris_scanner_thread_pool_queue_size, "local_scan"); // 2. remote scan thread pool - _remote_thread_pool_max_size = config::doris_max_remote_scanner_thread_pool_thread_num != -1 - ? config::doris_max_remote_scanner_thread_pool_thread_num - : std::max(512, CpuInfo::num_cores() * 10); - _remote_thread_pool_max_size = - std::max(_remote_thread_pool_max_size, config::doris_scanner_thread_pool_thread_num); + _remote_thread_pool_max_size = ScannerScheduler::get_remote_scan_thread_num(); + int remote_scan_pool_queue_size = ScannerScheduler::get_remote_scan_thread_queue_size(); _remote_scan_thread_pool = std::make_unique( - _remote_thread_pool_max_size, config::doris_remote_scanner_thread_pool_queue_size, - "RemoteScanThreadPool"); + _remote_thread_pool_max_size, remote_scan_pool_queue_size, "RemoteScanThreadPool"); // 3. limited scan thread pool static_cast(ThreadPoolBuilder("LimitedScanThreadPool") @@ -329,4 +325,18 @@ void ScannerScheduler::_deregister_metrics() { DEREGISTER_HOOK_METRIC(group_local_scan_thread_pool_queue_size); DEREGISTER_HOOK_METRIC(group_local_scan_thread_pool_thread_num); } + +int ScannerScheduler::get_remote_scan_thread_num() { + int remote_max_thread_num = config::doris_max_remote_scanner_thread_pool_thread_num != -1 + ? 
config::doris_max_remote_scanner_thread_pool_thread_num + : std::max(512, CpuInfo::num_cores() * 10); + remote_max_thread_num = + std::max(remote_max_thread_num, config::doris_scanner_thread_pool_thread_num); + return remote_max_thread_num; +} + +int ScannerScheduler::get_remote_scan_thread_queue_size() { + return config::doris_remote_scanner_thread_pool_queue_size; +} + } // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/scanner_scheduler.h b/be/src/vec/exec/scan/scanner_scheduler.h index 01e0859643409a..b3d02860f9a3d4 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.h +++ b/be/src/vec/exec/scan/scanner_scheduler.h @@ -65,6 +65,10 @@ class ScannerScheduler { int remote_thread_pool_max_size() const { return _remote_thread_pool_max_size; } + static int get_remote_scan_thread_num(); + + static int get_remote_scan_thread_queue_size(); + private: static void _scanner_scan(std::shared_ptr ctx, std::shared_ptr scan_task); @@ -136,16 +140,18 @@ class SimplifiedScanScheduler { } } - void reset_thread_num(int thread_num) { - int max_thread_num = _scan_thread_pool->max_threads(); - if (max_thread_num != thread_num) { - if (thread_num > max_thread_num) { - static_cast(_scan_thread_pool->set_max_threads(thread_num)); - static_cast(_scan_thread_pool->set_min_threads(thread_num)); - } else { - static_cast(_scan_thread_pool->set_min_threads(thread_num)); - static_cast(_scan_thread_pool->set_max_threads(thread_num)); - } + void reset_thread_num(int new_max_thread_num, int new_min_thread_num) { + int cur_max_thread_num = _scan_thread_pool->max_threads(); + int cur_min_thread_num = _scan_thread_pool->min_threads(); + if (cur_max_thread_num == new_max_thread_num && cur_min_thread_num == new_min_thread_num) { + return; + } + if (new_max_thread_num >= cur_max_thread_num) { + static_cast(_scan_thread_pool->set_max_threads(new_max_thread_num)); + static_cast(_scan_thread_pool->set_min_threads(new_min_thread_num)); + } else { + static_cast(_scan_thread_pool->set_min_threads(new_min_thread_num)); + static_cast(_scan_thread_pool->set_max_threads(new_max_thread_num)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/resource/workloadgroup/WorkloadGroup.java b/fe/fe-core/src/main/java/org/apache/doris/resource/workloadgroup/WorkloadGroup.java index 482d2f6f11a301..e5ec2c619b6b0c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/resource/workloadgroup/WorkloadGroup.java +++ b/fe/fe-core/src/main/java/org/apache/doris/resource/workloadgroup/WorkloadGroup.java @@ -272,6 +272,7 @@ private static void checkProperties(Map properties) throws DdlEx } } + int maxRemoteScanNum = -1; if (properties.containsKey(MAX_REMOTE_SCAN_THREAD_NUM)) { String value = properties.get(MAX_REMOTE_SCAN_THREAD_NUM); try { @@ -279,12 +280,14 @@ private static void checkProperties(Map properties) throws DdlEx if (intValue <= 0 && intValue != -1) { throw new NumberFormatException(); } + maxRemoteScanNum = intValue; } catch (NumberFormatException e) { throw new DdlException( MAX_REMOTE_SCAN_THREAD_NUM + " must be a positive integer or -1. 
but input value is " + value); } } + int minRemoteScanNum = -1; if (properties.containsKey(MIN_REMOTE_SCAN_THREAD_NUM)) { String value = properties.get(MIN_REMOTE_SCAN_THREAD_NUM); try { @@ -292,12 +295,20 @@ private static void checkProperties(Map properties) throws DdlEx if (intValue <= 0 && intValue != -1) { throw new NumberFormatException(); } + minRemoteScanNum = intValue; } catch (NumberFormatException e) { throw new DdlException( MIN_REMOTE_SCAN_THREAD_NUM + " must be a positive integer or -1. but input value is " + value); } } + if ((maxRemoteScanNum == -1 && minRemoteScanNum != -1) || (maxRemoteScanNum != -1 && minRemoteScanNum == -1)) { + throw new DdlException(MAX_REMOTE_SCAN_THREAD_NUM + " and " + MIN_REMOTE_SCAN_THREAD_NUM + + " must be specified simultaneously"); + } else if (maxRemoteScanNum < minRemoteScanNum) { + throw new DdlException(MAX_REMOTE_SCAN_THREAD_NUM + " must bigger or equal " + MIN_REMOTE_SCAN_THREAD_NUM); + } + // check queue property if (properties.containsKey(MAX_CONCURRENCY)) { try { diff --git a/regression-test/data/workload_manager_p0/test_curd_wlg.out b/regression-test/data/workload_manager_p0/test_curd_wlg.out index fca16d077e4095..876be32601a5b1 100644 --- a/regression-test/data/workload_manager_p0/test_curd_wlg.out +++ b/regression-test/data/workload_manager_p0/test_curd_wlg.out @@ -64,3 +64,27 @@ tag1_wg1 0% 10% tag1 tag1_wg2 0% 10% tag1 tag1_wg3 0% 80% tag1 +-- !select_remote_scan_num -- +20 10 + +-- !select_remote_scan_num_2 -- +21 10 + +-- !select_remote_scan_num_3 -- +21 2 + +-- !select_remote_scan_num_4 -- +40 20 + +-- !select_remote_scan_num_5 -- +10 5 + +-- !select_remote_scan_num_6 -- +3 3 + +-- !select_remote_scan_num_7 -- +10 5 + +-- !select_remote_scan_num_8 -- +-1 -1 + diff --git a/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy b/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy index 875eeb668e2a37..05034529726302 100644 --- a/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy +++ b/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy @@ -528,6 +528,70 @@ suite("test_crud_wlg") { sql "set bypass_workload_group = true;" sql "select count(1) from information_schema.active_queries;" + // test set remote scan pool + sql "drop workload group if exists test_remote_scan_wg;" + test { + sql "create workload group test_remote_scan_wg properties('min_remote_scan_thread_num'='123');" + exception "must be specified simultaneously" + } + + test { + sql "create workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='123');" + exception "must be specified simultaneously" + } + + test { + sql "create workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='10', 'min_remote_scan_thread_num'='123');" + exception "must bigger or equal " + } + + sql "create workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='20', 'min_remote_scan_thread_num'='10');" + qt_select_remote_scan_num "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='21')" + qt_select_remote_scan_num_2 "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + + test { + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='5')" + exception "must bigger or equal" + } + + sql "alter workload group 
test_remote_scan_wg properties('min_remote_scan_thread_num'='2')" + qt_select_remote_scan_num_3 "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + + test { + sql "alter workload group test_remote_scan_wg properties('min_remote_scan_thread_num'='30')" + exception "must bigger or equal" + } + + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='40', 'min_remote_scan_thread_num'='20')" + qt_select_remote_scan_num_4 "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='10', 'min_remote_scan_thread_num'='5')" + qt_select_remote_scan_num_5 "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='3', 'min_remote_scan_thread_num'='3')" + qt_select_remote_scan_num_6 "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + + sql "drop workload group test_remote_scan_wg;" + sql "create workload group test_remote_scan_wg properties('cpu_share'='1024');" + test { + sql "alter workload group test_remote_scan_wg properties('min_remote_scan_thread_num'='30')" + exception "must be specified simultaneously" + } + + test { + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='30')" + exception "must be specified simultaneously" + } + + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='10', 'min_remote_scan_thread_num'='5')" + qt_select_remote_scan_num_7 "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + + sql "alter workload group test_remote_scan_wg properties('max_remote_scan_thread_num'='-1', 'min_remote_scan_thread_num'='-1')" + qt_select_remote_scan_num_8 "select MAX_REMOTE_SCAN_THREAD_NUM,MIN_REMOTE_SCAN_THREAD_NUM from information_schema.workload_groups where name='test_remote_scan_wg';" + sql "drop workload group test_remote_scan_wg" + sql "drop workload group tag1_wg1;" sql "drop workload group tag1_wg2;" sql "drop workload group if exists tag2_wg1;" From 6b9f1328a7622368fd62412539607085d97f1880 Mon Sep 17 00:00:00 2001 From: wangbo Date: Thu, 25 Apr 2024 10:13:22 +0800 Subject: [PATCH 002/163] Fix stream load can only use default group (#33875) --- .../java/org/apache/doris/service/FrontendServiceImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index d5f0a5aafd7939..6b60e01d1c02a5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -1917,8 +1917,8 @@ public TStreamLoadPutResult streamLoadPut(TStreamLoadPutRequest request) { // mysql load request not carry user info, need fix it later. 
boolean hasUserName = !StringUtils.isEmpty(request.getUser()); if (Config.enable_workload_group && hasUserName) { - ConnectContext ctx = ConnectContext.get(); - tWorkloadGroupList = Env.getCurrentEnv().getWorkloadGroupMgr().getWorkloadGroup(ctx); + UserIdentity userIdentity = UserIdentity.createAnalyzedUserIdentWithIp(request.getUser(), "%"); + tWorkloadGroupList = Env.getCurrentEnv().getWorkloadGroupMgr().getWorkloadGroupByUser(userIdentity); } if (!Strings.isNullOrEmpty(request.getLoadSql())) { httpStreamPutImpl(request, result); From 3034ac3fe29dab050cbe9572f1db26635a18ec0d Mon Sep 17 00:00:00 2001 From: AlexYue Date: Thu, 25 Apr 2024 10:27:19 +0800 Subject: [PATCH 003/163] [chore](config) Add config to control BufferedReader and S3FileWriter's thread pool's min max nums (#33974) --- be/src/common/config.cpp | 14 ++++++++--- be/src/common/config.h | 14 ++++++++--- .../io/cache/block_file_cache_downloader.cpp | 2 +- be/src/io/fs/s3_file_writer.cpp | 2 +- be/src/runtime/exec_env_init.cpp | 25 ++++++++++++++++--- 5 files changed, 45 insertions(+), 12 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 9df1a184f609ef..de1458c240dff3 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1025,9 +1025,8 @@ DEFINE_mInt32(tablet_path_check_batch_size, "1000"); DEFINE_mInt64(row_column_page_size, "4096"); // it must be larger than or equal to 5MB DEFINE_mInt64(s3_write_buffer_size, "5242880"); -DEFINE_mInt32(s3_task_check_interval, "60"); -// The timeout config for S3 buffer allocation -DEFINE_mInt32(s3_writer_buffer_allocation_timeout, "300"); +// Log interval when doing s3 upload task +DEFINE_mInt32(s3_file_writer_log_interval_second, "60"); DEFINE_mInt64(file_cache_max_file_reader_cache_size, "1000000"); DEFINE_mInt64(hdfs_write_batch_buffer_size_mb, "4"); // 4MB @@ -1217,6 +1216,15 @@ DEFINE_mBool(enable_injection_point, "false"); DEFINE_mBool(ignore_schema_change_check, "false"); +// The min thread num for BufferedReaderPrefetchThreadPool +DEFINE_Int64(num_buffered_reader_prefetch_thread_pool_min_thread, "16"); +// The max thread num for BufferedReaderPrefetchThreadPool +DEFINE_Int64(num_buffered_reader_prefetch_thread_pool_max_thread, "64"); +// The min thread num for S3FileUploadThreadPool +DEFINE_Int64(num_s3_file_upload_thread_pool_min_thread, "16"); +// The max thread num for S3FileUploadThreadPool +DEFINE_Int64(num_s3_file_upload_thread_pool_max_thread, "64"); + // clang-format off #ifdef BE_TEST // test s3 diff --git a/be/src/common/config.h b/be/src/common/config.h index 81910dd2553fad..4139d76b6bcb7a 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1070,9 +1070,8 @@ DECLARE_mInt32(tablet_path_check_batch_size); DECLARE_mInt64(row_column_page_size); // it must be larger than or equal to 5MB DECLARE_mInt64(s3_write_buffer_size); -DECLARE_mInt32(s3_task_check_interval); -// The timeout config for S3 buffer allocation -DECLARE_mInt32(s3_writer_buffer_allocation_timeout); +// Log interval when doing s3 upload task +DECLARE_mInt32(s3_file_writer_log_interval_second); // the max number of cached file handle for block segemnt DECLARE_mInt64(file_cache_max_file_reader_cache_size); DECLARE_mInt64(hdfs_write_batch_buffer_size_mb); @@ -1296,6 +1295,15 @@ DECLARE_mBool(enable_injection_point); DECLARE_mBool(ignore_schema_change_check); +// The min thread num for BufferedReaderPrefetchThreadPool +DECLARE_Int64(num_buffered_reader_prefetch_thread_pool_min_thread); +// The max thread num for BufferedReaderPrefetchThreadPool 
+DECLARE_Int64(num_buffered_reader_prefetch_thread_pool_max_thread); +// The min thread num for S3FileUploadThreadPool +DECLARE_Int64(num_s3_file_upload_thread_pool_min_thread); +// The max thread num for S3FileUploadThreadPool +DECLARE_Int64(num_s3_file_upload_thread_pool_max_thread); + #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/io/cache/block_file_cache_downloader.cpp b/be/src/io/cache/block_file_cache_downloader.cpp index 30fb3a8633891c..283605f23bed91 100644 --- a/be/src/io/cache/block_file_cache_downloader.cpp +++ b/be/src/io/cache/block_file_cache_downloader.cpp @@ -184,7 +184,7 @@ struct DownloadTaskExecutor { LOG_WARNING("").error(st); } } - auto timeout_duration = config::s3_task_check_interval; + auto timeout_duration = config::s3_file_writer_log_interval_second; timespec current_time; // We don't need high accuracy here, so we use time(nullptr) // since it's the fastest way to get current time(second) diff --git a/be/src/io/fs/s3_file_writer.cpp b/be/src/io/fs/s3_file_writer.cpp index 9df1ac847af365..84487f496ac1e9 100644 --- a/be/src/io/fs/s3_file_writer.cpp +++ b/be/src/io/fs/s3_file_writer.cpp @@ -135,7 +135,7 @@ Status S3FileWriter::_create_multi_upload_request() { } void S3FileWriter::_wait_until_finish(std::string_view task_name) { - auto timeout_duration = config::s3_writer_buffer_allocation_timeout; + auto timeout_duration = config::s3_file_writer_log_interval_second; auto msg = fmt::format( "{} multipart upload already takes {} seconds, bucket={}, key={}, upload_id={}", task_name, timeout_duration, _bucket, _path.native(), _upload_id); diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 5cbb5829ee0ac1..5a7e39cf158c41 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -142,6 +142,17 @@ static void init_doris_metrics(const std::vector& store_paths) { DorisMetrics::instance()->initialize(init_system_metrics, disk_devices, network_interfaces); } +// Used to calculate the num of min thread and max thread based on the passed config +static pair get_num_threads(size_t min_num, size_t max_num) { + auto num_cores = doris::CpuInfo::num_cores(); + min_num = (min_num == 0) ? num_cores : min_num; + max_num = (max_num == 0) ? 
num_cores : max_num; + auto factor = max_num / min_num; + min_num = std::min(num_cores * factor, min_num); + max_num = std::min(min_num * factor, max_num); + return {min_num, max_num}; +} + Status ExecEnv::init(ExecEnv* env, const std::vector& store_paths, const std::vector& spill_store_paths, const std::set& broken_paths) { @@ -184,9 +195,12 @@ Status ExecEnv::_init(const std::vector& store_paths, .set_max_queue_size(config::send_batch_thread_pool_queue_size) .build(&_send_batch_thread_pool)); + auto [buffered_reader_min_threads, buffered_reader_max_threads] = + get_num_threads(config::num_buffered_reader_prefetch_thread_pool_min_thread, + config::num_buffered_reader_prefetch_thread_pool_max_thread); static_cast(ThreadPoolBuilder("BufferedReaderPrefetchThreadPool") - .set_min_threads(16) - .set_max_threads(64) + .set_min_threads(buffered_reader_min_threads) + .set_max_threads(buffered_reader_max_threads) .build(&_buffered_reader_prefetch_thread_pool)); static_cast(ThreadPoolBuilder("SendTableStatsThreadPool") @@ -199,9 +213,12 @@ Status ExecEnv::_init(const std::vector& store_paths, .set_max_threads(16) .build(&_s3_downloader_download_poller_thread_pool)); + auto [s3_file_upload_min_threads, s3_file_upload_max_threads] = + get_num_threads(config::num_s3_file_upload_thread_pool_min_thread, + config::num_s3_file_upload_thread_pool_max_thread); static_cast(ThreadPoolBuilder("S3FileUploadThreadPool") - .set_min_threads(16) - .set_max_threads(64) + .set_min_threads(s3_file_upload_min_threads) + .set_max_threads(s3_file_upload_max_threads) .build(&_s3_file_upload_thread_pool)); // min num equal to fragment pool's min num From 440f60738afba27e4393e74a3461d2bd02843a61 Mon Sep 17 00:00:00 2001 From: Lightman <31928846+Lchangliang@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:33:13 +0800 Subject: [PATCH 004/163] (cloud-merge) Fix create mv failed bacause expr is null (#34010) --- .../src/main/java/org/apache/doris/alter/CloudRollupJobV2.java | 3 ++- .../src/main/java/org/apache/doris/alter/RollupJobV2.java | 2 +- .../java/org/apache/doris/cloud/CacheHotspotManagerUtils.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java index 51c4abde769bb4..1474de4d22f858 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudRollupJobV2.java @@ -55,7 +55,7 @@ public class CloudRollupJobV2 extends RollupJobV2 { private static final Logger LOG = LogManager.getLogger(CloudRollupJobV2.class); - public static AlterJobV2 buildCloudRollupJobV2(RollupJobV2 job) throws IllegalAccessException { + public static AlterJobV2 buildCloudRollupJobV2(RollupJobV2 job) throws IllegalAccessException, AnalysisException { CloudRollupJobV2 ret = new CloudRollupJobV2(); List allFields = new ArrayList<>(); Class tmpClass = RollupJobV2.class; @@ -70,6 +70,7 @@ public static AlterJobV2 buildCloudRollupJobV2(RollupJobV2 job) throws IllegalAc field.set(ret, field.get(job)); } } + ret.initAnalyzer(); return ret; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index 60c02b6b5252b5..ec0868637e781e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -183,7 +183,7 @@ public void 
setStorageFormat(TStorageFormat storageFormat) { this.storageFormat = storageFormat; } - private void initAnalyzer() throws AnalysisException { + protected void initAnalyzer() throws AnalysisException { ConnectContext connectContext = new ConnectContext(); Database db; try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java index 796b2f99560782..e3fff32035dbcc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManagerUtils.java @@ -48,7 +48,7 @@ public class CacheHotspotManagerUtils { FeConstants.INTERNAL_DB_NAME, FeConstants.INTERNAL_FILE_CACHE_HOTSPOT_TABLE_NAME); // TODO(yuejing): 如何加字段 private static final String CREATE_CACHE_TABLE = - "create table " + TABLE_NAME + " IF NOT EXISTS (\n" + "create table " + TABLE_NAME + " (\n" + " cluster_id varchar(65530),\n" + " backend_id bigint,\n" + " table_id bigint,\n" From 4e7307d8043a43c731844f17444eabe403e8c09f Mon Sep 17 00:00:00 2001 From: walter Date: Thu, 25 Apr 2024 11:09:12 +0800 Subject: [PATCH 005/163] [fix](fe) Fix SHOW CREATE TABLE with AUTO PARTITION (#34071) AUTO PARTITION grammar has changed since #31585, but the output of SHOW CREATE TABLE was left out to change, so the result is not able to be recognized by the FE parser. --- .../doris/catalog/RangePartitionInfo.java | 10 +---- .../test_create_table_auto_partition.groovy | 45 +++++++++++++++++++ 2 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 regression-test/suites/ddl_p0/test_create_table_auto_partition.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionInfo.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionInfo.java index 952fa88d259292..9a6c5d353fd78a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/RangePartitionInfo.java @@ -23,7 +23,6 @@ import org.apache.doris.analysis.PartitionKeyDesc; import org.apache.doris.analysis.RangePartitionDesc; import org.apache.doris.analysis.SinglePartitionDesc; -import org.apache.doris.analysis.SlotRef; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.util.RangeUtils; @@ -266,14 +265,9 @@ public String toSql(OlapTable table, List partitionId) { if (enableAutomaticPartition()) { sb.append("AUTO PARTITION BY RANGE "); for (Expr e : partitionExprs) { - boolean isSlotRef = (e instanceof SlotRef); - if (isSlotRef) { - sb.append("("); - } + sb.append("("); sb.append(e.toSql()); - if (isSlotRef) { - sb.append(")"); - } + sb.append(")"); } sb.append("\n("); } else { diff --git a/regression-test/suites/ddl_p0/test_create_table_auto_partition.groovy b/regression-test/suites/ddl_p0/test_create_table_auto_partition.groovy new file mode 100644 index 00000000000000..a9ca68a87771ba --- /dev/null +++ b/regression-test/suites/ddl_p0/test_create_table_auto_partition.groovy @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// this suite is for creating table with timestamp datatype in defferent +// case. For example: 'year' and 'Year' datatype should also be valid in definition + + +suite("test_create_table_auto_partition") { + def testTable = "test_create_table_auto_partition_table" + + sql "DROP TABLE IF EXISTS ${testTable}" + sql """ + CREATE TABLE `${testTable}` ( + `TIME_STAMP` datev2 NOT NULL COMMENT 'Date of collection' + ) ENGINE=OLAP + DUPLICATE KEY(`TIME_STAMP`) + AUTO PARTITION BY RANGE (date_trunc(`TIME_STAMP`, 'month')) + ( + ) + DISTRIBUTED BY HASH(`TIME_STAMP`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + // The AUTO PARTITION func call must wrapped with (). + def text = sql_return_maparray "show create table ${testTable}" + def createTable = text[0]['Create Table'] + assertTrue(createTable.contains("AUTO PARTITION BY RANGE (date_trunc(`TIME_STAMP`, 'month')")) +} + From 3165b5ea3821b09af0f4dcf2ec4a2e2015bd4dde Mon Sep 17 00:00:00 2001 From: zzzxl <33418555+zzzxl1993@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:10:50 +0800 Subject: [PATCH 006/163] [fix](inverted index) the rowset may be deleted and become nullptr.(#33878) 1. between compaction prepare and execute, a rowset may be deleted by cold down, leading to a nullptr exception 2. intermittent bug that cannot be replicated with a test case --- be/src/olap/compaction.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 55af708c759fe8..061d667df8bb68 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -529,11 +529,9 @@ Status Compaction::do_inverted_index_compaction() { // src index files // format: rowsetId_segmentId std::vector src_index_files(src_segment_num); - std::vector src_rowset_ids; for (const auto& m : src_seg_to_id_map) { std::pair p = m.first; src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second); - src_rowset_ids.push_back(p.first); } // dest index files @@ -671,9 +669,8 @@ Status Compaction::do_inverted_index_compaction() { // if index properties are different, index compaction maybe needs to be skipped. 
bool is_continue = false; std::optional> first_properties; - for (const auto& rowset_id : src_rowset_ids) { - auto rowset_ptr = _tablet->get_rowset(rowset_id); - const auto* tablet_index = rowset_ptr->tablet_schema()->get_inverted_index(col); + for (const auto& rowset : _input_rowsets) { + const auto* tablet_index = rowset->tablet_schema()->get_inverted_index(col); const auto& properties = tablet_index->properties(); if (!first_properties.has_value()) { first_properties = properties; From 93f9a2c5bcd0cc3aa2803857c422978de1678dc4 Mon Sep 17 00:00:00 2001 From: yujun Date: Thu, 25 Apr 2024 11:13:01 +0800 Subject: [PATCH 007/163] [fix](tablet invert index) fix tablet invert index leaky caused by auto partition (#33973) --- .../doris/datasource/InternalCatalog.java | 24 ++++---- .../doris/alter/AddExistsPartitionTest.java | 56 +++++++++++++++++++ .../doris/utframe/TestWithFeService.java | 3 +- 3 files changed, 72 insertions(+), 11 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/alter/AddExistsPartitionTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 10983a955b79c1..dd52fad4f7f68c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -1466,8 +1466,10 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa // check partition name if (olapTable.checkPartitionNameExist(partitionName)) { if (singlePartitionDesc.isSetIfNotExists()) { - LOG.info("add partition[{}] which already exists", partitionName); - return; + LOG.info("table[{}] add partition[{}] which already exists", olapTable.getName(), partitionName); + if (!DebugPointUtil.isEnable("InternalCatalog.addPartition.noCheckExists")) { + return; + } } else { ErrorReport.reportDdlException(ErrorCode.ERR_SAME_NAME_PARTITION, partitionName); } @@ -1624,6 +1626,11 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa if (!Strings.isNullOrEmpty(dataProperty.getStoragePolicy())) { storagePolicy = dataProperty.getStoragePolicy(); } + Runnable failedCleanCallback = () -> { + for (Long tabletId : tabletIdSet) { + Env.getCurrentInvertedIndex().deleteTablet(tabletId); + } + }; try { long partitionId = idGeneratorBuffer.getNextId(); List partitionIds = Lists.newArrayList(partitionId); @@ -1646,8 +1653,9 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa olapTable.checkNormalStateForAlter(); // check partition name if (olapTable.checkPartitionNameExist(partitionName)) { + LOG.info("table[{}] add partition[{}] which already exists", olapTable.getName(), partitionName); if (singlePartitionDesc.isSetIfNotExists()) { - LOG.info("add partition[{}] which already exists", partitionName); + failedCleanCallback.run(); return; } else { ErrorReport.reportDdlException(ErrorCode.ERR_SAME_NAME_PARTITION, partitionName); @@ -1696,8 +1704,6 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa } } - - if (metaChanged) { throw new DdlException("Table[" + tableName + "]'s meta has been changed. 
try again."); } @@ -1741,9 +1747,7 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa olapTable.writeUnlock(); } } catch (DdlException e) { - for (Long tabletId : tabletIdSet) { - Env.getCurrentInvertedIndex().deleteTablet(tabletId); - } + failedCleanCallback.run(); throw e; } } @@ -2844,10 +2848,10 @@ private void createOlapTable(Database db, CreateTableStmt stmt) throws UserExcep Env.getCurrentEnv().getEditLog().logColocateAddTable(info); } LOG.info("successfully create table[{};{}]", tableName, tableId); - // register or remove table from DynamicPartition after table created - DynamicPartitionUtil.registerOrRemoveDynamicPartitionTable(db.getId(), olapTable, false); Env.getCurrentEnv().getDynamicPartitionScheduler() .executeDynamicPartitionFirstTime(db.getId(), olapTable.getId()); + // register or remove table from DynamicPartition after table created + DynamicPartitionUtil.registerOrRemoveDynamicPartitionTable(db.getId(), olapTable, false); Env.getCurrentEnv().getDynamicPartitionScheduler() .createOrUpdateRuntimeInfo(tableId, DynamicPartitionScheduler.LAST_UPDATE_TIME, TimeUtils.getCurrentFormatTime()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/AddExistsPartitionTest.java b/fe/fe-core/src/test/java/org/apache/doris/alter/AddExistsPartitionTest.java new file mode 100644 index 00000000000000..0d95ee30cdeb20 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/alter/AddExistsPartitionTest.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.alter; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.Config; +import org.apache.doris.common.util.DebugPointUtil; +import org.apache.doris.common.util.DebugPointUtil.DebugPoint; +import org.apache.doris.utframe.TestWithFeService; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; + +public class AddExistsPartitionTest extends TestWithFeService { + + @Override + protected void beforeCreatingConnectContext() throws Exception { + Config.enable_debug_points = true; + } + + @Test + public void testAddExistsPartition() throws Exception { + DebugPointUtil.addDebugPoint("InternalCatalog.addPartition.noCheckExists", new DebugPoint()); + createDatabase("test"); + createTable("CREATE TABLE test.tbl (k INT) DISTRIBUTED BY HASH(k) " + + " BUCKETS 5 PROPERTIES ( \"replication_num\" = \"" + backendNum() + "\" )"); + List backendIds = Env.getCurrentSystemInfo().getAllBackendIds(); + for (long backendId : backendIds) { + Assertions.assertEquals(5, Env.getCurrentInvertedIndex().getTabletIdsByBackendId(backendId).size()); + } + + String addPartitionSql = "ALTER TABLE test.tbl ADD PARTITION IF NOT EXISTS tbl" + + " DISTRIBUTED BY HASH(k) BUCKETS 5"; + Assertions.assertNotNull(getSqlStmtExecutor(addPartitionSql)); + for (long backendId : backendIds) { + Assertions.assertEquals(5, Env.getCurrentInvertedIndex().getTabletIdsByBackendId(backendId).size()); + } + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java index b590234a3e83e9..063ab21d8bcf62 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java +++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/TestWithFeService.java @@ -583,7 +583,8 @@ public StmtExecutor getSqlStmtExecutor(String queryStr) throws Exception { connectContext.getState().reset(); StmtExecutor stmtExecutor = new StmtExecutor(connectContext, queryStr); stmtExecutor.execute(); - if (connectContext.getState().getStateType() != QueryState.MysqlStateType.ERR) { + if (connectContext.getState().getStateType() != QueryState.MysqlStateType.ERR + && connectContext.getState().getErrorCode() == null) { return stmtExecutor; } else { return null; From 8ffb170462028abd40fe166b9df8687f032bb020 Mon Sep 17 00:00:00 2001 From: AlexYue Date: Thu, 25 Apr 2024 11:20:41 +0800 Subject: [PATCH 008/163] [feature](Cloud) Implement gcs accessor for compatibility (#34081) --- cloud/src/recycler/s3_accessor.cpp | 33 ++++++++- cloud/src/recycler/s3_accessor.h | 9 +++ cloud/test/s3_accessor_test.cpp | 103 ++++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 3 deletions(-) diff --git a/cloud/src/recycler/s3_accessor.cpp b/cloud/src/recycler/s3_accessor.cpp index 543f84f87fc420..d1ebfe62a1d781 100644 --- a/cloud/src/recycler/s3_accessor.cpp +++ b/cloud/src/recycler/s3_accessor.cpp @@ -30,6 +30,8 @@ #include #include +#include +#include #include #include "common/logging.h" @@ -226,7 +228,21 @@ int S3Accessor::delete_objects(const std::vector& relative_paths) { } int S3Accessor::delete_object(const std::string& relative_path) { - // TODO(cyx) + Aws::S3::Model::DeleteObjectRequest request; + auto key = get_key(relative_path); + request.WithBucket(conf_.bucket).WithKey(key); + auto outcome = SYNC_POINT_HOOK_RETURN_VALUE(s3_client_->DeleteObject(request), + "s3_client::delete_object", request); + if (!outcome.IsSuccess()) { + LOG_WARNING("failed to delete 
object") + .tag("endpoint", conf_.endpoint) + .tag("bucket", conf_.bucket) + .tag("key", key) + .tag("responseCode", static_cast(outcome.GetError().GetResponseCode())) + .tag("error", outcome.GetError().GetMessage()) + .tag("exception", outcome.GetError().GetExceptionName()); + return -1; + } return 0; } @@ -422,5 +438,20 @@ int S3Accessor::check_bucket_versioning() { return 0; } +int GcsAccessor::delete_objects(const std::vector& relative_paths) { + std::vector delete_rets(relative_paths.size()); + std::transform(std::execution::par, relative_paths.begin(), relative_paths.end(), + delete_rets.begin(), + [this](const std::string& path) { return delete_object(path); }); + int ret = 0; + for (int delete_ret : delete_rets) { + if (delete_ret != 0) { + ret = delete_ret; + break; + } + } + return ret; +} + #undef HELP_MACRO } // namespace doris::cloud diff --git a/cloud/src/recycler/s3_accessor.h b/cloud/src/recycler/s3_accessor.h index 10291cfd4bad68..1025ceab52e1f7 100644 --- a/cloud/src/recycler/s3_accessor.h +++ b/cloud/src/recycler/s3_accessor.h @@ -90,4 +90,13 @@ class S3Accessor : public ObjStoreAccessor { std::string path_; }; +class GcsAccessor final : public S3Accessor { +public: + explicit GcsAccessor(S3Conf conf) : S3Accessor(std::move(conf)) {} + ~GcsAccessor() override = default; + + // returns 0 for success otherwise error + int delete_objects(const std::vector& relative_paths) override; +}; + } // namespace doris::cloud diff --git a/cloud/test/s3_accessor_test.cpp b/cloud/test/s3_accessor_test.cpp index bb8b7c27bd989d..972505c3999e09 100644 --- a/cloud/test/s3_accessor_test.cpp +++ b/cloud/test/s3_accessor_test.cpp @@ -58,6 +58,8 @@ class S3ClientInterface { const Aws::S3::Model::ListObjectsV2Request& req) = 0; virtual Aws::S3::Model::DeleteObjectsOutcome DeleteObjects( const Aws::S3::Model::DeleteObjectsRequest& req) = 0; + virtual Aws::S3::Model::DeleteObjectOutcome DeleteObject( + const Aws::S3::Model::DeleteObjectRequest& req) = 0; virtual Aws::S3::Model::PutObjectOutcome PutObject( const Aws::S3::Model::PutObjectRequest& req) = 0; virtual Aws::S3::Model::HeadObjectOutcome HeadObject( @@ -122,6 +124,13 @@ class S3Client : public S3ClientInterface { return Aws::S3::Model::DeleteObjectsOutcome(std::move(result)); } + Aws::S3::Model::DeleteObjectOutcome DeleteObject( + const Aws::S3::Model::DeleteObjectRequest& req) override { + Aws::S3::Model::DeleteObjectResult result; + _mock_fs->delete_object(req.GetKey()); + return Aws::S3::Model::DeleteObjectOutcome(std::move(result)); + } + Aws::S3::Model::PutObjectOutcome PutObject( const Aws::S3::Model::PutObjectRequest& req) override { Aws::S3::Model::PutObjectResult result; @@ -207,6 +216,18 @@ class ErrorS3Client : public S3ClientInterface { return Aws::S3::Model::DeleteObjectsOutcome(std::move(err)); } + Aws::S3::Model::DeleteObjectOutcome DeleteObject( + const Aws::S3::Model::DeleteObjectRequest& req) override { + if (!return_error_for_error_s3_client) { + return _correct_impl->DeleteObject(req); + } + auto err = Aws::Client::AWSError(Aws::S3::S3Errors::RESOURCE_NOT_FOUND, + false); + err.SetResponseCode(Aws::Http::HttpResponseCode::NOT_FOUND); + // return -1 + return Aws::S3::Model::DeleteObjectOutcome(std::move(err)); + } + Aws::S3::Model::PutObjectOutcome PutObject( const Aws::S3::Model::PutObjectRequest& req) override { if (!return_error_for_error_s3_client) { @@ -267,6 +288,10 @@ class MockS3Client { return _impl->DeleteObjects(req); } + auto DeleteObject(const Aws::S3::Model::DeleteObjectRequest& req) { + return 
_impl->DeleteObject(req); + } + auto PutObject(const Aws::S3::Model::PutObjectRequest& req) { return _impl->PutObject(req); } auto HeadObject(const Aws::S3::Model::HeadObjectRequest& req) { return _impl->HeadObject(req); } @@ -304,6 +329,12 @@ static auto callbacks = std::array { Aws::S3::Model::DeleteObjectsRequest*>*)p; *pair.first = (*_mock_client).DeleteObjects(*pair.second); }}, + MockCallable {"s3_client::delete_object", + [](void* p) { + auto pair = *(std::pair*)p; + *pair.first = (*_mock_client).DeleteObject(*pair.second); + }}, MockCallable {"s3_client::put_object", [](void* p) { auto pair = *(std::pairexist(prefix)); } -// function is not implemented -TEST(S3AccessorTest, DISABLED_delete_object) { +TEST(S3AccessorTest, delete_object) { _mock_fs = std::make_unique(cloud::S3Conf {}); _mock_client = std::make_unique(); auto accessor = std::make_unique(S3Conf {}); @@ -641,6 +671,75 @@ TEST(S3AccessorTest, DISABLED_delete_object) { } } +TEST(S3AccessorTest, gcs_delete_objects) { + _mock_fs = std::make_unique(cloud::S3Conf {}); + _mock_client = std::make_unique(); + auto accessor = std::make_unique(S3Conf {}); + auto sp = SyncPoint::get_instance(); + std::for_each(callbacks.begin(), callbacks.end(), [&](const MockCallable& mock_callback) { + sp->set_call_back(fmt::format("{}::pred", mock_callback.point_name), + [](void* p) { *((bool*)p) = true; }); + sp->set_call_back(mock_callback.point_name, mock_callback.func); + }); + sp->enable_processing(); + std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { + sp->disable_processing(); + std::for_each(callbacks.begin(), callbacks.end(), [&](const MockCallable& mock_callback) { + sp->clear_call_back(mock_callback.point_name); + }); + }); + std::string prefix = "test_delete_object"; + std::vector paths; + size_t num = 300; + for (size_t i = 0; i < num; i++) { + auto path = fmt::format("{}{}", prefix, i); + _mock_fs->put_object(path, ""); + paths.emplace_back(std::move(path)); + } + ASSERT_EQ(0, accessor->delete_objects(paths)); + for (size_t i = 0; i < num; i++) { + auto path = fmt::format("{}{}", prefix, i); + ASSERT_EQ(1, accessor->exist(path)); + } +} + +TEST(S3AccessorTest, gcs_delete_objects_error) { + _mock_fs = std::make_unique(cloud::S3Conf {}); + _mock_client = std::make_unique(std::make_unique()); + auto accessor = std::make_unique(S3Conf {}); + auto sp = SyncPoint::get_instance(); + std::for_each(callbacks.begin(), callbacks.end(), [&](const MockCallable& mock_callback) { + sp->set_call_back(fmt::format("{}::pred", mock_callback.point_name), + [](void* p) { *((bool*)p) = true; }); + sp->set_call_back(mock_callback.point_name, mock_callback.func); + }); + sp->enable_processing(); + std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { + sp->disable_processing(); + std::for_each(callbacks.begin(), callbacks.end(), [&](const MockCallable& mock_callback) { + sp->clear_call_back(mock_callback.point_name); + }); + return_error_for_error_s3_client = false; + }); + std::string prefix = "test_delete_objects"; + std::vector paths_first_half; + std::vector paths_second_half; + size_t num = 300; + for (size_t i = 0; i < num; i++) { + auto path = fmt::format("{}{}", prefix, i); + _mock_fs->put_object(path, ""); + if (i < 150) { + paths_first_half.emplace_back(std::move(path)); + } else { + paths_second_half.emplace_back(std::move(path)); + } + } + std::vector empty; + ASSERT_EQ(0, accessor->delete_objects(empty)); + return_error_for_error_s3_client = true; + ASSERT_EQ(-1, accessor->delete_objects(paths_first_half)); +} + 
TEST(S3AccessorTest, delete_objects) { _mock_fs = std::make_unique(cloud::S3Conf {}); _mock_client = std::make_unique(); From df11c8c1e13a69fa20ec4d5d7a55e2edd5b357be Mon Sep 17 00:00:00 2001 From: AlexYue Date: Thu, 25 Apr 2024 11:24:21 +0800 Subject: [PATCH 009/163] [enhance](Cloud) Unify s3 and hdfs vault's check existence logic (#34060) --- .../meta-service/meta_service_resource.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index 57220e50c5ebf7..1dd04617cc4ac6 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -374,13 +374,6 @@ static int add_hdfs_storage_vault(InstanceInfoPB& instance, Transaction* txn, msg = fmt::format("vault_name={} passed invalid argument", hdfs_param.name()); return -1; } - if (std::find_if(instance.storage_vault_names().begin(), instance.storage_vault_names().end(), - [&hdfs_param](const auto& name) { return name == hdfs_param.name(); }) != - instance.storage_vault_names().end()) { - code = MetaServiceCode::ALREADY_EXISTED; - msg = fmt::format("vault_name={} already created", hdfs_param.name()); - return -1; - } using namespace detail; // Check and normalize hdfs conf @@ -472,13 +465,23 @@ static void create_object_info_with_encrypt(const InstanceInfoPB& instance, Obje static int add_vault_into_instance(InstanceInfoPB& instance, Transaction* txn, StorageVaultPB& vault_param, MetaServiceCode& code, std::string& msg) { + if (std::find_if(instance.storage_vault_names().begin(), instance.storage_vault_names().end(), + [&vault_param](const auto& name) { return name == vault_param.name(); }) != + instance.storage_vault_names().end()) { + code = MetaServiceCode::ALREADY_EXISTED; + msg = fmt::format("vault_name={} already created", vault_param.name()); + return -1; + } + if (vault_param.has_hdfs_info()) { return add_hdfs_storage_vault(instance, txn, vault_param, code, msg); } + create_object_info_with_encrypt(instance, vault_param.mutable_obj_info(), true, code, msg); if (code != MetaServiceCode::OK) { return -1; } + vault_param.mutable_obj_info()->CopyFrom(vault_param.obj_info()); vault_param.set_id(vault_param.obj_info().id()); auto vault_key = storage_vault_key({instance.instance_id(), vault_param.obj_info().id()}); @@ -757,7 +760,7 @@ void MetaServiceImpl::alter_obj_store_info(google::protobuf::RpcController* cont } } break; case AlterObjStoreInfoRequest::ADD_HDFS_INFO: { - if (auto ret = add_hdfs_storage_vault( + if (auto ret = add_vault_into_instance( instance, txn.get(), const_cast(request->vault()), code, msg); ret != 0) { return; @@ -773,7 +776,7 @@ void MetaServiceImpl::alter_obj_store_info(google::protobuf::RpcController* cont msg = ss.str(); return; } - if (auto ret = add_hdfs_storage_vault( + if (auto ret = add_vault_into_instance( instance, txn.get(), const_cast(request->vault()), code, msg); ret != 0) { return; From 4073aba5ad53fa35f2a0992316d59b101870b23c Mon Sep 17 00:00:00 2001 From: Pxl Date: Thu, 25 Apr 2024 11:40:05 +0800 Subject: [PATCH 010/163] =?UTF-8?q?[Improvementation](join)=20empty=5Fbloc?= =?UTF-8?q?k=20shall=20be=20set=20true=20when=20build=20blo=E2=80=A6=20(#3?= =?UTF-8?q?3977)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit empty_block shall be set true when build block only one row --- be/src/pipeline/exec/hashjoin_build_sink.cpp | 17 ++-- .../pipeline/exec/hashjoin_probe_operator.cpp 
| 80 +++++++++---------- .../pipeline/exec/hashjoin_probe_operator.h | 9 ++- be/src/vec/core/column_with_type_and_name.cpp | 12 +-- .../test_half_join_nullable_build_side.out | 6 ++ .../test_half_join_nullable_build_side.groovy | 4 + 6 files changed, 67 insertions(+), 61 deletions(-) diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index a0d111c63a7d98..2b2bdad86f70f8 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -156,21 +156,22 @@ bool HashJoinBuildSinkLocalState::build_unique() const { void HashJoinBuildSinkLocalState::init_short_circuit_for_probe() { auto& p = _parent->cast(); + bool empty_block = + !_shared_state->build_block || + !(_shared_state->build_block->rows() > 1); // build size always mock a row into block _shared_state->short_circuit_for_probe = (_shared_state->_has_null_in_build_side && p._join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && !p._is_mark_join) || - (!_shared_state->build_block && p._join_op == TJoinOp::INNER_JOIN && - !p._is_mark_join) || - (!_shared_state->build_block && p._join_op == TJoinOp::LEFT_SEMI_JOIN && - !p._is_mark_join) || - (!_shared_state->build_block && p._join_op == TJoinOp::RIGHT_OUTER_JOIN) || - (!_shared_state->build_block && p._join_op == TJoinOp::RIGHT_SEMI_JOIN) || - (!_shared_state->build_block && p._join_op == TJoinOp::RIGHT_ANTI_JOIN); + (empty_block && p._join_op == TJoinOp::INNER_JOIN && !p._is_mark_join) || + (empty_block && p._join_op == TJoinOp::LEFT_SEMI_JOIN && !p._is_mark_join) || + (empty_block && p._join_op == TJoinOp::RIGHT_OUTER_JOIN) || + (empty_block && p._join_op == TJoinOp::RIGHT_SEMI_JOIN) || + (empty_block && p._join_op == TJoinOp::RIGHT_ANTI_JOIN); //when build table rows is 0 and not have other_join_conjunct and not _is_mark_join and join type is one of LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN //we could get the result is probe table + null-column(if need output) _shared_state->empty_right_table_need_probe_dispose = - (!_shared_state->build_block && !p._have_other_join_conjunct && !p._is_mark_join) && + (empty_block && !p._have_other_join_conjunct && !p._is_mark_join) && (p._join_op == TJoinOp::LEFT_OUTER_JOIN || p._join_op == TJoinOp::FULL_OUTER_JOIN || p._join_op == TJoinOp::LEFT_ANTI_JOIN); } diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp b/be/src/pipeline/exec/hashjoin_probe_operator.cpp index a58ad62211ceb2..fc6f81f41902a2 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp +++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp @@ -247,7 +247,7 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc } //TODO: this short circuit maybe could refactor, no need to check at here. - if (local_state._shared_state->empty_right_table_need_probe_dispose) { + if (local_state.empty_right_table_shortcut()) { // when build table rows is 0 and not have other_join_conjunct and join type is one of LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN // we could get the result is probe table + null-column(if need output) // If we use a short-circuit strategy, should return block directly by add additional null data. 
@@ -257,12 +257,6 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc return Status::OK(); } - vectorized::Block temp_block; - //get probe side output column - for (int i = 0; i < _left_output_slot_flags.size(); ++i) { - temp_block.insert(local_state._probe_block.get_by_position(i)); - } - //create build side null column, if need output for (int i = 0; (_join_op != TJoinOp::LEFT_ANTI_JOIN) && i < _right_output_slot_flags.size(); ++i) { @@ -273,8 +267,8 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc vectorized::ColumnVector::create(block_rows, 1); auto nullable_column = vectorized::ColumnNullable::create(std::move(column), std::move(null_map_column)); - temp_block.insert({std::move(nullable_column), make_nullable(type), - _right_table_column_names[i]}); + local_state._probe_block.insert({std::move(nullable_column), make_nullable(type), + _right_table_column_names[i]}); } if (_is_outer_join) { reinterpret_cast( @@ -290,8 +284,7 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc /// No need to check the block size in `_filter_data_and_build_output` because here dose not /// increase the output rows count(just same as `_probe_block`'s rows count). RETURN_IF_ERROR(local_state.filter_data_and_build_output(state, output_block, eos, - &temp_block, false)); - temp_block.clear(); + &local_state._probe_block, false)); local_state._probe_block.clear_column_data(_child_x->row_desc().num_materialized_slots()); return Status::OK(); } @@ -374,36 +367,52 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc } Status HashJoinProbeLocalState::_extract_join_column(vectorized::Block& block, - vectorized::ColumnUInt8::MutablePtr& null_map, - vectorized::ColumnRawPtrs& raw_ptrs, const std::vector& res_col_ids) { + if (empty_right_table_shortcut()) { + return Status::OK(); + } + + _probe_columns.resize(_probe_expr_ctxs.size()); + + if (!_has_set_need_null_map_for_probe) { + _has_set_need_null_map_for_probe = true; + _need_null_map_for_probe = _need_probe_null_map(block, res_col_ids); + } + if (_need_null_map_for_probe) { + if (_null_map_column == nullptr) { + _null_map_column = vectorized::ColumnUInt8::create(); + } + _null_map_column->get_data().assign(block.rows(), (uint8_t)0); + } + auto& shared_state = *_shared_state; auto& p = _parent->cast(); for (size_t i = 0; i < shared_state.build_exprs_size; ++i) { if (p._should_convert_to_nullable[i]) { _key_columns_holder.emplace_back( vectorized::make_nullable(block.get_by_position(res_col_ids[i]).column)); - raw_ptrs[i] = _key_columns_holder.back().get(); + _probe_columns[i] = _key_columns_holder.back().get(); continue; } if (shared_state.is_null_safe_eq_join[i]) { - raw_ptrs[i] = block.get_by_position(res_col_ids[i]).column.get(); + _probe_columns[i] = block.get_by_position(res_col_ids[i]).column.get(); } else { - auto column = block.get_by_position(res_col_ids[i]).column.get(); - if (auto* nullable = check_and_get_column(*column)) { - auto& col_nested = nullable->get_nested_column(); - auto& col_nullmap = nullable->get_null_map_data(); - - DCHECK(null_map != nullptr); - vectorized::VectorizedUtils::update_null_map(null_map->get_data(), col_nullmap); + const auto* column = block.get_by_position(res_col_ids[i]).column.get(); + if (const auto* nullable = check_and_get_column(*column)) { + const auto& col_nested = nullable->get_nested_column(); + const auto& col_nullmap = nullable->get_null_map_data(); + + DCHECK(_null_map_column != 
nullptr); + vectorized::VectorizedUtils::update_null_map(_null_map_column->get_data(), + col_nullmap); if (shared_state.store_null_in_hash_table[i]) { - raw_ptrs[i] = nullable; + _probe_columns[i] = nullable; } else { - raw_ptrs[i] = &col_nested; + _probe_columns[i] = &col_nested; } } else { - raw_ptrs[i] = column; + _probe_columns[i] = column; } } } @@ -482,10 +491,7 @@ Status HashJoinProbeOperatorX::push(RuntimeState* state, vectorized::Block* inpu local_state._probe_eos = eos; if (input_block->rows() > 0) { COUNTER_UPDATE(local_state._probe_rows_counter, input_block->rows()); - int probe_expr_ctxs_sz = local_state._probe_expr_ctxs.size(); - local_state._probe_columns.resize(probe_expr_ctxs_sz); - - std::vector res_col_ids(probe_expr_ctxs_sz); + std::vector res_col_ids(local_state._probe_expr_ctxs.size()); RETURN_IF_ERROR(_do_evaluate(*input_block, local_state._probe_expr_ctxs, *local_state._probe_expr_call_timer, res_col_ids)); if (_join_op == TJoinOp::RIGHT_OUTER_JOIN || _join_op == TJoinOp::FULL_OUTER_JOIN) { @@ -493,22 +499,8 @@ Status HashJoinProbeOperatorX::push(RuntimeState* state, vectorized::Block* inpu local_state._convert_block_to_null(*input_block); } - // TODO: Now we are not sure whether a column is nullable only by ExecNode's `row_desc` - // so we have to initialize this flag by the first probe block. - if (!local_state._has_set_need_null_map_for_probe) { - local_state._has_set_need_null_map_for_probe = true; - local_state._need_null_map_for_probe = - local_state._need_probe_null_map(*input_block, res_col_ids); - } - if (local_state._need_null_map_for_probe) { - if (local_state._null_map_column == nullptr) { - local_state._null_map_column = vectorized::ColumnUInt8::create(); - } - local_state._null_map_column->get_data().assign(input_block->rows(), (uint8_t)0); - } + RETURN_IF_ERROR(local_state._extract_join_column(*input_block, res_col_ids)); - RETURN_IF_ERROR(local_state._extract_join_column(*input_block, local_state._null_map_column, - local_state._probe_columns, res_col_ids)); if (&local_state._probe_block != input_block) { input_block->swap(local_state._probe_block); } diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h b/be/src/pipeline/exec/hashjoin_probe_operator.h index b4930307bcc818..1b45a2a258eb07 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.h +++ b/be/src/pipeline/exec/hashjoin_probe_operator.h @@ -94,15 +94,16 @@ class HashJoinProbeLocalState final const std::shared_ptr& build_block() const { return _shared_state->build_block; } + bool empty_right_table_shortcut() const { + // !Base::_projections.empty() means nereids planner + return _shared_state->empty_right_table_need_probe_dispose && !Base::_projections.empty(); + } private: void _prepare_probe_block(); bool _need_probe_null_map(vectorized::Block& block, const std::vector& res_col_ids); std::vector _convert_block_to_null(vectorized::Block& block); - Status _extract_join_column(vectorized::Block& block, - vectorized::ColumnUInt8::MutablePtr& null_map, - vectorized::ColumnRawPtrs& raw_ptrs, - const std::vector& res_col_ids); + Status _extract_join_column(vectorized::Block& block, const std::vector& res_col_ids); friend class HashJoinProbeOperatorX; template friend struct vectorized::ProcessHashTableProbe; diff --git a/be/src/vec/core/column_with_type_and_name.cpp b/be/src/vec/core/column_with_type_and_name.cpp index cd0f7194004073..e93946804ffa60 100644 --- a/be/src/vec/core/column_with_type_and_name.cpp +++ b/be/src/vec/core/column_with_type_and_name.cpp @@ -62,15 +62,17 @@ void 
ColumnWithTypeAndName::dump_structure(std::ostream& out) const { out << name; } - if (type) + if (type) { out << " " << type->get_name(); - else + } else { out << " nullptr"; + } - if (column) - out << ' ' << column->dump_structure(); - else + if (column) { + out << ' ' << column->dump_structure() << "(use_count=" << column->use_count() << ')'; + } else { out << " nullptr"; + } } String ColumnWithTypeAndName::dump_structure() const { diff --git a/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out index 8404bee641f5b2..56c5f6e2229ba7 100644 --- a/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out +++ b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out @@ -134,3 +134,9 @@ 4 \N \N \N \N \N 5 1111 1111 3 1111 1111 +-- !shortcut -- +1 11 11 +2 111 111 +3 1111 1111 +4 111 111 + diff --git a/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy index 2bb24309960054..230332fdf3bbe2 100644 --- a/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy +++ b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy @@ -286,4 +286,8 @@ suite("test_half_join_nullable_build_side", "query,p0") { left join test_half_join_nullable_build_side_l r on l.v2 <=> r.v2 order by 1, 2, 3; """ + + qt_shortcut """ + select * from test_half_join_nullable_build_side_l l left anti join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 and r.k1=5 order by 1, 2, 3; + """ } \ No newline at end of file From 40b1c4c7269b465e39e8c9e8d3916a8ebb8aa444 Mon Sep 17 00:00:00 2001 From: Mryange <59914473+Mryange@users.noreply.github.com> Date: Thu, 25 Apr 2024 14:06:15 +0800 Subject: [PATCH 011/163] [fix](scan) fix ignore expr exec when _non_predicate_columns is empty (#33934) fix ignore expr exec when _non_predicate_columns is empty --- .../olap/rowset/segment_v2/segment_iterator.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index a1b8cf35b353a5..506d7d0b2ad322 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1957,14 +1957,16 @@ void SegmentIterator::_replace_version_col(size_t num_rows) { uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size) { SCOPED_RAW_TIMER(&_opts.stats->vec_cond_ns); - if (_is_need_vec_eval) { - _is_need_vec_eval = false; - for (const auto& pred : _pre_eval_block_predicate) { - _is_need_vec_eval |= (!pred->always_true()); + bool all_pred_always_true = true; + for (const auto& pred : _pre_eval_block_predicate) { + if (!pred->always_true()) { + all_pred_always_true = false; + break; } } - if (!_is_need_vec_eval) { - for (uint32_t i = 0; i < selected_size; ++i) { + //If all predicates are always_true, then return directly. 
+ if (all_pred_always_true || !_is_need_vec_eval) { + for (uint16_t i = 0; i < selected_size; ++i) { sel_rowid_idx[i] = i; } return selected_size; @@ -2248,6 +2250,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) { + if (_non_predicate_columns.empty()) { + return Status::InternalError("_non_predicate_columns is empty"); + } RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids)); RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns)); _output_non_pred_columns(block); From ad4e82bea6d2428827d98eb74946e014511839f2 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG <98214048+dataroaring@users.noreply.github.com> Date: Thu, 25 Apr 2024 14:17:42 +0800 Subject: [PATCH 012/163] [chore](version) revert version changes (#34095) --- gensrc/script/gen_build_version.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensrc/script/gen_build_version.sh b/gensrc/script/gen_build_version.sh index b99e2edd3a377c..eec7281c449c5b 100755 --- a/gensrc/script/gen_build_version.sh +++ b/gensrc/script/gen_build_version.sh @@ -28,10 +28,10 @@ set -eo pipefail build_version_prefix="doris" -build_version_major=4 +build_version_major=0 build_version_minor=0 build_version_patch=0 -build_version_rc_version="preview" +build_version_rc_version="" build_version="${build_version_prefix}-${build_version_major}.${build_version_minor}.${build_version_patch}-${build_version_rc_version}" From 3aa89110bc6e459b81ead622a9ff794e8073ac2f Mon Sep 17 00:00:00 2001 From: AlexYue Date: Thu, 25 Apr 2024 14:17:48 +0800 Subject: [PATCH 013/163] [enhance](Cloud) Add case to test if vault is forbid for cloud mode without vault (#34006) --- .../doris/analysis/ShowStorageVaultStmt.java | 12 +++++ .../suites/vaults/forbid/forbid.groovy | 50 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 regression-test/suites/vaults/forbid/forbid.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowStorageVaultStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowStorageVaultStmt.java index e4af07e43b1e37..ddf2529284602b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowStorageVaultStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowStorageVaultStmt.java @@ -19,9 +19,12 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.StorageVault; +import org.apache.doris.cloud.catalog.CloudEnv; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.FeConstants; import org.apache.doris.common.UserException; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; @@ -41,6 +44,15 @@ public ShowStorageVaultStmt() { @Override public void analyze(Analyzer analyzer) throws AnalysisException, UserException { + if (Config.isNotCloudMode()) { + throw new AnalysisException("Storage Vault is only supported for cloud mode"); + } + if (!FeConstants.runningUnitTest) { + // In legacy cloud mode, some s3 back-ended storage does need to use storage vault. 
+ if (!((CloudEnv) Env.getCurrentEnv()).getEnableStorageVault()) { + throw new AnalysisException("Your cloud instance doesn't support storage vault"); + } + } super.analyze(analyzer); // check auth if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) { diff --git a/regression-test/suites/vaults/forbid/forbid.groovy b/regression-test/suites/vaults/forbid/forbid.groovy new file mode 100644 index 00000000000000..15fba18fc6da1f --- /dev/null +++ b/regression-test/suites/vaults/forbid/forbid.groovy @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("forbid_vault") { + if (enableStoragevault()) { + logger.info("skip forbid storage vault case because storage vault enabled") + return + } + + if (!isCloudMode()) { + logger.info("skip forbid storage vault case because not cloud mode") + return + } + + expectExceptionLike({ + sql """ + set not_exist as default storage vault + """ + }, "Your cloud instance doesn't support storage vault") + + expectExceptionLike({ + sql """ + CREATE STORAGE VAULT IF NOT EXISTS hdfs_vault + PROPERTIES ( + "type"="hdfs", + "fs.defaultFS"="hdfs://127.0.0.1:8020" + ); + """ + }, "Your cloud instance doesn't support storage vault") + + expectExceptionLike({ + sql """ + show storage vault + """ + }, "Your cloud instance doesn't support storage vault") +} \ No newline at end of file From b95b37bf0279f2a116c8ec84c6830bcf6423ccf4 Mon Sep 17 00:00:00 2001 From: AlexYue Date: Thu, 25 Apr 2024 15:00:36 +0800 Subject: [PATCH 014/163] [enhance](Cloud) Add GCP enum for ObjectStoreInfoPB (#34100) --- .../src/main/java/org/apache/doris/cloud/storage/RemoteBase.java | 1 + gensrc/proto/cloud.proto | 1 + 2 files changed, 2 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/RemoteBase.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/RemoteBase.java index 7b12be88d74a24..e146e52f534d94 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/RemoteBase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/RemoteBase.java @@ -140,6 +140,7 @@ public static RemoteBase newInstance(ObjectInfo obj) throws Exception { switch (obj.provider) { case OSS: return new OssRemote(obj); + case GCP: case S3: return new S3Remote(obj); case COS: diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index 0ed9b693988dd7..f132e2eab4d0ee 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -176,6 +176,7 @@ message ObjectStoreInfoPB { COS = 2; OBS = 3; BOS = 4; + GCP = 5; } optional int64 ctime = 1; optional int64 mtime = 2; From 48a306149c11f93363c1197d576444f71d0ef708 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei <53502832+feiniaofeiafei@users.noreply.github.com> Date: Thu, 25 Apr 2024 
15:01:55 +0800 Subject: [PATCH 015/163] [Fix](nereids) fix rule SimplifyWindowExpression (#34099) Co-authored-by: feiniaofeiafei --- .../rewrite/SimplifyWindowExpression.java | 12 +++- .../simplify_window_expression.out | 67 +++++++++++-------- .../simplify_window_expression.groovy | 3 + 3 files changed, 53 insertions(+), 29 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java index 872ca789818b5e..c0548a42579da5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java @@ -27,10 +27,12 @@ import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.WindowExpression; import org.apache.doris.nereids.trees.expressions.functions.BoundFunction; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; +import org.apache.doris.nereids.util.TypeCoercionUtils; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -87,11 +89,13 @@ private Plan simplify(MatchingContext> ctx) { if (function instanceof BoundFunction) { BoundFunction boundFunction = (BoundFunction) function; String name = ((BoundFunction) function).getName(); - if ((name.equals(COUNT) && boundFunction.child(0).notNullable()) + if ((name.equals(COUNT) && checkCount((Count) boundFunction)) || REWRRITE_TO_CONST_WINDOW_FUNCTIONS.contains(name)) { projectionsBuilder.add(new Alias(alias.getExprId(), new TinyIntLiteral((byte) 1), alias.getName())); } else if (REWRRITE_TO_SLOT_WINDOW_FUNCTIONS.contains(name)) { - projectionsBuilder.add(new Alias(alias.getExprId(), boundFunction.child(0), alias.getName())); + projectionsBuilder.add(new Alias(alias.getExprId(), + TypeCoercionUtils.castIfNotSameType(boundFunction.child(0), boundFunction.getDataType()), + alias.getName())); } else { remainWindowExpression.add(expr); } @@ -120,4 +124,8 @@ private Plan simplify(MatchingContext> ctx) { window.child(0))); } } + + private boolean checkCount(Count count) { + return count.isCountStar() || count.child(0).notNullable(); + } } diff --git a/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out index 3befc3dcbb2742..e660cd7702c650 100644 --- a/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out +++ b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out @@ -119,28 +119,28 @@ -- !select_avg -- \N \N \N \N \N \N -1 1 1 -1 1 1 -2 2 2 -3 3 3 -3 3 3 -4 4 4 -5 5 5 -5 5 5 -7 7 7 +1 1.0 1.0 +1 1.0 1.0 +2 2.0 2.0 +3 3.0 3.0 +3 3.0 3.0 +4 4.0 4.0 +5 5.0 5.0 +5 5.0 5.0 +7 7.0 7.0 -- !more_than_pk -- \N \N \N \N \N \N -1 1 1 -1 1 1 -2 2 2 -3 3 3 -3 3 3 -4 4 4 -5 5 5 -5 5 5 -7 7 7 +1 1.0 1.0 +1 1.0 1.0 +2 2.0 2.0 +3 3.0 3.0 +3 3.0 3.0 +4 4.0 4.0 +5 5.0 5.0 +5 5.0 5.0 +7 7.0 7.0 -- !select_last_value_shape -- PhysicalResultSink @@ -163,18 +163,31 @@ PhysicalResultSink 
------filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0)) --------PhysicalOlapScan[mal_test_simplify_window] +-- !select_count_star_col1 -- +\N 1 1 +1 1 1 +1 1 1 +2 1 1 +2 1 1 +2 1 1 +3 1 1 +3 1 1 +4 1 1 +6 1 1 +6 1 1 + -- !select_upper_plan_use_all_rewrite -- \N \N \N \N -1 1 -1 1 -2 2 -3 3 -3 3 -4 4 -5 5 -5 5 -7 7 +1 1.0 +1 1.0 +2 2.0 +3 3.0 +3 3.0 +4 4.0 +5 5.0 +5 5.0 +7 7.0 -- !select_upper_plan_use_rewrite_and_not_rewrite -- \N \N \N diff --git a/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy index 11ad672c74ff11..3e247b2a78f577 100644 --- a/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy +++ b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy @@ -78,6 +78,9 @@ suite("simplify_window_expression") { explain shape plan select b, avg(b) over (partition by a,b,c) c1, avg(b) over (partition by a,b,c order by b) c2 from mal_test_simplify_window""" + qt_select_count_star_col1 """ + select a,count() over (partition by a,b) c1, count() over (partition by a,b order by a) c2 + from mal_test_simplify_window order by 1,2,3;""" qt_select_upper_plan_use_all_rewrite """ select b, c1 from (select b,avg(b) over (partition by a,b) c1 From 693a48522187546ec1e1a8519336524c6cb9baa6 Mon Sep 17 00:00:00 2001 From: spaces-x Date: Thu, 25 Apr 2024 15:25:23 +0800 Subject: [PATCH 016/163] [fix](cloud): fix parent directory doesn't exist in HdfsFileWriter (#33985) Co-authored-by: weixiang06 --- be/src/io/fs/hdfs_file_writer.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/be/src/io/fs/hdfs_file_writer.cpp b/be/src/io/fs/hdfs_file_writer.cpp index d15745ebf37ad5..c596c0e290fe8c 100644 --- a/be/src/io/fs/hdfs_file_writer.cpp +++ b/be/src/io/fs/hdfs_file_writer.cpp @@ -281,6 +281,21 @@ Result HdfsFileWriter::create(Path full_path, std::shared_ptrhdfs_fs, hdfs_dir.c_str()); + if (exists != 0) { + VLOG_NOTICE << "hdfs dir doesn't exist, create it: " << hdfs_dir; + int ret = hdfsCreateDirectory(handler->hdfs_fs, hdfs_dir.c_str()); + if (ret != 0) { + std::stringstream ss; + ss << "create dir failed. " + << " fs_name: " << fs_name << " path: " << hdfs_dir << ", err: " << hdfs_error(); + LOG(WARNING) << ss.str(); + return ResultError(Status::InternalError(ss.str())); + } + } +#endif // open file hdfsFile hdfs_file = nullptr; { From 785ffb2aeb1bf02b88a22b8a961f1bede47d2f81 Mon Sep 17 00:00:00 2001 From: LiBinfeng <46676950+LiBinfeng-01@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:27:37 +0800 Subject: [PATCH 017/163] [Fix](nereids) fix cases unstable of hint (#34101) fix cases unstable of hint, remove unused cases and project nodes and use string contains in order to avoid unstable problem. 
--- .../data/nereids_p0/hint/test_leading.out | 507 ------------------ .../suites/nereids_p0/hint/fix_leading.groovy | 6 +- .../nereids_p0/hint/multi_leading.groovy | 17 +- .../nereids_p0/hint/test_leading.groovy | 110 +++- 4 files changed, 105 insertions(+), 535 deletions(-) diff --git a/regression-test/data/nereids_p0/hint/test_leading.out b/regression-test/data/nereids_p0/hint/test_leading.out index 531950358804d3..31df4aafe732ba 100644 --- a/regression-test/data/nereids_p0/hint/test_leading.out +++ b/regression-test/data/nereids_p0/hint/test_leading.out @@ -2165,25 +2165,6 @@ Used: [broadcast]_2 UnUsed: SyntaxError: --- !select90_2 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t1] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t2] - -Hint log: -Used: -UnUsed: [broadcast]_2 -SyntaxError: - -- !select90_3 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -2208,30 +2189,6 @@ Used: [broadcast]_2 [shuffle]_3 UnUsed: SyntaxError: --- !select90_4 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: [shuffle]_3 -UnUsed: [broadcast]_2 -SyntaxError: - -- !select90_5 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -2256,125 +2213,6 @@ Used: [broadcast]_2 [shuffle]_3 UnUsed: SyntaxError: --- !select90_6 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -------------------PhysicalProject ---------------------PhysicalOlapScan[t1] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: [shuffle]_2 -UnUsed: [broadcast]_3 -SyntaxError: - --- !select91_1 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] 
-----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: leading(t1 t2 t3 ) -UnUsed: [broadcast]_2 [shuffle]_3 -SyntaxError: - --- !select91_2 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: leading(t1 t2 t3 ) -UnUsed: [broadcast]_2 [shuffle]_3 -SyntaxError: - --- !select91_3 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -------------------PhysicalProject ---------------------PhysicalOlapScan[t1] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: leading(t1 t2 t3 ) -UnUsed: [broadcast]_2 [shuffle]_3 -SyntaxError: - --- !select91_4 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -------------------PhysicalProject ---------------------PhysicalOlapScan[t1] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: leading(t1 t2 t3 ) -UnUsed: [shuffle]_2 [broadcast]_3 -SyntaxError: - -- !select92_1 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -2806,52 +2644,6 @@ Used: leading(t1 broadcast t2 t3 ) UnUsed: SyntaxError: --- !select95_2 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject 
---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 leading(t1 broadcast { t2 t3 }) -SyntaxError: - --- !select95_3 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 leading(t1 broadcast { t3 t2 }) -SyntaxError: - -- !select95_4 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -2875,75 +2667,6 @@ Used: leading(t2 broadcast t1 t3 ) UnUsed: SyntaxError: --- !select95_5 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 leading(t2 broadcast { t1 t3 }) -SyntaxError: - --- !select95_6 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 leading(t2 broadcast { t3 t1 }) -SyntaxError: - --- !select95_7 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: leading(t3 broadcast t1 t2) -SyntaxError: - -- !select95_8 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -3013,52 +2736,6 @@ Used: leading(t1 
shuffle t2 broadcast t3 ) UnUsed: SyntaxError: --- !select96_2 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [shuffle]_2 [broadcast]_3 leading(t1 shuffle { t2 broadcast t3 }) -SyntaxError: - --- !select96_3 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [shuffle]_2 [broadcast]_3 leading(t1 shuffle { t3 broadcast t2 }) -SyntaxError: - -- !select96_4 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -3082,75 +2759,6 @@ Used: leading(t2 shuffle t1 broadcast t3 ) UnUsed: SyntaxError: --- !select96_5 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [shuffle]_2 leading(t2 shuffle { t1 broadcast t3 }) -SyntaxError: - --- !select96_6 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [shuffle]_2 leading(t2 shuffle { t3 broadcast t1 }) -SyntaxError: - --- !select96_7 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) 
otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_3 leading(t3 shuffle t1 broadcast t2) -SyntaxError: - -- !select96_8 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -3221,52 +2829,6 @@ Used: leading(t1 broadcast t2 shuffle t3 ) UnUsed: SyntaxError: --- !select97_2 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 [shuffle]_3 leading(t1 broadcast { t2 shuffle t3 }) -SyntaxError: - --- !select97_3 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 [shuffle]_3 leading(t1 broadcast { t3 shuffle t2 }) -SyntaxError: - -- !select97_4 -- PhysicalResultSink --hashAgg[GLOBAL] @@ -3290,75 +2852,6 @@ Used: leading(t2 broadcast t1 shuffle t3 ) UnUsed: SyntaxError: --- !select97_5 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 leading(t2 broadcast { t1 shuffle t3 }) -SyntaxError: - --- !select97_6 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) 
otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [broadcast]_2 leading(t2 broadcast { t3 shuffle t1 }) -SyntaxError: - --- !select97_7 -- -PhysicalResultSink ---hashAgg[GLOBAL] -----PhysicalDistribute[DistributionSpecGather] -------hashAgg[LOCAL] ---------PhysicalProject -----------hashJoin[INNER_JOIN] hashCondition=((t2.c2 = t3.c3)) otherCondition=() -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = t2.c2)) otherCondition=() -----------------PhysicalProject -------------------PhysicalOlapScan[t1] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------PhysicalOlapScan[t3] - -Hint log: -Used: -UnUsed: [shuffle]_3 leading(t3 broadcast t1 shuffle t2) -SyntaxError: - -- !select97_8 -- PhysicalResultSink --hashAgg[GLOBAL] diff --git a/regression-test/suites/nereids_p0/hint/fix_leading.groovy b/regression-test/suites/nereids_p0/hint/fix_leading.groovy index 49823e9769830a..4c88fb2ac30f62 100644 --- a/regression-test/suites/nereids_p0/hint/fix_leading.groovy +++ b/regression-test/suites/nereids_p0/hint/fix_leading.groovy @@ -172,6 +172,8 @@ suite("fix_leading") { qt_select4_3 """explain shape plan select /*+ leading(t1 t2 t3)*/ count(*) from t1 left join t2 on c1 > 500 and c2 >500 right join t3 on c3 > 500 and c1 < 200;""" // check whether we have all tables - qt_select5_1 """explain shape plan select /*+ leading(t1 t2)*/ count(*) from t1 left join t2 on c1 > 500 and c2 >500 right join t3 on c3 > 500 and c1 < 200;""" - + explain { + sql """shape plan select /*+ leading(t1 t2)*/ count(*) from t1 left join t2 on c1 > 500 and c2 >500 right join t3 on c3 > 500 and c1 < 200;""" + contains("SyntaxError: leading(t1 t2) Msg:leading should have all tables in query block, missing tables: t3") + } } diff --git a/regression-test/suites/nereids_p0/hint/multi_leading.groovy b/regression-test/suites/nereids_p0/hint/multi_leading.groovy index bd567ecdefa378..f716b5c98d05e0 100644 --- a/regression-test/suites/nereids_p0/hint/multi_leading.groovy +++ b/regression-test/suites/nereids_p0/hint/multi_leading.groovy @@ -113,9 +113,18 @@ suite("multi_leading") { qt_sql4_2 """explain shape plan select count(*) from (select /*+ leading(alias2 t1) */ c1, c11 from t1 join (select c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" qt_sql4_3 """explain shape plan select count(*) from (select c1, c11 from t1 join (select /*+ leading(t4 t2) */ c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" qt_sql4_4 """explain shape plan select /*+ leading(t3 alias1) */ count(*) from (select /*+ leading(alias2 t1) */ c1, c11 from t1 join (select c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" - qt_sql4_5 """explain shape plan select /*+ leading(t3 alias1) */ count(*) from (select c1, c11 from t1 join (select /*+ leading(t4 t2) */ c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = 
t3.c3;""" - qt_sql4_6 """explain shape plan select count(*) from (select /*+ leading(alias2 t1) */ c1, c11 from t1 join (select /*+ leading(t4 t2) */ c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" - qt_sql4_7 """explain shape plan select /*+ leading(t3 alias1) */ count(*) from (select /*+ leading(alias2 t1) */ c1, c11 from t1 join (select /*+ leading(t4 t2) */ c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" + explain { + sql """shape plan select /*+ leading(t3 alias1) */ count(*) from (select c1, c11 from t1 join (select /*+ leading(t4 t2) */ c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" + contains("SyntaxError: leading(t4 t2) Msg:one query block can only have one leading clause") + } + explain { + sql """shape plan select count(*) from (select /*+ leading(alias2 t1) */ c1, c11 from t1 join (select /*+ leading(t4 t2) */ c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" + contains("SyntaxError: leading(t4 t2) Msg:one query block can only have one leading clause") + } + explain { + sql """shape plan select /*+ leading(t3 alias1) */ count(*) from (select /*+ leading(alias2 t1) */ c1, c11 from t1 join (select /*+ leading(t4 t2) */ c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" + contains("UnUsed: leading(alias2 t1)") + } qt_sql4_res_0 """select count(*) from (select c1, c11 from t1 join (select c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" qt_sql4_res_1 """select /*+ leading(t3 alias1) */ count(*) from (select c1, c11 from t1 join (select c2, c22 from t2 join t4 on c2 = c4) as alias2 on c1 = alias2.c2) as alias1 join t3 on alias1.c1 = t3.c3;""" @@ -129,4 +138,4 @@ suite("multi_leading") { // use cte in scalar query qt_sql5_1 """explain shape plan with cte as (select c11, c1 from t1) SELECT c1 FROM cte group by c1 having sum(cte.c11) > (select 0.05 * avg(t1.c11) from t1 join cte on t1.c1 = cte.c11 )""" qt_sql5_2 """explain shape plan with cte as (select c11, c1 from t1) SELECT c1 FROM cte group by c1 having sum(cte.c11) > (select /*+ leading(cte t1) */ 0.05 * avg(t1.c11) from t1 join cte on t1.c1 = cte.c11 )""" -} \ No newline at end of file +} diff --git a/regression-test/suites/nereids_p0/hint/test_leading.groovy b/regression-test/suites/nereids_p0/hint/test_leading.groovy index a9ad4aa705ccfa..d1e11144b527c7 100644 --- a/regression-test/suites/nereids_p0/hint/test_leading.groovy +++ b/regression-test/suites/nereids_p0/hint/test_leading.groovy @@ -936,19 +936,40 @@ suite("test_leading") { // used qt_select90_1 """explain shape plan select count(*) from t1 join [broadcast] t2 on c1 = c2;""" // unused - qt_select90_2 """explain shape plan select count(*) from t1 right outer join [broadcast] t2 on c1 = c2;""" + explain { + sql """shape plan select count(*) from t1 right outer join [broadcast] t2 on c1 = c2;""" + contains("UnUsed: [broadcast]_2") + } // only distribute hint + multi hints qt_select90_3 """explain shape plan select count(*) from t1 join [broadcast] t2 on c1 = c2 join[shuffle] t3 on c2 = c3;""" - qt_select90_4 """explain shape plan select count(*) from t1 right outer join [broadcast] t2 on c1 = c2 join[shuffle] t3 on c2 = c3;""" + explain { + sql """shape plan select count(*) from t1 right outer join [broadcast] t2 on c1 = c2 
join[shuffle] t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2") + } qt_select90_5 """explain shape plan select count(*) from t1 join [broadcast] t2 on c1 = c2 right outer join[shuffle] t3 on c2 = c3;""" - qt_select90_6 """explain shape plan select count(*) from t1 join [shuffle] t2 on c1 = c2 right outer join[broadcast] t3 on c2 = c3;""" + explain { + sql """shape plan select count(*) from t1 join [shuffle] t2 on c1 = c2 right outer join[broadcast] t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_3") + } // leading + distribute hint outside leading + single hint - qt_select91_1 """explain shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 join [broadcast] t2 on c1 = c2 join[shuffle] t3 on c2 = c3;""" - qt_select91_2 """explain shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 right outer join [broadcast] t2 on c1 = c2 join[shuffle] t3 on c2 = c3;""" - qt_select91_3 """explain shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 join [broadcast] t2 on c1 = c2 right outer join[shuffle] t3 on c2 = c3;""" - qt_select91_4 """explain shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 join [shuffle] t2 on c1 = c2 right outer join[broadcast] t3 on c2 = c3;""" + explain { + sql """shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 join [broadcast] t2 on c1 = c2 join[shuffle] t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 [shuffle]_3") + } + explain { + sql """shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 right outer join [broadcast] t2 on c1 = c2 join[shuffle] t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 [shuffle]_3") + } + explain { + sql """shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 join [broadcast] t2 on c1 = c2 right outer join[shuffle] t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 [shuffle]_3") + } + explain { + sql """shape plan select /*+ leading(t1 t2 t3) */ count(*) from t1 join [shuffle] t2 on c1 = c2 right outer join[broadcast] t3 on c2 = c3;""" + contains("UnUsed: [shuffle]_2 [broadcast]_3") + } // leading + distribute hint inside leading + single hint // inner join @@ -975,32 +996,77 @@ suite("test_leading") { // outer join qt_select95_1 """explain shape plan select /*+ leading(t1 broadcast t2 t3) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select95_2 """explain shape plan select /*+ leading(t1 broadcast {t2 t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select95_3 """explain shape plan select /*+ leading(t1 broadcast {t3 t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + explain { + sql """shape plan select /*+ leading(t1 broadcast {t2 t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 leading(t1 broadcast { t2 t3 })") + } + explain { + sql """shape plan select /*+ leading(t1 broadcast {t3 t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 leading(t1 broadcast { t3 t2 })") + } qt_select95_4 """explain shape plan select /*+ leading(t2 broadcast t1 t3) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select95_5 """explain shape plan select /*+ leading(t2 broadcast {t1 t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select95_6 """explain shape plan select /*+ leading(t2 broadcast {t3 t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select95_7 """explain shape plan select /*+ 
leading(t3 broadcast t1 t2) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + explain { + sql """shape plan select /*+ leading(t2 broadcast {t1 t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 leading(t2 broadcast { t1 t3 })") + } + explain { + sql """shape plan select /*+ leading(t2 broadcast {t3 t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 leading(t2 broadcast { t3 t1 })") + } + explain { + sql """shape plan select /*+ leading(t3 broadcast t1 t2) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: leading(t3 broadcast t1 t2)") + } qt_select95_8 """explain shape plan select /*+ leading(t3 broadcast {t1 t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" qt_select95_9 """explain shape plan select /*+ leading(t3 broadcast {t2 t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" qt_select96_1 """explain shape plan select /*+ leading(t1 shuffle t2 broadcast t3) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select96_2 """explain shape plan select /*+ leading(t1 shuffle {t2 broadcast t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select96_3 """explain shape plan select /*+ leading(t1 shuffle {t3 broadcast t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + explain { + sql """shape plan select /*+ leading(t1 shuffle {t2 broadcast t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [shuffle]_2 [broadcast]_3 leading(t1 shuffle { t2 broadcast t3 })") + } + explain { + sql """shape plan select /*+ leading(t1 shuffle {t3 broadcast t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [shuffle]_2 [broadcast]_3 leading(t1 shuffle { t3 broadcast t2 })") + } qt_select96_4 """explain shape plan select /*+ leading(t2 shuffle t1 broadcast t3) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select96_5 """explain shape plan select /*+ leading(t2 shuffle {t1 broadcast t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select96_6 """explain shape plan select /*+ leading(t2 shuffle {t3 broadcast t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select96_7 """explain shape plan select /*+ leading(t3 shuffle t1 broadcast t2) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + explain { + sql """shape plan select /*+ leading(t2 shuffle {t1 broadcast t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [shuffle]_2 leading(t2 shuffle { t1 broadcast t3 })") + } + explain { + sql """shape plan select /*+ leading(t2 shuffle {t3 broadcast t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [shuffle]_2 leading(t2 shuffle { t3 broadcast t1 })") + } + explain { + sql """shape plan select /*+ leading(t3 shuffle t1 broadcast t2) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_3 leading(t3 shuffle t1 broadcast t2)") + } qt_select96_8 """explain shape plan select /*+ leading(t3 shuffle {t1 broadcast t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" qt_select96_9 """explain shape plan select /*+ leading(t3 shuffle 
{t2 broadcast t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" qt_select97_1 """explain shape plan select /*+ leading(t1 broadcast t2 shuffle t3) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select97_2 """explain shape plan select /*+ leading(t1 broadcast {t2 shuffle t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select97_3 """explain shape plan select /*+ leading(t1 broadcast {t3 shuffle t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + explain { + sql """shape plan select /*+ leading(t1 broadcast {t2 shuffle t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 [shuffle]_3 leading(t1 broadcast { t2 shuffle t3 })") + } + explain { + sql """shape plan select /*+ leading(t1 broadcast {t3 shuffle t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 [shuffle]_3 leading(t1 broadcast { t3 shuffle t2 })") + } qt_select97_4 """explain shape plan select /*+ leading(t2 broadcast t1 shuffle t3) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select97_5 """explain shape plan select /*+ leading(t2 broadcast {t1 shuffle t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select97_6 """explain shape plan select /*+ leading(t2 broadcast {t3 shuffle t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" - qt_select97_7 """explain shape plan select /*+ leading(t3 broadcast t1 shuffle t2) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + explain { + sql """shape plan select /*+ leading(t2 broadcast {t1 shuffle t3}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 leading(t2 broadcast { t1 shuffle t3 })") + } + explain { + sql """shape plan select /*+ leading(t2 broadcast {t3 shuffle t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [broadcast]_2 leading(t2 broadcast { t3 shuffle t1 })") + } + explain { + sql """shape plan select /*+ leading(t3 broadcast t1 shuffle t2) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" + contains("UnUsed: [shuffle]_3 leading(t3 broadcast t1 shuffle t2)") + } qt_select97_8 """explain shape plan select /*+ leading(t3 broadcast {t1 shuffle t2}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" qt_select97_9 """explain shape plan select /*+ leading(t3 broadcast {t2 shuffle t1}) */ count(*) from t1 left outer join t2 on c1 = c2 join t3 on c2 = c3;""" From 073fec33b96da14754dbb795275f76e6b97dd2fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E5=81=A5?= Date: Thu, 25 Apr 2024 16:31:38 +0800 Subject: [PATCH 018/163] [feature](Nereids): add equal set in functional dependencies (#33642) --- .../properties/FunctionalDependencies.java | 42 +++- .../trees/plans/BlockFuncDepsPropagation.java | 5 + .../trees/plans/PropagateFuncDeps.java | 5 + .../trees/plans/logical/LogicalAggregate.java | 5 + .../plans/logical/LogicalAssertNumRows.java | 6 + .../plans/logical/LogicalCatalogRelation.java | 5 + .../logical/LogicalDeferMaterializeTopN.java | 5 + .../trees/plans/logical/LogicalExcept.java | 15 ++ .../trees/plans/logical/LogicalFilter.java | 10 + .../trees/plans/logical/LogicalGenerate.java | 6 + .../trees/plans/logical/LogicalHaving.java | 10 + .../trees/plans/logical/LogicalIntersect.java | 
9 + .../trees/plans/logical/LogicalJoin.java | 16 ++ .../trees/plans/logical/LogicalLimit.java | 5 + .../plans/logical/LogicalOneRowRelation.java | 16 ++ .../trees/plans/logical/LogicalPlan.java | 3 + .../trees/plans/logical/LogicalProject.java | 21 ++ .../trees/plans/logical/LogicalRepeat.java | 5 + .../trees/plans/logical/LogicalSqlCache.java | 21 +- .../plans/logical/LogicalSubQueryAlias.java | 11 + .../trees/plans/logical/LogicalTopN.java | 5 + .../trees/plans/logical/LogicalUnion.java | 61 +++++ .../trees/plans/logical/LogicalView.java | 6 + .../trees/plans/logical/LogicalWindow.java | 6 + .../doris/nereids/util/ExpressionUtils.java | 8 + .../doris/nereids/util/ImmutableEqualSet.java | 69 +++++- .../nereids/properties/EqualSetTest.java | 230 ++++++++++++++++++ .../nereids_syntax_p0/join_order.groovy | 8 +- 28 files changed, 575 insertions(+), 39 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/nereids/properties/EqualSetTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FunctionalDependencies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FunctionalDependencies.java index 2b0c1f5c9143e3..a516bf9ae1c0e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FunctionalDependencies.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FunctionalDependencies.java @@ -18,11 +18,13 @@ package org.apache.doris.nereids.properties; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.util.ImmutableEqualSet; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -36,15 +38,20 @@ public class FunctionalDependencies { public static final FunctionalDependencies EMPTY_FUNC_DEPS = new FunctionalDependencies(new NestedSet().toImmutable(), - new NestedSet().toImmutable(), new ImmutableSet.Builder().build()); + new NestedSet().toImmutable(), new ImmutableSet.Builder().build(), + ImmutableEqualSet.empty()); private final NestedSet uniqueSet; private final NestedSet uniformSet; private final ImmutableSet fdItems; - private FunctionalDependencies(NestedSet uniqueSet, NestedSet uniformSet, ImmutableSet fdItems) { + private final ImmutableEqualSet equalSet; + + private FunctionalDependencies( + NestedSet uniqueSet, NestedSet uniformSet, ImmutableSet fdItems, ImmutableEqualSet equalSet) { this.uniqueSet = uniqueSet; this.uniformSet = uniformSet; this.fdItems = fdItems; + this.equalSet = equalSet; } public boolean isEmpty() { @@ -87,13 +94,26 @@ public boolean isUniformAndNotNull(ImmutableSet slotSet) { return slotSet.stream().noneMatch(Slot::nullable) && isUniform(slotSet); } + public boolean isNullSafeEqual(Slot l, Slot r) { + return equalSet.isEqual(l, r); + } + + public boolean isEqualAndNotNotNull(Slot l, Slot r) { + return equalSet.isEqual(l, r) && !l.nullable() && !r.nullable(); + } + + public List> calAllEqualSet() { + return equalSet.calEqualSetList(); + } + public ImmutableSet getFdItems() { return fdItems; } @Override public String toString() { - return String.format("FuncDeps[uniform:%s, unique:%s, fdItems:%s]", uniformSet, uniqueSet, fdItems); + return String.format("FuncDeps[uniform:%s, unique:%s, fdItems:%s, equalSet:%s]", + uniformSet, uniqueSet, fdItems, equalSet); } /** @@ -103,17 +123,21 @@ public static class Builder { private final NestedSet uniqueSet; private final NestedSet 
uniformSet; private ImmutableSet fdItems; + private final ImmutableEqualSet.Builder equalSetBuilder; public Builder() { uniqueSet = new NestedSet(); uniformSet = new NestedSet(); fdItems = new ImmutableSet.Builder().build(); + equalSetBuilder = new ImmutableEqualSet.Builder<>(); } public Builder(FunctionalDependencies other) { this.uniformSet = new NestedSet(other.uniformSet); this.uniqueSet = new NestedSet(other.uniqueSet); this.fdItems = ImmutableSet.copyOf(other.fdItems); + equalSetBuilder = new ImmutableEqualSet.Builder<>(other.equalSet); + } public void addUniformSlot(Slot slot) { @@ -147,11 +171,20 @@ public void addFdItems(ImmutableSet items) { public void addFunctionalDependencies(FunctionalDependencies fd) { uniformSet.add(fd.uniformSet); uniqueSet.add(fd.uniqueSet); + equalSetBuilder.addEqualSet(fd.equalSet); + } + + public void addEqualPair(Slot l, Slot r) { + equalSetBuilder.addEqualPair(l, r); + } + + public void addEqualSet(FunctionalDependencies functionalDependencies) { + equalSetBuilder.addEqualSet(functionalDependencies.equalSet); } public FunctionalDependencies build() { return new FunctionalDependencies(uniqueSet.toImmutable(), uniformSet.toImmutable(), - ImmutableSet.copyOf(fdItems)); + ImmutableSet.copyOf(fdItems), equalSetBuilder.build()); } public void pruneSlots(Set outputSlots) { @@ -162,6 +195,7 @@ public void pruneSlots(Set outputSlots) { public void replace(Map replaceMap) { uniformSet.replace(replaceMap); uniqueSet.replace(replaceMap); + equalSetBuilder.replace(replaceMap); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/BlockFuncDepsPropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/BlockFuncDepsPropagation.java index a679ad0f7d26b6..684643643714d9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/BlockFuncDepsPropagation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/BlockFuncDepsPropagation.java @@ -46,4 +46,9 @@ default void computeUnique(FunctionalDependencies.Builder fdBuilder) { default void computeUniform(FunctionalDependencies.Builder fdBuilder) { // don't generate } + + @Override + default void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + // don't generate + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PropagateFuncDeps.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PropagateFuncDeps.java index f37059049ae844..3d3d3cc8271408 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PropagateFuncDeps.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PropagateFuncDeps.java @@ -66,4 +66,9 @@ default void computeUnique(FunctionalDependencies.Builder fdBuilder) { default void computeUniform(FunctionalDependencies.Builder fdBuilder) { fdBuilder.addUniformSlot(child(0).getLogicalProperties().getFunctionalDependencies()); } + + @Override + default void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child(0).getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java index 73ddde6cef5c19..031ea8a46b5e30 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java @@ 
-397,4 +397,9 @@ public ImmutableSet computeFdItems() { return builder.build(); } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAssertNumRows.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAssertNumRows.java index cad8d6e14d6a9f..a3f540d184778c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAssertNumRows.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAssertNumRows.java @@ -19,6 +19,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.FdItem; +import org.apache.doris.nereids.properties.FunctionalDependencies; import org.apache.doris.nereids.properties.FunctionalDependencies.Builder; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.AssertNumRowsElement; @@ -145,4 +146,9 @@ public void computeUniform(Builder fdBuilder) { getOutput().forEach(fdBuilder::addUniformSlot); } } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java index 433feb741bac10..277695be0796d3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCatalogRelation.java @@ -215,4 +215,9 @@ private ImmutableSet findSlotsByColumn(Set outputSet, Set replaceMap = new HashMap<>(); + List output = getOutput(); + List originalOutputs = regularChildrenOutputs.isEmpty() + ? 
child(0).getOutput() + : regularChildrenOutputs.get(0); + for (int i = 0; i < output.size(); i++) { + replaceMap.put(originalOutputs.get(i), output.get(i)); + } + fdBuilder.replace(replaceMap); + } + @Override public void computeUniform(Builder fdBuilder) { fdBuilder.addUniformSlot(child(0).getLogicalProperties().getFunctionalDependencies()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java index 69d378fd6a5d80..45b96bda4519b4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalFilter.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.trees.plans.logical; +import org.apache.doris.common.Pair; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.FdItem; import org.apache.doris.nereids.properties.FunctionalDependencies.Builder; @@ -166,4 +167,13 @@ public void computeUniform(Builder fdBuilder) { getConjuncts().forEach(e -> fdBuilder.addUniformSlot(ExpressionUtils.extractUniformSlot(e))); fdBuilder.addUniformSlot(child(0).getLogicalProperties().getFunctionalDependencies()); } + + @Override + public void computeEqualSet(Builder fdBuilder) { + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + for (Expression expression : getConjuncts()) { + Optional> equalSlot = ExpressionUtils.extractEqualSlot(expression); + equalSlot.ifPresent(slotSlotPair -> fdBuilder.addEqualPair(slotSlotPair.first, slotSlotPair.second)); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalGenerate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalGenerate.java index a54a7514dbce43..c0195e11fed9db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalGenerate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalGenerate.java @@ -19,6 +19,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.FdItem; +import org.apache.doris.nereids.properties.FunctionalDependencies; import org.apache.doris.nereids.properties.FunctionalDependencies.Builder; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; @@ -169,4 +170,9 @@ public void computeUnique(Builder fdBuilder) { public void computeUniform(Builder fdBuilder) { fdBuilder.addUniformSlot(child(0).getLogicalProperties().getFunctionalDependencies()); } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHaving.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHaving.java index da526c33af94ef..41ee1b14712a67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHaving.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalHaving.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.trees.plans.logical; +import org.apache.doris.common.Pair; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.FdItem; import 
org.apache.doris.nereids.properties.FunctionalDependencies.Builder; @@ -138,6 +139,15 @@ public ImmutableSet computeFdItems() { return builder.build(); } + @Override + public void computeEqualSet(Builder fdBuilder) { + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + for (Expression expression : getConjuncts()) { + Optional> equalSlot = ExpressionUtils.extractEqualSlot(expression); + equalSlot.ifPresent(slotSlotPair -> fdBuilder.addEqualPair(slotSlotPair.first, slotSlotPair.second)); + } + } + @Override public String toString() { return Utils.toSqlString("LogicalHaving", "predicates", getPredicate()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalIntersect.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalIntersect.java index e9e4889a8e553c..3d3ccd9ad8ed14 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalIntersect.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalIntersect.java @@ -139,6 +139,15 @@ public void computeUniform(Builder fdBuilder) { } } + @Override + public void computeEqualSet(Builder fdBuilder) { + for (Plan child : children) { + fdBuilder.addEqualSet( + child.getLogicalProperties().getFunctionalDependencies()); + replaceSlotInFuncDeps(fdBuilder, child.getOutput(), getOutput()); + } + } + @Override public ImmutableSet computeFdItems() { Set output = ImmutableSet.copyOf(getOutput()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java index ea92aeca22d3fd..9ad0b5ab23f866 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java @@ -696,4 +696,20 @@ public void computeUniform(Builder fdBuilder) { fdBuilder.addUniformSlot(left().getLogicalProperties().getFunctionalDependencies()); } } + + @Override + public void computeEqualSet(Builder fdBuilder) { + if (!joinType.isLeftSemiOrAntiJoin()) { + fdBuilder.addEqualSet(right().getLogicalProperties().getFunctionalDependencies()); + } + if (!joinType.isRightSemiOrAntiJoin()) { + fdBuilder.addEqualSet(left().getLogicalProperties().getFunctionalDependencies()); + } + if (joinType.isInnerJoin()) { + for (Expression expression : getHashJoinConjuncts()) { + Optional> equalSlot = ExpressionUtils.extractEqualSlot(expression); + equalSlot.ifPresent(slotSlotPair -> fdBuilder.addEqualPair(slotSlotPair.first, slotSlotPair.second)); + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java index 02558fe2ed2188..0fdf3212d88259 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java @@ -181,4 +181,9 @@ public ImmutableSet computeFdItems() { } return fdItems; } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOneRowRelation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOneRowRelation.java index 78a1e9a3a5e8b4..b6e5af2c591370 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOneRowRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOneRowRelation.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.properties.FdItem; import org.apache.doris.nereids.properties.FunctionalDependencies; import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; @@ -37,7 +38,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -157,4 +160,17 @@ public ImmutableSet computeFdItems() { return builder.build(); } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + Map aliasMap = new HashMap<>(); + for (NamedExpression namedExpr : getOutputs()) { + if (namedExpr instanceof Alias) { + if (aliasMap.containsKey(namedExpr.child(0))) { + fdBuilder.addEqualPair(namedExpr.toSlot(), aliasMap.get(namedExpr.child(0)).toSlot()); + } + aliasMap.put(namedExpr.child(0), namedExpr); + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPlan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPlan.java index c0fb6ae7365cba..8c3be3b0fa46c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPlan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPlan.java @@ -63,6 +63,7 @@ default FunctionalDependencies computeFuncDeps() { FunctionalDependencies.Builder fdBuilder = new FunctionalDependencies.Builder(); computeUniform(fdBuilder); computeUnique(fdBuilder); + computeEqualSet(fdBuilder); ImmutableSet fdItems = computeFdItems(); fdBuilder.addFdItems(fdItems); return fdBuilder.build(); @@ -73,4 +74,6 @@ default FunctionalDependencies computeFuncDeps() { void computeUnique(FunctionalDependencies.Builder fdBuilder); void computeUniform(FunctionalDependencies.Builder fdBuilder); + + void computeEqualSet(FunctionalDependencies.Builder fdBuilder); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java index ebecee0d3ae595..de23bc6f5b07c8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalProject.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.properties.FdItem; import org.apache.doris.nereids.properties.FunctionalDependencies; import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.BoundStar; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -41,7 +42,9 @@ import com.google.common.collect.ImmutableSet; import org.json.JSONObject; +import 
java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -282,4 +285,22 @@ public void computeUniform(FunctionalDependencies.Builder fdBuilder) { } fdBuilder.pruneSlots(getOutputSet()); } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + Map aliasMap = new HashMap<>(); + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + for (NamedExpression expr : getProjects()) { + if (expr instanceof Alias) { + if (aliasMap.containsKey(expr.child(0))) { + fdBuilder.addEqualPair(expr.toSlot(), aliasMap.get(expr.child(0)).toSlot()); + } + aliasMap.put(expr.child(0), expr); + if (expr.child(0).isSlot()) { + fdBuilder.addEqualPair(expr.toSlot(), (Slot) expr.child(0)); + } + } + } + fdBuilder.pruneSlots(getOutputSet()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRepeat.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRepeat.java index 95ae92b686674b..9c24fab3352f36 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRepeat.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalRepeat.java @@ -204,4 +204,9 @@ public ImmutableSet computeFdItems() { return builder.build(); } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child().getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSqlCache.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSqlCache.java index 663044d569fc6e..26c3006d5e5c82 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSqlCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSqlCache.java @@ -20,11 +20,10 @@ import org.apache.doris.analysis.Expr; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.nereids.memo.GroupExpression; -import org.apache.doris.nereids.properties.FdItem; -import org.apache.doris.nereids.properties.FunctionalDependencies.Builder; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.TreeStringPlan; @@ -36,14 +35,13 @@ import org.apache.doris.thrift.TUniqueId; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import java.util.List; import java.util.Objects; import java.util.Optional; /** LogicalSqlCache */ -public class LogicalSqlCache extends LogicalLeaf implements SqlCache, TreeStringPlan { +public class LogicalSqlCache extends LogicalLeaf implements SqlCache, TreeStringPlan, BlockFuncDepsPropagation { private final TUniqueId queryId; private final List columnLabels; private final List resultExprs; @@ -137,19 +135,4 @@ public List computeOutput() { public String getChildrenTreeString() { return planBody; } - - @Override - public ImmutableSet computeFdItems() { - return ImmutableSet.of(); - } - - @Override - public void computeUnique(Builder fdBuilder) { - - } - - @Override - public void 
computeUniform(Builder fdBuilder) { - - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java index ea9fd143c9470c..5d23561b1a954c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java @@ -191,6 +191,17 @@ public ImmutableSet computeFdItems() { return ImmutableSet.of(); } + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child(0).getLogicalProperties().getFunctionalDependencies()); + Map replaceMap = new HashMap<>(); + List outputs = getOutput(); + for (int i = 0; i < outputs.size(); i++) { + replaceMap.put(child(0).getOutput().get(i), outputs.get(i)); + } + fdBuilder.replace(replaceMap); + } + public void setRelationId(RelationId relationId) { this.relationId = relationId; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java index 791ded58cceb63..dd1c171ca2c8bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java @@ -187,6 +187,11 @@ public void computeUniform(FunctionalDependencies.Builder fdBuilder) { } } + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child(0).getLogicalProperties().getFunctionalDependencies()); + } + @Override public ImmutableSet computeFdItems() { ImmutableSet fdItems = child(0).getLogicalProperties().getFunctionalDependencies().getFdItems(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java index 57d944bc2b9c67..8c5bbfcc6a60c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalUnion.java @@ -25,6 +25,7 @@ import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.PlanType; @@ -36,7 +37,12 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Set; @@ -196,6 +202,61 @@ public void computeUniform(FunctionalDependencies.Builder fdBuilder) { // don't propagate uniform slots } + private List> mapSlotToIndex(Plan plan, List> equalSlotsList) { + Map slotToIndex = new HashMap<>(); + for (int i = 0; i < plan.getOutput().size(); i++) { + slotToIndex.put(plan.getOutput().get(i), i); + } + List> equalSlotIndicesList = new ArrayList<>(); + for (Set equalSlots : equalSlotsList) { + Set equalSlotIndices 
= new HashSet<>(); + for (Slot slot : equalSlots) { + if (slotToIndex.containsKey(slot)) { + equalSlotIndices.add(slotToIndex.get(slot)); + } + } + if (equalSlotIndices.size() > 1) { + equalSlotIndicesList.add(equalSlotIndices); + } + } + return equalSlotIndicesList; + } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + List> unionEqualSlotIndicesList = new ArrayList<>(); + + for (Plan child : children) { + List> childEqualSlotsList = + child.getLogicalProperties().getFunctionalDependencies().calAllEqualSet(); + List> childEqualSlotsIndicesList = mapSlotToIndex(child, childEqualSlotsList); + if (unionEqualSlotIndicesList.isEmpty()) { + unionEqualSlotIndicesList = childEqualSlotsIndicesList; + } else { + // Only all child of union has the equal pair, we keep the equal pair. + // It means we should calculate the intersection of all child + for (Set childEqualSlotIndices : childEqualSlotsIndicesList) { + for (Set unionEqualSlotIndices : unionEqualSlotIndicesList) { + if (Collections.disjoint(childEqualSlotIndices, unionEqualSlotIndices)) { + unionEqualSlotIndices.retainAll(childEqualSlotIndices); + } + } + } + } + + List ouputList = getOutput(); + for (Set equalSlotIndices : unionEqualSlotIndicesList) { + if (equalSlotIndices.size() <= 1) { + continue; + } + int first = equalSlotIndices.iterator().next(); + for (int idx : equalSlotIndices) { + fdBuilder.addEqualPair(ouputList.get(first), ouputList.get(idx)); + } + } + } + } + @Override public ImmutableSet computeFdItems() { Set output = ImmutableSet.copyOf(getOutput()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java index 200ccc2ffcbb42..371d3cae43fcff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.FdItem; +import org.apache.doris.nereids.properties.FunctionalDependencies; import org.apache.doris.nereids.properties.FunctionalDependencies.Builder; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; @@ -146,4 +147,9 @@ public void computeUnique(Builder fdBuilder) { public void computeUniform(Builder fdBuilder) { fdBuilder.addUniformSlot(child(0).getLogicalProperties().getFunctionalDependencies()); } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child(0).getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java index b03a3365402bb0..56a306b60056c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java @@ -19,6 +19,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.FdItem; +import org.apache.doris.nereids.properties.FunctionalDependencies; import org.apache.doris.nereids.properties.FunctionalDependencies.Builder; import 
org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.Expression; @@ -324,4 +325,9 @@ public void computeUniform(Builder fdBuilder) { } } } + + @Override + public void computeEqualSet(FunctionalDependencies.Builder fdBuilder) { + fdBuilder.addEqualSet(child(0).getLogicalProperties().getFunctionalDependencies()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java index 51eef31de2ffe3..a6a4d999a92539 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ExpressionUtils.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.common.MaterializedViewException; import org.apache.doris.common.NereidsException; +import org.apache.doris.common.Pair; import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext; import org.apache.doris.nereids.rules.expression.rules.FoldConstantRule; @@ -130,6 +131,13 @@ private static void extract(Class type, Expression expr, C } } + public static Optional> extractEqualSlot(Expression expr) { + if (expr instanceof EqualTo && expr.child(0).isSlot() && expr.child(1).isSlot()) { + return Optional.of(Pair.of((Slot) expr.child(0), (Slot) expr.child(1))); + } + return Optional.empty(); + } + public static Optional optionalAnd(List expressions) { if (expressions.isEmpty()) { return Optional.empty(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java index f5f3dd75b51bfc..35bc78ac117bba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java @@ -24,6 +24,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; /** @@ -44,34 +45,66 @@ public static ImmutableEqualSet empty() { * Builder for ImmutableEqualSet. 
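 *
 * Descriptive note on the builder as it stands after this patch: it keeps a
 * union-find (disjoint-set) parent map. addEqualPair unions the roots of its two
 * arguments, addEqualSet merges another set's parent map, replace rewrites keys and
 * values through a substitution map, and build folds every element down to its root,
 * so isEqual on the resulting ImmutableEqualSet is a constant-time root comparison.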
*/ public static class Builder { - private final Map parent = new LinkedHashMap<>(); - private final Map size = new LinkedHashMap<>(); + private Map parent; + + Builder(Map parent) { + this.parent = parent; + } + + public Builder() { + this(new LinkedHashMap<>()); + } + + public Builder(ImmutableEqualSet equalSet) { + this(new LinkedHashMap<>(equalSet.root)); + } + + /** + * replace all key value according replace map + */ + public void replace(Map replaceMap) { + Map newMap = new LinkedHashMap<>(); + for (Entry entry : parent.entrySet()) { + newMap.put(replaceMap.getOrDefault(entry.getKey(), entry.getKey()), + replaceMap.getOrDefault(entry.getValue(), entry.getValue())); + } + parent = newMap; + } /** * Add a equal pair */ public void addEqualPair(T a, T b) { + if (!parent.containsKey(a)) { + parent.put(a, a); + } + if (!parent.containsKey(b)) { + parent.put(b, b); + } T root1 = findRoot(a); T root2 = findRoot(b); if (root1 != root2) { - parent.put(b, root1); - findRoot(b); + parent.put(root1, root2); } } - private T findRoot(T a) { - parent.putIfAbsent(a, a); // Ensure that the element is added - size.putIfAbsent(a, 1); // Initialize size to 1 + public void addEqualSet(ImmutableEqualSet equalSet) { + this.parent.putAll(equalSet.root); + } - if (!parent.get(a).equals(a)) { - parent.put(a, findRoot(parent.get(a))); // Path compression + private T findRoot(T a) { + if (a.equals(parent.get(a))) { + return parent.get(a); } - return parent.get(a); + return findRoot(parent.get(a)); } public ImmutableEqualSet build() { - parent.keySet().forEach(this::findRoot); - return new ImmutableEqualSet<>(parent); + ImmutableMap.Builder foldMapBuilder = new ImmutableMap.Builder<>(); + for (T k : parent.keySet()) { + foldMapBuilder.put(k, findRoot(k)); + } + return new ImmutableEqualSet<>(foldMapBuilder.build()); } } @@ -103,4 +136,16 @@ public List> calEqualSetList() { public Set getAllItemSet() { return ImmutableSet.copyOf(root.keySet()); } + + public boolean isEqual(T l, T r) { + if (!root.containsKey(l) || !root.containsKey(r)) { + return false; + } + return root.get(l) == root.get(r); + } + + @Override + public String toString() { + return root.toString(); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/EqualSetTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/EqualSetTest.java new file mode 100644 index 00000000000000..fec795ccbe2809 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/EqualSetTest.java @@ -0,0 +1,230 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
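+
+// Illustrative sketch (editorial, not part of the patch) of the equal-set semantics the
+// cases below exercise, assuming three slots a, b and c:
+//   ImmutableEqualSet.Builder<Slot> builder = new ImmutableEqualSet.Builder<>();
+//   builder.addEqualPair(a, b);        // a == b
+//   builder.addEqualPair(b, c);        // and b == c, so transitively a == c
+//   ImmutableEqualSet<Slot> equalSet = builder.build();
+//   equalSet.isEqual(a, c);            // true
+//   equalSet.calEqualSetList();        // one group: {a, b, c}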
+ +package org.apache.doris.nereids.properties; + +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.utframe.TestWithFeService; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class EqualSetTest extends TestWithFeService { + Slot slot1 = new SlotReference("1", IntegerType.INSTANCE, false); + Slot slot2 = new SlotReference("2", IntegerType.INSTANCE, false); + Slot slot3 = new SlotReference("1", IntegerType.INSTANCE, false); + Slot slot4 = new SlotReference("1", IntegerType.INSTANCE, false); + + @Override + protected void runBeforeAll() throws Exception { + createDatabase("test"); + createTable("create table test.agg (\n" + + "id int not null,\n" + + "id2 int replace not null,\n" + + "name varchar(128) replace not null )\n" + + "AGGREGATE KEY(id)\n" + + "distributed by hash(id) buckets 10\n" + + "properties('replication_num' = '1');"); + createTable("create table test.uni (\n" + + "id int not null,\n" + + "id2 int not null,\n" + + "name varchar(128) not null)\n" + + "UNIQUE KEY(id)\n" + + "distributed by hash(id) buckets 10\n" + + "properties('replication_num' = '1');"); + connectContext.setDatabase("test"); + } + + @Test + void testAgg() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id group by id, id2") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + } + + @Test + void testTopNLimit() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id limit 1") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id limit 1 order by id") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + } + + @Test + void testSetOp() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id intersect select id, id2 from agg") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id except select id, id2 from agg") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id union all select id, id2 from agg") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isEmpty()); + plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id union all select id, id2 from agg where id2 = id") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + plan = PlanChecker.from(connectContext) + .analyze("select id, 
id2 from agg union all select id, id2 from agg where id2 = id") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isEmpty()); + } + + @Test + void testFilterHaving() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg where id2 = id") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg group by id, id2 having id = id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + } + + @Test + void testGenerate() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from agg lateral view explode([1,2,3]) tmp1 as e1 where id = id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + } + + @Test + void testJoin() { + // inner join + Plan plan = PlanChecker.from(connectContext) + .analyze("select uni.id, agg.id from agg join uni " + + "where agg.id = uni.id") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + + // foj + plan = PlanChecker.from(connectContext) + .analyze("select t1.id, t2.id, t3.id from agg as t1 join uni as t2 " + + " on t1.id = t2.id full outer join uni as t3 on t1.id2 = t2.id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + Assertions.assertFalse(plan.getLogicalProperties().getFunctionalDependencies() + .isEqualAndNotNotNull(plan.getOutput().get(0), plan.getOutput().get(1))); + Assertions.assertFalse(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(2))); + + // loj + plan = PlanChecker.from(connectContext) + .analyze("select t1.id, t2.id, t3.id from agg as t1 join uni as t2 " + + " on t1.id = t2.id left outer join uni as t3 on t1.id2 = t2.id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isEqualAndNotNotNull(plan.getOutput().get(0), plan.getOutput().get(1))); + Assertions.assertFalse(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(2))); + + // roj + plan = PlanChecker.from(connectContext) + .analyze("select t1.id, t2.id, t3.id from agg as t1 join uni as t2 " + + " on t1.id = t2.id right outer join uni as t3 on t1.id2 = t2.id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + Assertions.assertFalse(plan.getLogicalProperties().getFunctionalDependencies() + .isEqualAndNotNotNull(plan.getOutput().get(0), plan.getOutput().get(1))); + Assertions.assertFalse(plan.getLogicalProperties().getFunctionalDependencies() + .isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(2))); + } + + @Test + void 
testOneRowRelation() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select 1, 1") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getFunctionalDependencies().isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + } + + @Test + void testProject() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select id as o1, id as o2, id2 as o4, 1 as c1, 1 as c2 from uni where id = id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getFunctionalDependencies().isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + Assertions.assertTrue(plan.getLogicalProperties() + .getFunctionalDependencies().isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(2))); + Assertions.assertTrue(plan.getLogicalProperties() + .getFunctionalDependencies().isNullSafeEqual(plan.getOutput().get(1), plan.getOutput().get(2))); + Assertions.assertTrue(plan.getLogicalProperties() + .getFunctionalDependencies().isNullSafeEqual(plan.getOutput().get(3), plan.getOutput().get(4))); + } + + @Test + void testSubQuery() { + Plan plan = PlanChecker.from(connectContext) + .analyze("select id, id2 from (select id, id2 from agg where id = id2) t") + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getFunctionalDependencies().isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + } + + @Test + void testWindow() { + // partition by uniform + Plan plan = PlanChecker.from(connectContext) + .analyze("select id, id2, row_number() over(partition by id) from agg where id = id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getFunctionalDependencies().isNullSafeEqual(plan.getOutput().get(0), plan.getOutput().get(1))); + } + +} diff --git a/regression-test/suites/nereids_syntax_p0/join_order.groovy b/regression-test/suites/nereids_syntax_p0/join_order.groovy index 64056e0202bc84..461c2bb448522a 100644 --- a/regression-test/suites/nereids_syntax_p0/join_order.groovy +++ b/regression-test/suites/nereids_syntax_p0/join_order.groovy @@ -43,7 +43,7 @@ suite("join_order") { """ sql """ drop table if exists outerjoin_C_order;""" sql """ - create table outerjoin_C_order ( c int not null ) + create table outerjoin_C_order ( c int not null, c2 int not null ) ENGINE=OLAP DISTRIBUTED BY HASH(c) BUCKETS 1 PROPERTIES ( @@ -77,7 +77,7 @@ suite("join_order") { sql """insert into outerjoin_A_order values( 1,2 );""" sql """insert into outerjoin_B_order values( 1 );""" - sql """insert into outerjoin_C_order values( 1 );""" + sql """insert into outerjoin_C_order values( 1,2 );""" sql """insert into outerjoin_D_order values( 1,2,3 );""" sql """insert into outerjoin_E_order values( 1,2 );""" @@ -96,13 +96,13 @@ suite("join_order") { sql 'set disable_join_reorder=true;' explain { - sql("select * from outerjoin_A_order, outerjoin_B_order, outerjoin_C_order where outerjoin_A_order.a1 = outerjoin_C_order.c and outerjoin_B_order.b = outerjoin_C_order.c;") + sql("select * from outerjoin_A_order, outerjoin_B_order, outerjoin_C_order where outerjoin_A_order.a1 = outerjoin_C_order.c and outerjoin_B_order.b = outerjoin_C_order.c2;") contains "CROSS JOIN" } sql 'set disable_join_reorder=false;' explain { - sql("select * from outerjoin_A_order, outerjoin_B_order, outerjoin_C_order where outerjoin_A_order.a1 = outerjoin_C_order.c and outerjoin_B_order.b = outerjoin_C_order.c;") + sql("select * from outerjoin_A_order, outerjoin_B_order, outerjoin_C_order where outerjoin_A_order.a1 = 
outerjoin_C_order.c and outerjoin_B_order.b = outerjoin_C_order.c2;") notContains "CROSS JOIN" } From dfe30f542d89f74c008f05d1b7241c8d33c94d6c Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:42:35 +0800 Subject: [PATCH 019/163] [fix](Nereids) support aggregate function only in having statement (#34086) SQL like > SELECT 1 AS c1 FROM t HAVING count(1) > 0 OR c1 IS NOT NULL --- .../rules/analysis/FillUpMissingSlots.java | 71 +++++++++++++++---- .../test_having_with_aggregate_function.out | 4 ++ ...test_having_with_aggregate_function.groovy | 32 +++++++++ 3 files changed, 92 insertions(+), 15 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out create mode 100644 regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java index 82de4453c131f1..1cab3614302381 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.algebra.Aggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalSort; @@ -158,20 +159,52 @@ public List buildRules() { // Convert having to filter RuleType.FILL_UP_HAVING_PROJECT.build( logicalHaving(logicalProject()).then(having -> { - LogicalProject project = having.child(); - Set projectOutputSet = project.getOutputSet(); - Set notExistedInProject = having.getExpressions().stream() - .map(Expression::getInputSlots) - .flatMap(Set::stream) - .filter(s -> !projectOutputSet.contains(s)) - .collect(Collectors.toSet()); - if (notExistedInProject.isEmpty()) { - return null; + if (having.getExpressions().stream().anyMatch(e -> e.containsType(AggregateFunction.class))) { + // This is very weird pattern. + // There are some aggregate functions in having, but its child is project. + // There are some slot from project in having too. + // Having should execute after project. + // But aggregate function should execute before project. + // Since no aggregate here, we should add an empty aggregate before project. + // We should push aggregate function into aggregate node first. + // Then put aggregate result slots and original project slots into new project. + // The final plan should be + // Having + // +-- Project + // +-- Aggregate + // Since aggregate node have no group by key. + // So project should not contain any slot from its original child. + // Or otherwise slot cannot find will be thrown. + LogicalProject project = having.child(); + // new an empty agg here + LogicalAggregate agg = new LogicalAggregate<>( + ImmutableList.of(), ImmutableList.of(), project.child()); + // avoid throw exception even if having have slot from its child. + // because we will add a project between having and project. 
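+                            // The resolver is built with checkSlot=false so bare slots coming from
+                            // the project child do not trigger the "should be grouped by" check; it
+                            // pushes the aggregate functions found in the having conjuncts into the
+                            // empty aggregate, and its substitution map is then used to rewrite the
+                            // conjuncts onto the new aggregate output slots.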
+ Resolver resolver = new Resolver(agg, false); + having.getConjuncts().forEach(resolver::resolve); + agg = agg.withAggOutput(resolver.getNewOutputSlots()); + Set newConjuncts = ExpressionUtils.replace( + having.getConjuncts(), resolver.getSubstitution()); + ImmutableList.Builder projects = ImmutableList.builder(); + projects.addAll(project.getOutputs()).addAll(agg.getOutput()); + return new LogicalHaving<>(newConjuncts, new LogicalProject<>(projects.build(), agg)); + } else { + LogicalProject project = having.child(); + Set projectOutputSet = project.getOutputSet(); + Set notExistedInProject = having.getExpressions().stream() + .map(Expression::getInputSlots) + .flatMap(Set::stream) + .filter(s -> !projectOutputSet.contains(s)) + .collect(Collectors.toSet()); + if (notExistedInProject.isEmpty()) { + return null; + } + List projects = ImmutableList.builder() + .addAll(project.getProjects()).addAll(notExistedInProject).build(); + return new LogicalProject<>(ImmutableList.copyOf(project.getOutput()), + having.withChildren(new LogicalProject<>(projects, project.child()))); } - List projects = ImmutableList.builder() - .addAll(project.getProjects()).addAll(notExistedInProject).build(); - return new LogicalProject<>(ImmutableList.copyOf(project.getOutput()), - having.withChildren(new LogicalProject<>(projects, project.child()))); }) ) ); @@ -184,13 +217,19 @@ static class Resolver { private final Map substitution = Maps.newHashMap(); private final List newOutputSlots = Lists.newArrayList(); private final Map outputSubstitutionMap; + private final boolean checkSlot; - Resolver(Aggregate aggregate) { + Resolver(Aggregate aggregate, boolean checkSlot) { outputExpressions = aggregate.getOutputExpressions(); groupByExpressions = aggregate.getGroupByExpressions(); outputSubstitutionMap = outputExpressions.stream().filter(Alias.class::isInstance) .collect(Collectors.toMap(NamedExpression::toSlot, alias -> alias.child(0), (k1, k2) -> k1)); + this.checkSlot = checkSlot; + } + + Resolver(Aggregate aggregate) { + this(aggregate, true); } public void resolve(Expression expression) { @@ -218,7 +257,9 @@ public void resolve(Expression expression) { // We couldn't find the equivalent expression in output expressions and group-by expressions, // so we should check whether the expression is valid. if (expression instanceof SlotReference) { - throw new AnalysisException(expression.toSql() + " should be grouped by."); + if (checkSlot) { + throw new AnalysisException(expression.toSql() + " should be grouped by."); + } } else if (expression instanceof AggregateFunction) { if (checkWhetherNestedAggregateFunctionsExist((AggregateFunction) expression)) { throw new AnalysisException("Aggregate functions in having clause can't be nested: " diff --git a/regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out b/regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out new file mode 100644 index 00000000000000..2bef87ab2b7a2d --- /dev/null +++ b/regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !having_project_with_having_count_1_and_slot_from_project -- +1 + diff --git a/regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy b/regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy new file mode 100644 index 00000000000000..9047a7c85bf456 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_having_project") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql """ + DROP TABLE IF EXISTS t + """ + sql """ + create table t(id smallint) distributed by random properties('replication_num'='1'); + """ + + qt_having_project_with_having_count_1_and_slot_from_project """ + SELECT 1 AS c1 FROM t HAVING count(1) > 0 OR c1 IS NOT NULL + """ +} From c3402ad2cbf0b9f082954418eaa86fd1603f9144 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Thu, 25 Apr 2024 16:43:31 +0800 Subject: [PATCH 020/163] [Chore](regression-test) adjust variant tpch/q09_trans.sql batch_size from default to 2048 (#34105) since 50 may cause performance issue introduced by #33853 --- regression-test/suites/variant_p0/tpch/sql/q09_trans.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression-test/suites/variant_p0/tpch/sql/q09_trans.sql b/regression-test/suites/variant_p0/tpch/sql/q09_trans.sql index ddbc82812bc5b2..f501d73dd727aa 100644 --- a/regression-test/suites/variant_p0/tpch/sql/q09_trans.sql +++ b/regression-test/suites/variant_p0/tpch/sql/q09_trans.sql @@ -1,6 +1,6 @@ -- TABLES: part,SUPPLIER,lineitem,partsupp,orders,NATION -- ERROR: not stable -SELECT /*+SET_VAR(enable_fallback_to_original_planner=false) */ +SELECT /*+SET_VAR(enable_fallback_to_original_planner=false,batch_size=2048) */ NATION, O_YEAR, SUM(AMOUNT) AS SUM_PROFIT From f79e0ca6015b8d5475d967e85ce678cc653178d2 Mon Sep 17 00:00:00 2001 From: Pxl Date: Thu, 25 Apr 2024 17:30:22 +0800 Subject: [PATCH 021/163] [Bug](runtime-filter) fix bloom filter size error on rf merge (#34082) fix bloom filter size error on rf merge W20240424 11:28:56.826277 3494287 ref_count_closure.h:80] RPC meet error status: [INVALID_ARGUMENT]PStatus: (172.21.0.15)[INVALID_ARGUMENT]bloom filter size not the same: already allocated bytes 65536, expected allocated bytes 32768 --- be/src/exprs/bloom_filter_func.h | 2 +- be/src/exprs/runtime_filter.cpp | 88 +++++++++++--------- be/src/pipeline/exec/hashjoin_build_sink.cpp | 4 +- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/be/src/exprs/bloom_filter_func.h 
b/be/src/exprs/bloom_filter_func.h index 987c2b0c05d9b7..bc56c7b505afa1 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -167,7 +167,7 @@ class BloomFilterFuncBase : public RuntimeFilterFuncBase { DCHECK(bloomfilter_func != nullptr); auto* other_func = static_cast(bloomfilter_func); if (_bloom_filter_alloced != other_func->_bloom_filter_alloced) { - return Status::InvalidArgument( + return Status::InternalError( "bloom filter size not the same: already allocated bytes {}, expected " "allocated bytes {}", _bloom_filter_alloced, other_func->_bloom_filter_alloced); diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 174ce8f3fe5104..c77fdcd903ac77 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -246,7 +246,7 @@ Status create_vbin_predicate(const TypeDescriptor& type, TExprOpcode::type opcod fn_name.__set_function_name("ge"); break; default: - return Status::InvalidArgument( + return Status::InternalError( strings::Substitute("Invalid opcode for max_min_runtimefilter: '$0'", opcode)); } fn.__set_name(fn_name); @@ -282,17 +282,16 @@ class RuntimePredicateWrapper { public: RuntimePredicateWrapper(ObjectPool* pool, const RuntimeFilterParams* params) : RuntimePredicateWrapper(pool, params->column_return_type, params->filter_type, - params->filter_id, params->build_bf_exactly) {}; + params->filter_id) {}; // for a 'tmp' runtime predicate wrapper // only could called assign method or as a param for merge RuntimePredicateWrapper(ObjectPool* pool, PrimitiveType column_type, RuntimeFilterType type, - uint32_t filter_id, bool build_bf_exactly = false) + uint32_t filter_id) : _pool(pool), _column_return_type(column_type), _filter_type(type), _context(new RuntimeFilterContext()), - _filter_id(filter_id), - _build_bf_exactly(build_bf_exactly) {} + _filter_id(filter_id) {} // init runtime filter wrapper // alloc memory to init runtime filter function @@ -333,24 +332,26 @@ class RuntimePredicateWrapper { return Status::OK(); } default: - return Status::InvalidArgument("Unknown Filter type"); + return Status::InternalError("Unknown Filter type"); } return Status::OK(); } - Status change_to_bloom_filter(bool need_init_bf = false) { - CHECK(_filter_type == RuntimeFilterType::IN_OR_BLOOM_FILTER) - << "Can not change to bloom filter because of runtime filter type is " - << IRuntimeFilter::to_string(_filter_type); + Status change_to_bloom_filter() { + if (_filter_type != RuntimeFilterType::IN_OR_BLOOM_FILTER) { + return Status::InternalError( + "Can not change to bloom filter because of runtime filter type is {}", + IRuntimeFilter::to_string(_filter_type)); + } BloomFilterFuncBase* bf = _context->bloom_filter_func.get(); - if (need_init_bf) { - // BloomFilter may be not init - RETURN_IF_ERROR(bf->init_with_fixed_length()); + + if (bf != nullptr) { insert_to_bloom_filter(bf); - } else { - DCHECK(_context->hybrid_set == nullptr || _context->hybrid_set->size() == 0) - << "set size: " << (_context->hybrid_set ? 
_context->hybrid_set->size() : 0); + } else if (_context->hybrid_set != nullptr && _context->hybrid_set->size() != 0) { + return Status::InternalError("change to bloom filter need empty set ", + IRuntimeFilter::to_string(_filter_type)); } + // release in filter _context->hybrid_set.reset(); return Status::OK(); @@ -514,26 +515,27 @@ class RuntimePredicateWrapper { } if (real_filter_type == RuntimeFilterType::IN_FILTER) { - if (other_filter_type == RuntimeFilterType::IN_FILTER) { // in merge in + // when we meet base rf is in-filter, threre only have two case: + // case1: all input-filter's build_bf_exactly is true, inited by synced global size + // case2: all input-filter's build_bf_exactly is false, inited by default size + if (other_filter_type == RuntimeFilterType::IN_FILTER) { _context->hybrid_set->insert(wrapper->_context->hybrid_set.get()); if (_max_in_num >= 0 && _context->hybrid_set->size() >= _max_in_num) { - VLOG_DEBUG << " change runtime filter to bloom filter(id=" << _filter_id - << ") because: in_num(" << _context->hybrid_set->size() - << ") >= max_in_num(" << _max_in_num << ")"; - RETURN_IF_ERROR(change_to_bloom_filter(true)); + // case2: use default size to init bf + RETURN_IF_ERROR(_context->bloom_filter_func->init_with_fixed_length()); + RETURN_IF_ERROR(change_to_bloom_filter()); } } else { - VLOG_DEBUG << " change runtime filter to bloom filter(id=" << _filter_id - << ") because: already exist a bloom filter"; - RETURN_IF_ERROR(change_to_bloom_filter(!_build_bf_exactly)); - RETURN_IF_ERROR(_context->bloom_filter_func->merge( - wrapper->_context->bloom_filter_func.get())); + // case1&case2: use input bf directly and insert hybrid set data into bf + _context->bloom_filter_func = wrapper->_context->bloom_filter_func; + RETURN_IF_ERROR(change_to_bloom_filter()); } } else { - if (other_filter_type == RuntimeFilterType::IN_FILTER) { // bloom filter merge in + if (other_filter_type == RuntimeFilterType::IN_FILTER) { + // case2: insert data to global filter wrapper->insert_to_bloom_filter(_context->bloom_filter_func.get()); - // bloom filter merge bloom filter } else { + // case1&case2: all input bf must has same size RETURN_IF_ERROR(_context->bloom_filter_func->merge( wrapper->_context->bloom_filter_func.get())); } @@ -716,9 +718,8 @@ class RuntimePredicateWrapper { break; } default: { - DCHECK(false) << "unknown type: " << type_to_string(type); - return Status::InvalidArgument("not support assign to in filter, type: " + - type_to_string(type)); + return Status::InternalError("not support assign to in filter, type: " + + type_to_string(type)); } } return Status::OK(); @@ -870,10 +871,9 @@ class RuntimePredicateWrapper { return _context->minmax_func->assign(&min_val, &max_val); } default: - DCHECK(false) << "unknown type"; break; } - return Status::InvalidArgument("not support!"); + return Status::InternalError("not support!"); } HybridSetBase::IteratorBase* get_in_filter_iterator() { return _context->hybrid_set->begin(); } @@ -950,7 +950,6 @@ class RuntimePredicateWrapper { SharedRuntimeFilterContext _context; uint32_t _filter_id; - bool _build_bf_exactly; }; Status IRuntimeFilter::create(RuntimeFilterParamsContext* state, ObjectPool* pool, @@ -1375,11 +1374,11 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue } if (_runtime_filter_type == RuntimeFilterType::BITMAP_FILTER) { if (!build_ctx->root()->type().is_bitmap_type()) { - return Status::InvalidArgument("Unexpected src expr type:{} for bitmap filter.", - 
build_ctx->root()->type().debug_string()); + return Status::InternalError("Unexpected src expr type:{} for bitmap filter.", + build_ctx->root()->type().debug_string()); } if (!desc->__isset.bitmap_target_expr) { - return Status::InvalidArgument("Unknown bitmap filter target expr."); + return Status::InternalError("Unknown bitmap filter target expr."); } vectorized::VExprContextSPtr bitmap_target_ctx; RETURN_IF_ERROR( @@ -1455,7 +1454,7 @@ Status IRuntimeFilter::create_wrapper(const UpdateRuntimeFilterParamsV2* param, return (*wrapper)->assign(¶m->request->minmax_filter(), param->request->contain_null()); } default: - return Status::InvalidArgument("unknown filter type"); + return Status::InternalError("unknown filter type"); } } @@ -1504,7 +1503,7 @@ Status IRuntimeFilter::_create_wrapper(const T* param, ObjectPool* pool, return (*wrapper)->assign(¶m->request->minmax_filter(), param->request->contain_null()); } default: - return Status::InvalidArgument("unknown filter type"); + return Status::InternalError("unknown filter type"); } } @@ -1523,7 +1522,14 @@ void IRuntimeFilter::update_runtime_filter_type_to_profile() { } Status IRuntimeFilter::merge_from(const RuntimePredicateWrapper* wrapper) { - return _wrapper->merge(wrapper); + auto status = _wrapper->merge(wrapper); + if (!status) { + LOG(WARNING) << "runtime filter merge failed: " << _name + << " ,need_local_merge: " << _need_local_merge + << " ,is_broadcast: " << _is_broadcast_join; + DCHECK(false); // rpc response is often ignored, so let it crash directly here + } + return status; } template @@ -1558,7 +1564,7 @@ Status IRuntimeFilter::serialize_impl(T* request, void** data, int* len) { auto minmax_filter = request->mutable_minmax_filter(); to_protobuf(minmax_filter); } else { - return Status::InvalidArgument("not implemented !"); + return Status::InternalError("not implemented !"); } return Status::OK(); } diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index 2b2bdad86f70f8..da3614e4479fce 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -137,7 +137,9 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status exec_statu uint64_t hash_table_size = block ? 
block->rows() : 0; { SCOPED_TIMER(_runtime_filter_init_timer); - RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); + if (_should_build_hash_table) { + RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); + } RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); } if (_should_build_hash_table && hash_table_size > 1) { From 2efe79d9e56d8a230e865fd44fb2b627746f7116 Mon Sep 17 00:00:00 2001 From: HHoflittlefish777 <77738092+HHoflittlefish777@users.noreply.github.com> Date: Thu, 25 Apr 2024 19:45:09 +0800 Subject: [PATCH 022/163] [test](streamload) add load empty file regression test (#34110) --- .../load_p0/stream_load/test_empty_file.csv | 0 .../test_stream_load_empty_file.out | 3 + .../test_stream_load_empty_file.groovy | 73 +++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 regression-test/data/load_p0/stream_load/test_empty_file.csv create mode 100644 regression-test/data/load_p0/stream_load/test_stream_load_empty_file.out create mode 100644 regression-test/suites/load_p0/stream_load/test_stream_load_empty_file.groovy diff --git a/regression-test/data/load_p0/stream_load/test_empty_file.csv b/regression-test/data/load_p0/stream_load/test_empty_file.csv new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/regression-test/data/load_p0/stream_load/test_stream_load_empty_file.out b/regression-test/data/load_p0/stream_load/test_stream_load_empty_file.out new file mode 100644 index 00000000000000..9c9c4c6c8a2be1 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/test_stream_load_empty_file.out @@ -0,0 +1,3 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- + diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load_empty_file.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load_empty_file.groovy new file mode 100644 index 00000000000000..9265280ecae7a0 --- /dev/null +++ b/regression-test/suites/load_p0/stream_load/test_stream_load_empty_file.groovy @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_stream_load_empty_file", "p0") { + def tableName = "test_stream_load_empty_file" + try { + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k1` bigint(20) NULL, + `k2` bigint(20) NULL, + `v1` tinyint(4) SUM NULL, + `v2` tinyint(4) REPLACE NULL, + `v3` tinyint(4) REPLACE_IF_NOT_NULL NULL, + `v4` smallint(6) REPLACE_IF_NOT_NULL NULL, + `v5` int(11) REPLACE_IF_NOT_NULL NULL, + `v6` bigint(20) REPLACE_IF_NOT_NULL NULL, + `v7` largeint(40) REPLACE_IF_NOT_NULL NULL, + `v8` datetime REPLACE_IF_NOT_NULL NULL, + `v9` date REPLACE_IF_NOT_NULL NULL, + `v10` char(10) REPLACE_IF_NOT_NULL NULL, + `v11` varchar(6) REPLACE_IF_NOT_NULL NULL, + `v12` decimal(27, 9) REPLACE_IF_NOT_NULL NULL + ) ENGINE=OLAP + AGGREGATE KEY(`k1`, `k2`) + COMMENT 'OLAP' + PARTITION BY RANGE(`k1`) + (PARTITION partition_a VALUES [("-9223372036854775808"), ("100000")), + PARTITION partition_b VALUES [("100000"), ("1000000000")), + PARTITION partition_c VALUES [("1000000000"), ("10000000000")), + PARTITION partition_d VALUES [("10000000000"), (MAXVALUE))) + DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 3 + PROPERTIES ("replication_allocation" = "tag.location.default: 1"); + """ + + // test strict_mode success + streamLoad { + table "${tableName}" + + file 'test_empty_file.csv' + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(0, json.NumberTotalRows) + } + time 10000 // limit inflight 10s + } + + sql "sync" + qt_sql "select * from ${tableName}" + } finally { + sql """ DROP TABLE IF EXISTS ${tableName} """ + } +} \ No newline at end of file From 77e53ca970db3b26c22cc20d5e4e62a650cc9147 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG <98214048+dataroaring@users.noreply.github.com> Date: Thu, 25 Apr 2024 19:59:49 +0800 Subject: [PATCH 023/163] [fix](cloud) exclude some cases from cloud_p0 (#33725) --- bin/start_be.sh | 10 ++-- .../system/test_query_sys_rowsets.out | 27 ++++++++++ .../query_p0/system/test_query_sys_tables.out | 28 ----------- .../pipeline/cloud_p0/conf/be_custom.conf | 1 + .../conf/regression-conf-custom.groovy | 6 ++- .../test_map_load_and_compaction.groovy | 2 +- .../test_stream_load_properties.groovy | 6 ++- .../select_tablets/select_with_tablets.groovy | 3 +- .../system/test_query_sys_rowsets.groovy | 49 +++++++++++++++++++ .../system/test_query_sys_tables.groovy | 28 ----------- 10 files changed, 94 insertions(+), 66 deletions(-) create mode 100644 regression-test/data/query_p0/system/test_query_sys_rowsets.out create mode 100644 regression-test/suites/query_p0/system/test_query_sys_rowsets.groovy diff --git a/bin/start_be.sh b/bin/start_be.sh index 518eecc9740c63..743c2f98a97ec6 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -187,10 +187,6 @@ export ODBCSYSINI="${DORIS_HOME}/conf" # support utf8 for oracle database export NLS_LANG='AMERICAN_AMERICA.AL32UTF8' -# filter known leak -export LSAN_OPTIONS="suppressions=${DORIS_HOME}/conf/lsan_suppr.conf" -export ASAN_OPTIONS="suppressions=${DORIS_HOME}/conf/asan_suppr.conf" - while read -r line; do envline="$(echo "${line}" | sed 's/[[:blank:]]*=[[:blank:]]*/=/g' | @@ -251,9 +247,13 @@ fi export AWS_MAX_ATTEMPTS=2 +# filter known leak +export LSAN_OPTIONS=suppressions=${DORIS_HOME}/conf/lsan_suppr.conf +export ASAN_OPTIONS=suppressions=${DORIS_HOME}/conf/asan_suppr.conf + ## set asan and ubsan env 
to generate core file ## detect_container_overflow=0, https://github.com/google/sanitizers/issues/193 -export ASAN_OPTIONS=symbolize=1:abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1:detect_container_overflow=0:check_malloc_usable_size=0 +export ASAN_OPTIONS=symbolize=1:abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1:detect_container_overflow=0:check_malloc_usable_size=0:${ASAN_OPTIONS} export UBSAN_OPTIONS=print_stacktrace=1 ## set TCMALLOC_HEAP_LIMIT_MB to limit memory used by tcmalloc diff --git a/regression-test/data/query_p0/system/test_query_sys_rowsets.out b/regression-test/data/query_p0/system/test_query_sys_rowsets.out new file mode 100644 index 00000000000000..958d57d68e44ba --- /dev/null +++ b/regression-test/data/query_p0/system/test_query_sys_rowsets.out @@ -0,0 +1,27 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !desc_rowsets -- +BACKEND_ID BIGINT Yes false \N +ROWSET_ID VARCHAR(64) Yes false \N +TABLET_ID BIGINT Yes false \N +ROWSET_NUM_ROWS BIGINT Yes false \N +TXN_ID BIGINT Yes false \N +NUM_SEGMENTS BIGINT Yes false \N +START_VERSION BIGINT Yes false \N +END_VERSION BIGINT Yes false \N +INDEX_DISK_SIZE BIGINT Yes false \N +DATA_DISK_SIZE BIGINT Yes false \N +CREATION_TIME BIGINT Yes false \N +NEWEST_WRITE_TIMESTAMP BIGINT Yes false \N + +-- !rowsets1 -- +0 1 + +-- !rowsets2 -- +0 1 +2 2 + +-- !rowsets3 -- +0 1 +2 2 +3 3 +4 4 \ No newline at end of file diff --git a/regression-test/data/query_p0/system/test_query_sys_tables.out b/regression-test/data/query_p0/system/test_query_sys_tables.out index 91f54556ae8a35..74574ae818ba62 100644 --- a/regression-test/data/query_p0/system/test_query_sys_tables.out +++ b/regression-test/data/query_p0/system/test_query_sys_tables.out @@ -154,40 +154,12 @@ TABLESPACE_NAME VARCHAR(268) Yes false \N -- !select_partitions -- --- !desc_rowsets -- -BACKEND_ID BIGINT Yes false \N -ROWSET_ID VARCHAR(64) Yes false \N -TABLET_ID BIGINT Yes false \N -ROWSET_NUM_ROWS BIGINT Yes false \N -TXN_ID BIGINT Yes false \N -NUM_SEGMENTS BIGINT Yes false \N -START_VERSION BIGINT Yes false \N -END_VERSION BIGINT Yes false \N -INDEX_DISK_SIZE BIGINT Yes false \N -DATA_DISK_SIZE BIGINT Yes false \N -CREATION_TIME BIGINT Yes false \N -NEWEST_WRITE_TIMESTAMP BIGINT Yes false \N - --- !rowsets1 -- -0 1 - --- !rowsets2 -- -0 1 -2 2 - --- !rowsets3 -- -0 1 -2 2 -3 3 -4 4 - -- !schemata -- internal test_query_sys_db_1 \N internal test_query_sys_db_2 \N internal test_query_sys_db_3 \N -- !tables -- -internal test_query_rowset BASE TABLE 0 \N \N internal test_query_sys_tb_1 BASE TABLE 0 \N \N internal test_query_sys_tb_2 BASE TABLE 0 \N \N internal test_query_sys_tb_3 BASE TABLE 0 \N \N diff --git a/regression-test/pipeline/cloud_p0/conf/be_custom.conf b/regression-test/pipeline/cloud_p0/conf/be_custom.conf index 1f4104304fcaf8..9f85d1c98fac37 100644 --- a/regression-test/pipeline/cloud_p0/conf/be_custom.conf +++ b/regression-test/pipeline/cloud_p0/conf/be_custom.conf @@ -29,6 +29,7 @@ meta_service_use_load_balancer = false enable_file_cache = true file_cache_path = [{"path":"/data/doris_cloud/file_cache","total_size":104857600,"query_limit":104857600}] tmp_file_dirs = [{"path":"/data/doris_cloud/tmp","max_cache_bytes":104857600,"max_upload_bytes":104857600}] +thrift_rpc_timeout_ms = 360000 # For debug sys_log_verbose_modules=vrow_distribution,tablet_info sys_log_verbose_level=7 \ No newline at end of file diff --git a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy 
b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy index d7e02406196881..0810ac8a403ff3 100644 --- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy @@ -39,9 +39,11 @@ excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as th "test_set_partition_version," + "test_show_transaction," + // not supported yet "test_spark_load," + - "test_array_index1," + - "test_array_index2," + "test_index_lowercase_fault_injection," + + "test_partial_update_2pc_schema_change," + // mow 2pc + "test_query_sys_rowsets," + // rowsets sys table + "test_unique_table_debug_data," + // disable auto compaction + "test_insert," + // txn insert "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line diff --git a/regression-test/suites/load_p0/stream_load/test_map_load_and_compaction.groovy b/regression-test/suites/load_p0/stream_load/test_map_load_and_compaction.groovy index 4a80004169b46f..703f5dde345587 100644 --- a/regression-test/suites/load_p0/stream_load/test_map_load_and_compaction.groovy +++ b/regression-test/suites/load_p0/stream_load/test_map_load_and_compaction.groovy @@ -77,7 +77,7 @@ suite("test_map_load_and_compaction", "p0") { for (String rowset in (List) compactStatusJson.rowsets) { rowsetsCount += Integer.parseInt(rowset.split(" ")[1]) } - assertTrue(assertRowSetNum==rowsetsCount) + assertEquals(assertRowSetNum, rowsetsCount) } diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load_properties.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load_properties.groovy index 83dc83efdfeaf5..15ef2fbb5c68b9 100644 --- a/regression-test/suites/load_p0/stream_load/test_stream_load_properties.groovy +++ b/regression-test/suites/load_p0/stream_load/test_stream_load_properties.groovy @@ -741,7 +741,11 @@ suite("test_stream_load_properties", "p0") { // Commit the same txnId again to trigger operate_txn_2pc() failure body = do_streamload_2pc.call(txnId, "commit", tableName1) - assertEquals("analysis_error", parseJson(body).status.toLowerCase()) + if (isCloudMode()) { + assertEquals("success", parseJson(body).status.toLowerCase()) + } else { + assertEquals("analysis_error", parseJson(body).status.toLowerCase()) + } assertTrue(parseJson(body).msg.toLowerCase().contains("is already visible")) i++ diff --git a/regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy b/regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy index e3de6448678771..ffaac34200e479 100644 --- a/regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy +++ b/regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy @@ -46,7 +46,8 @@ suite("select_with_tablets") { def res = sql_return_maparray """ show tablets from ${table_name1} where version = 2 """ res = deduplicate_tablets(res) - assertTrue(res.size() == 1) + log.info("res: " + res.toString()) + assertEquals(res.size(), 1) assertEquals("2", res[0].Version) order_qt_select2 """ SELECT * FROM ${table_name1} TABLET(${res[0].TabletId}) """ diff --git a/regression-test/suites/query_p0/system/test_query_sys_rowsets.groovy b/regression-test/suites/query_p0/system/test_query_sys_rowsets.groovy new file mode 100644 index 00000000000000..1f159e3832979e --- /dev/null +++ b/regression-test/suites/query_p0/system/test_query_sys_rowsets.groovy @@ -0,0 
+1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_query_sys_rowsets", "query,p0") { + def dbName1 = "test_query_sys_rowsets" + + sql("CREATE DATABASE IF NOT EXISTS ${dbName1}") + + // test rowsets + qt_desc_rowsets """ desc information_schema.rowsets """ + def rowsets_table_name = """ test_query_sys_rowsets.test_query_rowset """ + sql """ drop table if exists ${rowsets_table_name} """ + + sql """ + create table ${rowsets_table_name}( + a int , + b boolean , + c string ) + DISTRIBUTED BY HASH(`a`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1", + "disable_auto_compaction" = "true", + "enable_single_replica_compaction"="true" + ); + """ + + List> rowsets_table_name_tablets = sql """ show tablets from ${rowsets_table_name} """ + order_qt_rowsets1 """ select START_VERSION,END_VERSION from information_schema.rowsets where TABLET_ID=${rowsets_table_name_tablets[0][0]} order by START_VERSION,END_VERSION; """ + sql """ insert into ${rowsets_table_name} values (1,0,"abc"); """ + order_qt_rowsets2 """ select START_VERSION,END_VERSION from information_schema.rowsets where TABLET_ID=${rowsets_table_name_tablets[0][0]} order by START_VERSION,END_VERSION; """ + sql """ insert into ${rowsets_table_name} values (2,1,"hello world"); """ + sql """ insert into ${rowsets_table_name} values (3,0,"dssadasdsafafdf"); """ + order_qt_rowsets3 """ select START_VERSION,END_VERSION from information_schema.rowsets where TABLET_ID=${rowsets_table_name_tablets[0][0]} order by START_VERSION,END_VERSION; """ + +} \ No newline at end of file diff --git a/regression-test/suites/query_p0/system/test_query_sys_tables.groovy b/regression-test/suites/query_p0/system/test_query_sys_tables.groovy index 7d943894168410..b8f14da041b634 100644 --- a/regression-test/suites/query_p0/system/test_query_sys_tables.groovy +++ b/regression-test/suites/query_p0/system/test_query_sys_tables.groovy @@ -139,34 +139,6 @@ suite("test_query_sys_tables", "query,p0") { qt_desc_partitions """ desc `information_schema`.`partitions` """ order_qt_select_partitions """ select * from `information_schema`.`partitions`; """ - - // test rowsets - qt_desc_rowsets """ desc information_schema.rowsets """ - def rowsets_table_name = """ test_query_sys_db_1.test_query_rowset """ - sql """ drop table if exists ${rowsets_table_name} """ - - sql """ - create table ${rowsets_table_name}( - a int , - b boolean , - c string ) - DISTRIBUTED BY HASH(`a`) BUCKETS 1 - PROPERTIES ( - "replication_num" = "1", - "disable_auto_compaction" = "true", - "enable_single_replica_compaction"="true" - ); - """ - - List> rowsets_table_name_tablets = sql """ show tablets from ${rowsets_table_name} """ - order_qt_rowsets1 """ select START_VERSION,END_VERSION from information_schema.rowsets where 
TABLET_ID=${rowsets_table_name_tablets[0][0]} order by START_VERSION,END_VERSION; """ - sql """ insert into ${rowsets_table_name} values (1,0,"abc"); """ - order_qt_rowsets2 """ select START_VERSION,END_VERSION from information_schema.rowsets where TABLET_ID=${rowsets_table_name_tablets[0][0]} order by START_VERSION,END_VERSION; """ - sql """ insert into ${rowsets_table_name} values (2,1,"hello world"); """ - sql """ insert into ${rowsets_table_name} values (3,0,"dssadasdsafafdf"); """ - order_qt_rowsets3 """ select START_VERSION,END_VERSION from information_schema.rowsets where TABLET_ID=${rowsets_table_name_tablets[0][0]} order by START_VERSION,END_VERSION; """ - - // test schemata // create test dbs sql("CREATE DATABASE IF NOT EXISTS ${dbName1}") From 044f4f9aa8fffeeb35d790478784077366f21af6 Mon Sep 17 00:00:00 2001 From: huanghg1994 <519500479@qq.com> Date: Thu, 25 Apr 2024 20:05:03 +0800 Subject: [PATCH 024/163] [fix](resource)fix check available fail when s3 aws_token is set and reset as, sk faild on be. (#34057) --- be/src/agent/task_worker_pool.cpp | 2 ++ be/src/io/fs/s3_file_system.cpp | 12 ++++++++---- .../org/apache/doris/catalog/S3Resource.java | 16 +++++++++++++--- .../property/constants/S3Properties.java | 1 + gensrc/thrift/AgentService.thrift | 1 + .../cold_heat_separation/policy/alter.groovy | 11 +++++++++++ 6 files changed, 36 insertions(+), 7 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 10590a7faeae16..3b8d63f7a95dbe 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1365,6 +1365,7 @@ void update_s3_resource(const TStorageResource& param, io::RemoteFileSystemSPtr .region = param.s3_storage_param.region, .ak = param.s3_storage_param.ak, .sk = param.s3_storage_param.sk, + .token = param.s3_storage_param.token, .max_connections = param.s3_storage_param.max_conn, .request_timeout_ms = param.s3_storage_param.request_timeout_ms, .connect_timeout_ms = param.s3_storage_param.conn_timeout_ms, @@ -1384,6 +1385,7 @@ void update_s3_resource(const TStorageResource& param, io::RemoteFileSystemSPtr S3ClientConf conf { .ak = param.s3_storage_param.ak, .sk = param.s3_storage_param.sk, + .token = param.s3_storage_param.token, }; st = client->reset(conf); fs = std::move(existed_fs); diff --git a/be/src/io/fs/s3_file_system.cpp b/be/src/io/fs/s3_file_system.cpp index 3d9f25686a88f8..dea3279350900d 100644 --- a/be/src/io/fs/s3_file_system.cpp +++ b/be/src/io/fs/s3_file_system.cpp @@ -115,16 +115,21 @@ Status S3ClientHolder::init() { } Status S3ClientHolder::reset(const S3ClientConf& conf) { + S3ClientConf reset_conf; { std::shared_lock lock(_mtx); - if (conf.ak == _conf.ak && conf.sk == _conf.sk) { + if (conf.ak == _conf.ak && conf.sk == _conf.sk && conf.token == _conf.token) { return Status::OK(); // Same conf } + reset_conf = _conf; + reset_conf.ak = conf.ak; + reset_conf.sk = conf.sk; + reset_conf.token = conf.token; // Should check endpoint here? 
} - auto client = S3ClientFactory::instance().create(conf); + auto client = S3ClientFactory::instance().create(reset_conf); if (!client) { return Status::InternalError("failed to init s3 client with conf {}", conf.to_string()); } @@ -134,8 +139,7 @@ Status S3ClientHolder::reset(const S3ClientConf& conf) { { std::lock_guard lock(_mtx); _client = std::move(client); - _conf.ak = conf.ak; - _conf.sk = conf.sk; + _conf = std::move(reset_conf); } return Status::OK(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java index 5bcb5123c64f52..a26038970477a6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/S3Resource.java @@ -105,7 +105,8 @@ protected void setProperties(Map properties) throws DdlException properties.putIfAbsent(S3Properties.REGION, region); String ak = properties.get(S3Properties.ACCESS_KEY); String sk = properties.get(S3Properties.SECRET_KEY); - CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(pingEndpoint, region, ak, sk); + String token = properties.get(S3Properties.SESSION_TOKEN); + CloudCredentialWithEndpoint credential = new CloudCredentialWithEndpoint(pingEndpoint, region, ak, sk, token); if (needCheck) { String bucketName = properties.get(S3Properties.BUCKET); @@ -123,6 +124,7 @@ private static void pingS3(CloudCredentialWithEndpoint credential, String bucket Map propertiesPing = new HashMap<>(); propertiesPing.put(S3Properties.Env.ACCESS_KEY, credential.getAccessKey()); propertiesPing.put(S3Properties.Env.SECRET_KEY, credential.getSecretKey()); + propertiesPing.put(S3Properties.Env.TOKEN, credential.getSessionToken()); propertiesPing.put(S3Properties.Env.ENDPOINT, credential.getEndpoint()); propertiesPing.put(S3Properties.Env.REGION, credential.getRegion()); propertiesPing.put(PropertyConverter.USE_PATH_STYLE, @@ -188,6 +190,10 @@ public void modifyProperties(Map properties) throws DdlException writeLock(); for (Map.Entry kv : properties.entrySet()) { replaceIfEffectiveValue(this.properties, kv.getKey(), kv.getValue()); + if (kv.getKey().equals(S3Properties.Env.TOKEN) + || kv.getKey().equals(S3Properties.SESSION_TOKEN)) { + this.properties.put(kv.getKey(), kv.getValue()); + } } ++version; writeUnlock(); @@ -197,11 +203,13 @@ public void modifyProperties(Map properties) throws DdlException private CloudCredentialWithEndpoint getS3PingCredentials(Map properties) { String ak = properties.getOrDefault(S3Properties.ACCESS_KEY, this.properties.get(S3Properties.ACCESS_KEY)); String sk = properties.getOrDefault(S3Properties.SECRET_KEY, this.properties.get(S3Properties.SECRET_KEY)); + String token = properties.getOrDefault(S3Properties.SESSION_TOKEN, + this.properties.get(S3Properties.SESSION_TOKEN)); String endpoint = properties.getOrDefault(S3Properties.ENDPOINT, this.properties.get(S3Properties.ENDPOINT)); String pingEndpoint = "http://" + endpoint; String region = S3Properties.getRegionOfEndpoint(pingEndpoint); properties.putIfAbsent(S3Properties.REGION, region); - return new CloudCredentialWithEndpoint(pingEndpoint, region, ak, sk); + return new CloudCredentialWithEndpoint(pingEndpoint, region, ak, sk, token); } private boolean isNeedCheck(Map newProperties) { @@ -231,7 +239,9 @@ protected void getProcNodeData(BaseProcResult result) { // it's dangerous to show password in show odbc resource, // so we use empty string to replace the real password if 
(entry.getKey().equals(S3Properties.Env.SECRET_KEY) - || entry.getKey().equals(S3Properties.SECRET_KEY)) { + || entry.getKey().equals(S3Properties.SECRET_KEY) + || entry.getKey().equals(S3Properties.Env.TOKEN) + || entry.getKey().equals(S3Properties.SESSION_TOKEN)) { result.addRow(Lists.newArrayList(name, lowerCaseType, entry.getKey(), "******")); } else { result.addRow(Lists.newArrayList(name, lowerCaseType, entry.getKey(), entry.getValue())); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java index 947174f86ef600..26bd37b0cac0e9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java @@ -265,6 +265,7 @@ public static TS3StorageParam getS3TStorageParam(Map properties) s3Info.setRegion(properties.get(S3Properties.REGION)); s3Info.setAk(properties.get(S3Properties.ACCESS_KEY)); s3Info.setSk(properties.get(S3Properties.SECRET_KEY)); + s3Info.setToken(properties.get(S3Properties.SESSION_TOKEN)); s3Info.setRootPath(properties.get(S3Properties.ROOT_PATH)); s3Info.setBucket(properties.get(S3Properties.BUCKET)); diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index df478a75a1ba70..4e381b168052ad 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -73,6 +73,7 @@ struct TS3StorageParam { 8: optional string root_path 9: optional string bucket 10: optional bool use_path_style = false + 11: optional string token } struct TStoragePolicy { diff --git a/regression-test/suites/cold_heat_separation/policy/alter.groovy b/regression-test/suites/cold_heat_separation/policy/alter.groovy index 672f79427630b0..cfd8e0da6fcfd2 100644 --- a/regression-test/suites/cold_heat_separation/policy/alter.groovy +++ b/regression-test/suites/cold_heat_separation/policy/alter.groovy @@ -39,6 +39,7 @@ suite("alter_policy") { "AWS_ROOT_PATH" = "path/to/rootaaaa", "AWS_ACCESS_KEY" = "bbba", "AWS_SECRET_KEY" = "aaaa", + "AWS_TOKEN" = "session_token", "AWS_MAX_CONNECTIONS" = "50", "AWS_REQUEST_TIMEOUT_MS" = "3000", "AWS_CONNECTION_TIMEOUT_MS" = "1000", @@ -70,6 +71,10 @@ suite("alter_policy") { ALTER RESOURCE "${resource_name}" PROPERTIES("AWS_REQUEST_TIMEOUT_MS" = "7777"); """ + def alter_result_succ_8 = try_sql """ + ALTER RESOURCE "${resource_name}" PROPERTIES("AWS_TOKEN" = "new_session_token"); + """ + // errCode = 2, detailMessage = current not support modify property : AWS_REGION def alter_result_fail_1 = try_sql """ ALTER RESOURCE "${resource_name}" PROPERTIES("AWS_REGION" = "8888"); @@ -112,6 +117,7 @@ suite("alter_policy") { // [has_resouce_policy_alter, s3, AWS_REQUEST_TIMEOUT_MS, 7777], // [has_resouce_policy_alter, s3, AWS_ROOT_PATH, path/to/rootaaaa], // [has_resouce_policy_alter, s3, AWS_SECRET_KEY, ******], + // [has_resouce_policy_alter, s3, AWS_TOKEN, ******], // [has_resouce_policy_alter, s3, id, {id}], // [has_resouce_policy_alter, s3, type, s3] // [has_resouce_policy_alter, s3, version, {version}]] @@ -133,6 +139,8 @@ suite("alter_policy") { assertEquals(show_alter_result[8][3], "10101010") // AWS_SECRET_KEY assertEquals(show_alter_result[9][3], "******") + // AWS_SECRET_KEY + assertEquals(show_alter_result[10][3], "******") } def check_alter_resource_result_with_policy = { resource_name -> @@ -151,6 +159,7 @@ suite("alter_policy") { // 
[has_resouce_policy_alter, s3, AWS_REQUEST_TIMEOUT_MS, 7777], // [has_resouce_policy_alter, s3, AWS_ROOT_PATH, path/to/rootaaaa], // [has_resouce_policy_alter, s3, AWS_SECRET_KEY, ******], + // [has_resouce_policy_alter, s3, AWS_TOKEN, ******], // [has_resouce_policy_alter, s3, id, {id}], // [has_resouce_policy_alter, s3, type, s3] // [has_resouce_policy_alter, s3, version, {version}]] @@ -172,6 +181,8 @@ suite("alter_policy") { assertEquals(show_alter_result[8][3], "path/to/rootaaaa") // AWS_SECRET_KEY assertEquals(show_alter_result[9][3], "******") + // AWS_SECRET_KEY + assertEquals(show_alter_result[10][3], "******") } From a6bc92edc5870481d9a7091f422054589d394150 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Thu, 25 Apr 2024 20:15:04 +0800 Subject: [PATCH 025/163] [RegressionTest](test) Add fuzzy test config for column string overflow and p2 case (#34091) Add fuzzy test config for column string overflow and p2 case --- be/src/common/config.cpp | 4 + be/src/common/config.h | 3 + be/src/vec/columns/column_string.cpp | 3 +- .../load_from_big_lateral_view.out | 721 ++++++++++++++++++ .../string_over_flow/string_over_flow.groovy | 24 + 5 files changed, 753 insertions(+), 2 deletions(-) create mode 100644 regression-test/data/query_p2/string_over_flow/load_from_big_lateral_view.out create mode 100644 regression-test/suites/query_p2/string_over_flow/string_over_flow.groovy diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index de1458c240dff3..11a8b97842df9d 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1216,6 +1216,8 @@ DEFINE_mBool(enable_injection_point, "false"); DEFINE_mBool(ignore_schema_change_check, "false"); +DEFINE_mInt64(string_overflow_size, "4294967295"); // std::numic_limits::max() + // The min thread num for BufferedReaderPrefetchThreadPool DEFINE_Int64(num_buffered_reader_prefetch_thread_pool_min_thread, "16"); // The max thread num for BufferedReaderPrefetchThreadPool @@ -1663,6 +1665,8 @@ Status set_fuzzy_configs() { ((distribution(*generator) % 2) == 0) ? "true" : "false"; fuzzy_field_and_value["enable_shrink_memory"] = ((distribution(*generator) % 2) == 0) ? "true" : "false"; + fuzzy_field_and_value["string_overflow_size"] = + ((distribution(*generator) % 2) == 0) ? 
"10" : "4294967295"; fmt::memory_buffer buf; for (auto& it : fuzzy_field_and_value) { diff --git a/be/src/common/config.h b/be/src/common/config.h index 4139d76b6bcb7a..fe7009c7691fe9 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1295,6 +1295,9 @@ DECLARE_mBool(enable_injection_point); DECLARE_mBool(ignore_schema_change_check); +/** Only use in fuzzy test **/ +DECLARE_mInt64(string_overflow_size); + // The min thread num for BufferedReaderPrefetchThreadPool DECLARE_Int64(num_buffered_reader_prefetch_thread_pool_min_thread); // The max thread num for BufferedReaderPrefetchThreadPool diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 843b2fa72a7eb4..514980cc8bab5c 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -608,8 +608,7 @@ ColumnPtr ColumnStr::index(const IColumn& indexes, size_t limit) const { template ColumnPtr ColumnStr::convert_column_if_overflow() { - // TODO: Try to fuzzy the overflow size to test more case in CI - if (std::is_same_v && chars.size() > std::numeric_limits::max()) { + if (std::is_same_v && chars.size() > config::string_overflow_size) { auto new_col = ColumnStr::create(); const auto length = offsets.size(); diff --git a/regression-test/data/query_p2/string_over_flow/load_from_big_lateral_view.out b/regression-test/data/query_p2/string_over_flow/load_from_big_lateral_view.out new file mode 100644 index 00000000000000..8426d697d00e96 --- /dev/null +++ b/regression-test/data/query_p2/string_over_flow/load_from_big_lateral_view.out @@ -0,0 +1,721 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- + above the unusual accounts. slyl + according to the blith + accounts haggle blithely above the s + across the fu + along the slyly regula + alongside of + alongside of the carefully exp + among the ironic, final requests ma + asymptotes + beans. de + blithely + blithely bold forges above t + blithely regular deposits cajole fluff + blithely. quickly final deposits wa + bold accounts. fluffil + bold foxes sleep finally + boost blithely after the inst + cajole quickly regular, bold packa + cajole regular packages. special, ir + carefully even ideas cajole + carefully even packages are carefully pend + carefully regular ac + carefully regular instructions. regula + deposits run blithely furiously special re + deposits sleep furiously across the pendin + deposits. e + deposits. furiously close t + deposits. ironic, regular foxes throu + dolphins are furiously slyly regular depo + even deposi + even deposits. carefully ir + even foxes. final pinto beans sleep + even instructions according to the furio + even packages co + excuses. bl + express foxes + express, regular packages hag + final dep + final pinto + furiously blithely unusual requests. + furiously bold dolphins; slyly expr + furiously bold platel + furiously quickly bold frets. bra + furiously regular accounts cajole carefu + furiously regular pinto be + furiously stealthy deposits cajole around + haggle carefully. reg + ideas use blithely a + instructions sleep + ironic accounts. special theod + ironic packages. + ironic requests nag accord + ironic, even accounts. accounts use b + of the careful + on the furiously permanent dol + orbits haggle + packages a + packages according to the final, even p + packages sleep blithely fluffily final + packages. busy, + packages. car + packages. careful + packages. 
carefull + pending foxes cajole slyly carefull + pending ideas. quickly quiet requests i + pending, pending platelets nag e + pinto beans above the furiously + pinto beans are slyly + poach unusual foxes. fo + quickly even accounts. furiously i + quickly pending p + quickly regular ideas: express, final requ + quietly carefull + regular deposits cajole blithel + regular deposits unwind slyly al + regular packag + regular, even instructions lose s + requests cajole qu + requests. blithely ironic instructions sl + requests. quickly regula + silent orbits across the + sleep blithely at the slow packages! bli + slyly above th + slyly across the + slyly at + slyly even ideas. + slyly ironic accoun + slyly pending + slyly special theodolit + special excuses. carefully expres + special packages after the d + special, silent deposi + the ideas. + the regular, ironic excuses. furious + the unusual accounts hagg + theodolites. quickly final deposits + unusual instructions doze bol + use about the carefully pending deposits. +, express ideas. carefully regular wa +, express packages accord +, final platelets. quick theodolites us +, ironic accounts +, ironic instructions. iro +- carefully daring foxes wake furiously. +. blithely final decoy +. carefully regular packages nod +. fluffily final platelets play ca +. furiously bold theodolites +. furiously expre +. furiously ironic dependenc +. ironic i +above the quickly ironic dependenc +accounts ar +accounts boost sl +accounts haggle foxes. un +accounts. ironic accounts haggle aft +accounts? +ackages whitho +ackages. carefully special instructio +ackages. silent theodolites detect alo +across the furiousl +across the s +after the quickly +ag slyly pac +ages affix according to the express accoun +ages boost. even accounts haggle carefull +ages haggle slyly among the ironic, r +ages haggle. pending deposits kind +ages. slyly ironic +aggle acro +aggle carefu +aggle quickly requests. fluffily final +ainst the furiously +aintain idly ironic, express +ajole quickly. ironic accounts cajo +ake furiously. furiously bold deposits nag +ake slyly about the +ake. carefully bold deposits boost. i +ake. silent dependencies x-ray finally. pac +al accounts. slyly final foxes aff +al dolphins. so +al foxes caj +al foxes. slyly final theodolit +al ideas should have to integrate +al ideas sleep acc +al ideas. busy requests wake car +al packages use fluffily alon +al, express requests. blithely exp +alms sleep furiou +along the quickly final theodolite +althily regular requests. f +ans wake after the evenly p +ans. regular, special ideas ha +apades are alongside of the saut +ar pinto beans about the bold, unusua +ar pinto beans. carefully unusual pinto bea +ar theodolites along the final +are carefully deposits! furiously bold pac +arefully dogged, regular packages. +arefully final dependenc +arefully. care +arthogs lose at +arthogs lose quickly +as. slyly silent dolphins sublat +at the thin, ironic packages +beans use carefully abo +beans. even, ironic dep +blithely e +blithely ironi +blithely. furiou +bold foxes cajole blithely after the flu +bold, silent accounts detect carefully un +boost against the slyly regular pl +boost slyly +c packages. ironic accounts believe +c packages. slyly express shea +carefully f +carefully ir +carefully pending requ +carefully quiet packages wake blithely alon +carefully speci +ccounts along the ironic re +ccounts wake i +ccounts wake quickly. furiousl +ccounts. slyly iron +ch carefully excuses. 
carefully e +cies according to the fluffily ruthless +ckages affix. ca +ckages snooze never after the carefull +ckly above the quickly +ckly bold asymptotes cajole blit +ckly even, busy pinto beans. +ckly express pinto b +ckly ironic dolphins. +ckly pending foxes. deposits ha +counts according +counts across the special, unusual p +counts. blithe +cross the pending +d ideas. requests +d theodolites cajo +de of the br +deas was beside +deas. furiously bold packages alo +dencies. bold theodolites haggle. blit +dependencies among the packa +dependencies. p +deposits along the furiously special ac +deposits nag. carefully ironic depos +ding deposits above the c +doggedly ironic ideas +doubt among the ironic, regular +doubt ironic multipliers. quick +e above the blithely +e accounts. regular, even theodolites do en +e blithely bold foxes. accounts serve furi +e busily pending requests. fu +e carefull +e carefully regular foxes sleep furio +e carefully. bl +e fluffily bold de +e fluffily final account +e fluffily unusual hockey players. sl +e furiously final foxes. fur +e ironically even requests. sl +e pinto beans. quickly unusual +e quickly even requests sleep furiousl +e the blithely unusual packag +e the carefully final d +e. deposits alon +e. fluffy depths x-ray around the furious +e. furiously +ecial dependenc +ecial deposits. blithely even requests +ecial foxes affix furiously agains +ecial theodolites. carefully bol +eep blithely. ruthless pac +eep silently de +efully against +efully express requests. boldly silent theo +efully. quickly express deposits use. iro +efully: slyly e +egular accounts. unusual, special reques +egular deposits nag about t +egular ideas n +egular ideas. blit +egular packages integrate blithel +egular pearls integrate carefully. sl +egular pla +egularly regu +ely after the fluffily bold +ely silent foxes +en account +en deposits cajole slyly fin +en escapades +en excuses beside the furiously final acco +encies affix slyly: unusual deposits +encies snooz +endencies. furiously express depend +ending dependencie +ending deposits. fluffi +ending packages wake around the +eodolites. bold, brave patterns sleep. qui +ep ironic dependencies. regula +ep slyly. slyly brave deposits +ependencies. +eposits sleep blithely final id +eposits wake furiously unusual th +eposits. blithely unusual idea +equests. fina +equests. final +ernes use at +erve daringly quick packages. +es detect. deposits wa +es sleep. slyly even packages a +es use final, final dependencies. pl +ess packages. ironic, ironic +ests boost according +ests haggle. carefully pending +ests. blithely even deposit +ests. regular, final theodolites sleep thi +evenly special deposits. ir +express re +f the quickly pend +ffily bold +ffily bold +ffily bold accounts. final, regular +ffily busy packages haggle +fily final packages boost furiously sil +fily regular deposits. sl +final deposits wake furiously pen +final, final deposits. quickly regular pin +final, ironic accounts. regular +foxes wake quickly plat +foxes. fluffily ironic theodolites affi +fter the accounts. fluffily ironic packag +ful deposits. blithely +fully even i +fully final theodolites +furiously b +furiously final accounts. request +furiously quiet pinto beans boos +furiously. +g pinto beans. +g requests wake car +g the final, pending pinto be +g the ironic, regular +g to the regular, r +gage fluffily ru +gage. slyly pending platelets in pl +gainst the special request +ges are sometimes special dolphins. +ges boost along the fu +ges sleep. 
final deposits sleep +ges-- carefully ironic accounts +ges. furious +ges. regular theodolites +ges; regular theodolites cajo +gifts x-ray carefully +gle slyly final deposits. daringl +gle. packages haggle furiously a +gular pinto beans. finally fin +gularly expre +haggle furiously alo +haggle-- even foxes affix. de +have to caj +he fluffily silent accounts. careful +he furiously bold de +he ironic plat +he ironic, express realms. furiously ironic +he quickly bold ideas might +he slyly special +he slyly special asympt +hely pending reques +hely regular theodolites. bold p +hins nag furiously. regular theodolites +hins. ideas dazzle a +hlessly even accou +hogs. furiously regular accounts again +ial foxes. furiously final instructions abo +ic asymptotes cajole even packages. +ic theodolites. +icingly even accounts use +ick ideas. foxes along the special +ickly ironic deposits +ickly regula +ideas. furiously regular r +idly across the final, special +ies. special e +ilent accounts. even dependencies haggl +ily express pac +ily ironic +iments wake. slyly pending requests hinde +inal accounts. express, r +ince the furiously final a +ins sleep fluffily about t +inst the carefully bold instructions +instructions. slow +into beans alo +into beans cajole blithely about the +iously ironic deposits sleep blithely bl +ironic instruct +ites must have to inte +ites nag slyly alongside of th +ites. furio +ites. slyly special ideas wake furiously ag +ithely about the fluffily even requests. +ithely according to t +ithely among the slyly regular excus +ithely idle foxes nod alongside of the +ithely ironic accounts solve +ithely ironic requests! furiously regula +ithely pending f +ithely. sly +its doze furi +ix carefully silently final +jole quickly fluf +kages sleep. blithely regul +kages wake above the asymptotes. carefull +kages. blith +kages. blithely +ke carefully carefu +key players. sil +l accounts-- furi +l deposits sleep blithely about the qui +l packages are carefully above the +l packages. blithely permanent ideas +l requests. bl +l tithes. packages play quickly according +l, thin foxes boost slyly platelets +lar ideas nag alongside of the slyly ironi +lar pinto beans dazz +lar requests. blithely expr +le packages wake slyly among the +le quickly slyly regu +le slyly af +le. packages boost slyly. final, ex +lent foxes cajole fur +lent packages. silent, ironic asym +lently regular deposits. carefully unusua +lently spe +less warthogs wake fluff +lites against the carefully +lites use furiously alongside of the +lites. blithely silent +lites. carefully ironic courts alongsid +lithely bold packages sleep fluffily. f +lithely furiously +lithely slyly bold pinto beans. e +lly furious request +lly regular excuses. qui +lly unusual deposits impress. doggedly bo +long the c +long the carefully fin +longside of the quic +ls. busily regular reques +lve furiously am +ly about the ir +ly above the blithely final foxes. iro +ly according to the fluffily final packages +ly among the slyly even deposits. even ex +ly bold foxes. ironic, expres +ly even deposits are fluffily after t +ly even deposits. a +ly even packages. ironic packag +ly furiously +ly idly bold requests. evenl +ly ironic warthogs are slyly regular, +ly pending requests bo +ly quickly ev +ly regular instructions integrate carefu +ly regular requests. thinly bold deposi +ly regular theodolites wake th +ly silent deposits are bra +ly silent requests haggle f +ly special war +ly stealth +ly. carefully speci +ly. 
fluffily +lyly after the furiously regular instr +lyly ironic instructions run unusual pack +lyly slyly ironic pinto beans. foxes +mptotes. furiously final +must boost. qu +n requests. final, final packages haggle +n, final deposits wake slyly blithe +nal ideas integrate slyly furiously final +nal sauternes cajole. ironic pac +nal, express courts c +nal, regular deposits p +nal, unusual theodolites +ncies hang aroun +ndencies sleep fluffily +nding pearls. furiously even ep +nding request +nding requests wake above the carefully b +ng deposits d +ng ideas above the final instructi +ng the carefully sil +ng the quickly reg +ng the regular fo +ng the unusual, special inst +ng, even courts nod furiously. careful +ngside of the fu +ngside of the slyly express reque +nic depths. even deposits about the +nic foxes. ideas are. sly +nic frays hag +ns haggle against +ns. carefully pending requests us +ns. furiously even deposits integrat +nst the furiously r +nstructions slee +ntly ironic instructions u +nto beans. f +nts are. furiously enticing packages +nts detect quickly against the blit +nts sleep slyly ironic accounts. ironic pac +nts use bold, pending +nusual sheaves wake blithely am +o the slyly final packages. furiously fi +odolites. deposits haggle car +old ideas. blithely final depo +olites. ca +olites. daringly express req +olphins. blithe +ong the regular packages x-ray fluffi +ongside of the regular theodoli +onic foxes. quickly iron +onic packages cajole +ons. fluffily final foxes above the +oost busily ironic, ir +orges. unusual packages cajole furiou +osits. ideas hang after +otes wake enticing dep +ound the ideas haggle even, fluffy ideas. +ounts are carefull +ounts cajole closely regular reque +ounts must have to +ounts. ideas ar +ounts. slyly ruthless dugouts wake +ously bold courts. regular +ously ironic accounts. even dependencies +ously regular requests caj +ously special instructions among the +out the blithely +out the carefully ironic pa +ove the requests sleep above +p alongside of the ironic, unusual in +p slyly. theodolites alongside +p. furiously pending in +packages cajol +packages wake sometim +packages. packages integrate carefull +packages. slyly regular foxes boost. slyly +packages? slyly regular pin +pecial pinto beans ha +pecial pinto beans. regular packages +permanent courts detec +pinto beans haggle carefully bold req +pon the pending, ev +press, special +pths sleep along th +ptotes: th +quests are furiously express theodo +quests nag sly +quests. fluffi +quickly express ideas. u +r accounts. blithely regular deposits in +r instructions slee +r packages cajole sometimes +r pinto beans are fluffily a +r the carefully +r the regular, express requests are fur +rate. pending packages against the +ray. slyly regular accounts sleep slowly +rding to the depths. ironic foxes in +re slyly carefu +refully even deposits. accounts use +regular asymptotes are quickly regular pint +regular dolphins wake +regular frets serve furiously. +regular ideas. slyly express pack +regular packages. blithely final foxes wake +requests sleep +ress, express instructi +riously bold deposits sleep +riously express ideas. Tiresias at t +riously slyly ironic pinto be +riously special packages n +ronic packages sleep. blithely unus +round the exp +s cajole furiously express request +s cajole slyly even req +s except the furiously regular requ +s hinder fu +s integrate. slyly ironic foxes wake quick +s outside the permanent, even instructio +s sleep bravely +s sleep carefully. 
slyly special +s sleep. idle multipliers w +s the express, silent reque +s wake evenl +s wake furiously regular asymp +s wake quickly. slyly pending p +s. blithely final re +s. final ideas boost slyl +s. quickly regular accounts acros +s. slyly special accounts sleep blithely +s. special packages a +s; ironic, silent pain +s? final, ironic accounts cajole quickly +se quickly? +se theodolites integrate +side of the expr +side of the quickly exp +silent pack +sits cajole blithely after the regularly i +sits sleep furiously fin +sits. furiously even packages snooze across +sleep slyly iron +sly after the even accounts. blithe +sly final packages along the bold packag +sly theodolites haggle carefully quickly +slyly ironi +slyly slyly regular theodo +solve-- slyly silent instructio +ss the regular accounts. daringly fin +ss, bold courts boos +st the carefully regular ideas- +st the ironic dep +sts. ironi +sual pinto beans boost blithely re +sublate according to the unusual, f +symptotes breach carefully according to th +symptotes wake carefully pending i +t deposits boost blithely s +t the blithely unusual deposits. bu +t the quickly pen +tainments. +telets across the final sheaves h +ter the package +tes are silent +tes kindle fluffily blit +tes. final, express deposits against the +the carefully regular ideas cajo +the furiou +the furiously even acc +the furiously f +the ideas. caref +the instructions. carefully ironic pa +the quickly bold packages. carefu +the regular instructions. quickly even pack +the slyly +the slyly final requests: blithe packa +the warhorses nag enticingly at th +thely carefully silent dep +thely never even accounts. express, fi +thely regular deposits detect furiously. +times. furiously idle instr +tions affix. blithe +tions. blithely special foxes above the un +tructions about the quickly final +ts above the quickly unus +ts are quickl +ts haggle above the final as +ts nag. blithely +ts poach. carefully darin +ts use around +ts use furiousl +ts wake. blithely sil +ts. carefully regular th +ual packages haggl +ual, ironic ac +uests boost along the final, regular acc +uests need to wake fu +uests snooze blith +ugouts are blithely regu +uickly according +uickly alongside of the care +ular accounts nag blithely iron +ular accounts sublate careful +ular accounts! +ular excuses. fluffily s +ular packages. final p +ular pinto bean +ully bold packages. s +ully thin wa +und the final foxes. enticing p +und the packages. express exc +unts affix sl +unts nag blithely b +unts promise. i +unts. furiousl +unusual asymptotes cajole around the fu +unusual excuses. ironic, even account +unusual, regular foxes sno +uring the carefully eve +uriously at the pending, final frets. acco +uriously even pi +uriously even requests. slyly fi +uriously final accounts. furiously pending +uriously pending +uriously regular +uriously regular foxes. ev +use. quietl +usily final +usly regular +usly. regular p +usual accounts cajole a +usy packages. even, sly ideas sleep slyl +ve dolphins sleep permanentl +ve the careful +ve to nag furiously. fluffily iro +ven packages. even requests across the +ven, final ins +ven, special foxes. pending inst +wake acros +wake carefully +wake furio +wake furiously about the regular request +would detect ironic, even pinto be +x-ray slyly fluffily bus +xcuses thrash according to th +xpress accounts. regular, regular pint +xpress depos +xpress deposits. requests shall ha +xpress escapades. 
fl +y according to the express +y according to the fluf +y according to the regular, fluff +y against the quickly ironic pin +y bold pains? final pinto beans +y final accounts. carefully even theo +y final asymptotes. blithely fluffy req +y final foxe +y final foxes. fluffily fina +y final requests wake busily e +y final requests wake. blithely +y final sentiments. reg +y ironic braids during the +y ironic grouches. blithely even +y ironic instructions +y ironic packages boost permanentl +y pending instructions hagg +y regular requests nag furio +y! even accou +y. close accounts according to the final +y. regular pinto b +y. slyly fin +yly express instructions cajole +yly final instructions across th +yly final requests cajole furiou +yly ironic theodolites at the quickly iron +yly ruthless accounts wake carefull +ymptotes above the pend +ymptotes dazzle; bold, regular re +ys carefully regular instructi + diff --git a/regression-test/suites/query_p2/string_over_flow/string_over_flow.groovy b/regression-test/suites/query_p2/string_over_flow/string_over_flow.groovy new file mode 100644 index 00000000000000..71f95493138d9b --- /dev/null +++ b/regression-test/suites/query_p2/string_over_flow/string_over_flow.groovy @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("string_over_flow") { + def dbName = "regression_test_tpch_sf100_p2" + sql "USE ${dbName}" + sql "set parallel_pipeline_task_num = 4;" + qt_sql """select b.l_comment from nation a join[broadcast] lineitem b on a.n_nationkey = b.l_partkey order by 1;""" +} + From 85f37257c2238ebc4e69ada3acc7ac089f847dfb Mon Sep 17 00:00:00 2001 From: AlexYue Date: Thu, 25 Apr 2024 20:39:25 +0800 Subject: [PATCH 026/163] [feature](Cache) Limit cache usage of TTL (#34084) --- be/src/common/config.cpp | 2 + be/src/common/config.h | 2 + be/src/io/cache/block_file_cache.cpp | 13 +++- be/src/io/cache/block_file_cache.h | 1 + be/src/io/cache/file_block.cpp | 5 ++ be/src/io/cache/file_block.h | 2 + be/test/io/cache/block_file_cache_test.cpp | 79 ++++++++++++++++++---- 7 files changed, 89 insertions(+), 15 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 11a8b97842df9d..e1c8114800a521 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1226,6 +1226,8 @@ DEFINE_Int64(num_buffered_reader_prefetch_thread_pool_max_thread, "64"); DEFINE_Int64(num_s3_file_upload_thread_pool_min_thread, "16"); // The max thread num for S3FileUploadThreadPool DEFINE_Int64(num_s3_file_upload_thread_pool_max_thread, "64"); +// The max ratio for ttl cache's size +DEFINE_mInt64(max_ttl_cache_ratio, "90"); // clang-format off #ifdef BE_TEST diff --git a/be/src/common/config.h b/be/src/common/config.h index fe7009c7691fe9..ce244da36d53b9 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1306,6 +1306,8 @@ DECLARE_Int64(num_buffered_reader_prefetch_thread_pool_max_thread); DECLARE_Int64(num_s3_file_upload_thread_pool_min_thread); // The max thread num for S3FileUploadThreadPool DECLARE_Int64(num_s3_file_upload_thread_pool_max_thread); +// The max ratio for ttl cache's size +DECLARE_mInt64(max_ttl_cache_ratio); #ifdef BE_TEST // test s3 diff --git a/be/src/io/cache/block_file_cache.cpp b/be/src/io/cache/block_file_cache.cpp index b41cb9f6a5fed9..26ca8e47596f75 100644 --- a/be/src/io/cache/block_file_cache.cpp +++ b/be/src/io/cache/block_file_cache.cpp @@ -623,7 +623,9 @@ BlockFileCache::FileBlockCell* BlockFileCache::add_cell(const UInt128Wrapper& ha auto& offsets = _files[hash]; DCHECK((context.expiration_time == 0 && context.cache_type != FileCacheType::TTL) || - (context.cache_type == FileCacheType::TTL && context.expiration_time != 0)); + (context.cache_type == FileCacheType::TTL && context.expiration_time != 0)) + << fmt::format("expiration time {}, cache type {}", context.expiration_time, + context.cache_type); FileCacheKey key; key.hash = hash; @@ -639,6 +641,7 @@ BlockFileCache::FileBlockCell* BlockFileCache::add_cell(const UInt128Wrapper& ha _key_to_time[hash] = context.expiration_time; _time_to_key.insert(std::make_pair(context.expiration_time, hash)); } + _cur_ttl_size += cell.size(); } auto [it, _] = offsets.insert(std::make_pair(offset, std::move(cell))); _cur_cache_size += size; @@ -695,6 +698,10 @@ const BlockFileCache::LRUQueue& BlockFileCache::get_queue(FileCacheType type) co bool BlockFileCache::try_reserve_for_ttl(size_t size, std::lock_guard& cache_lock) { size_t removed_size = 0; size_t cur_cache_size = _cur_cache_size; + auto limit = config::max_ttl_cache_ratio * _capacity; + if ((_cur_ttl_size + size) * 100 > limit) { + return false; + } auto is_overflow = [&] { return _disk_resource_limit_mode ? 
removed_size < size : cur_cache_size + size - removed_size > _capacity; @@ -1129,6 +1136,9 @@ void BlockFileCache::remove(FileBlockSPtr file_block, T& cache_lock, U& block_lo } } _cur_cache_size -= file_block->range().size(); + if (FileCacheType::TTL == type) { + _cur_ttl_size -= file_block->range().size(); + } auto& offsets = _files[hash]; offsets.erase(file_block->offset()); if (offsets.empty()) { @@ -1544,6 +1554,7 @@ std::string BlockFileCache::clear_file_cache_directly() { int64_t disposible_queue_size = _disposable_queue.get_elements_num(cache_lock); _files.clear(); _cur_cache_size = 0; + _cur_ttl_size = 0; _time_to_key.clear(); _key_to_time.clear(); _index_queue.clear(cache_lock); diff --git a/be/src/io/cache/block_file_cache.h b/be/src/io/cache/block_file_cache.h index 282148aa566dd3..f086c2c680ee16 100644 --- a/be/src/io/cache/block_file_cache.h +++ b/be/src/io/cache/block_file_cache.h @@ -394,6 +394,7 @@ class BlockFileCache { CachedFiles _files; QueryFileCacheContextMap _query_map; size_t _cur_cache_size = 0; + size_t _cur_ttl_size = 0; std::multimap _time_to_key; std::unordered_map _key_to_time; // The three queues are level queue. diff --git a/be/src/io/cache/file_block.cpp b/be/src/io/cache/file_block.cpp index 2efc26fb1a6a12..5985aa95f7abdc 100644 --- a/be/src/io/cache/file_block.cpp +++ b/be/src/io/cache/file_block.cpp @@ -30,6 +30,11 @@ namespace doris { namespace io { +std::ostream& operator<<(std::ostream& os, const FileBlock::State& value) { + os << FileBlock::state_to_string(value); + return os; +} + FileBlock::FileBlock(const FileCacheKey& key, size_t size, BlockFileCache* mgr, State download_state) : _block_range(key.offset, key.offset + size - 1), diff --git a/be/src/io/cache/file_block.h b/be/src/io/cache/file_block.h index dd4ef3757074ee..2587cd8607fc5a 100644 --- a/be/src/io/cache/file_block.h +++ b/be/src/io/cache/file_block.h @@ -154,6 +154,8 @@ class FileBlock { size_t _downloaded_size {0}; }; +extern std::ostream& operator<<(std::ostream& os, const FileBlock::State& value); + using FileBlockSPtr = std::shared_ptr; using FileBlocks = std::list; diff --git a/be/test/io/cache/block_file_cache_test.cpp b/be/test/io/cache/block_file_cache_test.cpp index 6b1139d1b41c02..64778b396a29b2 100644 --- a/be/test/io/cache/block_file_cache_test.cpp +++ b/be/test/io/cache/block_file_cache_test.cpp @@ -679,6 +679,59 @@ TEST_F(BlockFileCacheTest, resize) { } } +TEST_F(BlockFileCacheTest, max_ttl_size) { + if (fs::exists(cache_base_path)) { + fs::remove_all(cache_base_path); + } + fs::create_directories(cache_base_path); + TUniqueId query_id; + query_id.hi = 1; + query_id.lo = 1; + io::FileCacheSettings settings; + settings.query_queue_size = 100000000; + settings.query_queue_elements = 100000; + settings.capacity = 100000000; + settings.max_file_block_size = 100000; + settings.max_query_cache_size = 30; + io::CacheContext context; + context.cache_type = io::FileCacheType::TTL; + context.query_id = query_id; + int64_t cur_time = UnixSeconds(); + context.expiration_time = cur_time + 120; + auto key1 = io::BlockFileCache::hash("key5"); + io::BlockFileCache cache(cache_base_path, settings); + ASSERT_TRUE(cache.initialize()); + int i = 0; + for (; i < 100; i++) { + if (cache.get_lazy_open_success()) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + ASSERT_TRUE(cache.get_lazy_open_success()); + int64_t offset = 0; + for (; offset < 100000000; offset += 100000) { + auto holder = cache.get_or_set(key1, offset, 100000, context); + auto blocks = 
fromHolder(holder); + ASSERT_EQ(blocks.size(), 1); + if (offset < 90000000) { + assert_range(1, blocks[0], io::FileBlock::Range(offset, offset + 99999), + io::FileBlock::State::EMPTY); + ASSERT_TRUE(blocks[0]->get_or_set_downloader() == io::FileBlock::get_caller_id()); + download(blocks[0]); + assert_range(1, blocks[0], io::FileBlock::Range(offset, offset + 99999), + io::FileBlock::State::DOWNLOADED); + } else { + assert_range(1, blocks[0], io::FileBlock::Range(offset, offset + 99999), + io::FileBlock::State::SKIP_CACHE); + } + blocks.clear(); + } + if (fs::exists(cache_base_path)) { + fs::remove_all(cache_base_path); + } +} + TEST_F(BlockFileCacheTest, query_limit_heap_use_after_free) { if (fs::exists(cache_base_path)) { fs::remove_all(cache_base_path); @@ -1773,10 +1826,10 @@ TEST_F(BlockFileCacheTest, ttl_reverse) { query_id.hi = 1; query_id.lo = 1; io::FileCacheSettings settings; - settings.query_queue_size = 30; + settings.query_queue_size = 36; settings.query_queue_elements = 5; - settings.capacity = 30; - settings.max_file_block_size = 5; + settings.capacity = 36; + settings.max_file_block_size = 7; settings.max_query_cache_size = 30; io::CacheContext context; context.cache_type = io::FileCacheType::TTL; @@ -1792,25 +1845,23 @@ TEST_F(BlockFileCacheTest, ttl_reverse) { }; std::this_thread::sleep_for(std::chrono::milliseconds(1)); } - { - auto holder = cache.get_or_set(key2, 0, 30, context); /// Add range [0, 29] + ASSERT_TRUE(cache.get_lazy_open_success()); + for (size_t offset = 0; offset < 30; offset += 6) { + auto holder = cache.get_or_set(key2, offset, 6, context); auto blocks = fromHolder(holder); - for (auto& block : blocks) { - ASSERT_TRUE(block->get_or_set_downloader() == io::FileBlock::get_caller_id()); - download(block); - } - EXPECT_EQ(blocks.size(), 6); + ASSERT_TRUE(blocks[0]->get_or_set_downloader() == io::FileBlock::get_caller_id()); + download(blocks[0]); } { - auto holder = cache.get_or_set(key2, 50, 5, context); /// Add range [50, 54] + auto holder = cache.get_or_set(key2, 50, 7, context); /// Add range [50, 57] auto blocks = fromHolder(holder); - assert_range(1, blocks[0], io::FileBlock::Range(50, 54), io::FileBlock::State::SKIP_CACHE); + assert_range(1, blocks[0], io::FileBlock::Range(50, 56), io::FileBlock::State::SKIP_CACHE); } { context.cache_type = io::FileCacheType::NORMAL; - auto holder = cache.get_or_set(key2, 50, 5, context); /// Add range [50, 54] + auto holder = cache.get_or_set(key2, 50, 7, context); /// Add range [50, 57] auto blocks = fromHolder(holder); - assert_range(1, blocks[0], io::FileBlock::Range(50, 54), io::FileBlock::State::SKIP_CACHE); + assert_range(1, blocks[0], io::FileBlock::Range(50, 56), io::FileBlock::State::SKIP_CACHE); } if (fs::exists(cache_base_path)) { From 0644f45df3b5a63a35c67d56dfeb92a6f3fc80d0 Mon Sep 17 00:00:00 2001 From: AlexYue Date: Thu, 25 Apr 2024 20:41:45 +0800 Subject: [PATCH 027/163] [feature](Cloud) Load index data into index cache when writing data (#34046) --- be/src/io/fs/broker_file_writer.h | 3 +- be/src/io/fs/file_writer.h | 3 ++ be/src/io/fs/hdfs_file_writer.cpp | 3 -- be/src/io/fs/hdfs_file_writer.h | 5 ++- be/src/io/fs/local_file_writer.h | 4 ++- be/src/io/fs/s3_file_bufferpool.cpp | 23 +------------- be/src/io/fs/s3_file_bufferpool.h | 23 ++------------ be/src/io/fs/s3_file_writer.cpp | 31 +++++++------------ be/src/io/fs/s3_file_writer.h | 12 +++---- be/src/io/fs/stream_sink_file_writer.h | 3 ++ .../olap/rowset/segment_v2/segment_writer.cpp | 15 +++++++++ be/test/olap/tablet_cooldown_test.cpp | 2 ++ 12 
files changed, 53 insertions(+), 74 deletions(-) diff --git a/be/src/io/fs/broker_file_writer.h b/be/src/io/fs/broker_file_writer.h index 58bba9febd3db2..d6fce52a05c662 100644 --- a/be/src/io/fs/broker_file_writer.h +++ b/be/src/io/fs/broker_file_writer.h @@ -33,7 +33,7 @@ namespace doris { class ExecEnv; namespace io { - +struct FileCacheAllocatorBuilder; class BrokerFileWriter final : public FileWriter { public: // Create and open file writer @@ -50,6 +50,7 @@ class BrokerFileWriter final : public FileWriter { const Path& path() const override { return _path; } size_t bytes_appended() const override { return _cur_offset; } bool closed() const override { return _closed; } + FileCacheAllocatorBuilder* cache_builder() const override { return nullptr; } private: Status _write(const uint8_t* buf, size_t buf_len, size_t* written_bytes); diff --git a/be/src/io/fs/file_writer.h b/be/src/io/fs/file_writer.h index 4feab99c09ff24..5d22dca60551c8 100644 --- a/be/src/io/fs/file_writer.h +++ b/be/src/io/fs/file_writer.h @@ -28,6 +28,7 @@ namespace doris::io { class FileSystem; +struct FileCacheAllocatorBuilder; // Only affects remote file writers struct FileWriterOptions { @@ -62,6 +63,8 @@ class FileWriter { virtual size_t bytes_appended() const = 0; virtual bool closed() const = 0; + + virtual FileCacheAllocatorBuilder* cache_builder() const = 0; }; } // namespace doris::io diff --git a/be/src/io/fs/hdfs_file_writer.cpp b/be/src/io/fs/hdfs_file_writer.cpp index c596c0e290fe8c..9ea66ca4da13eb 100644 --- a/be/src/io/fs/hdfs_file_writer.cpp +++ b/be/src/io/fs/hdfs_file_writer.cpp @@ -159,9 +159,6 @@ void HdfsFileWriter::_write_into_local_file_cache() { size_t block_size = block->range().size(); size_t append_size = std::min(data_remain_size, block_size); if (block->state() == FileBlock::State::EMPTY) { - if (_index_offset != 0 && block->range().right >= _index_offset) { - static_cast(block->change_cache_type_self(FileCacheType::INDEX)); - } block->get_or_set_downloader(); if (block->is_downloader()) { Slice s(_batch_buffer.data() + pos, append_size); diff --git a/be/src/io/fs/hdfs_file_writer.h b/be/src/io/fs/hdfs_file_writer.h index 2ce865ecfedb20..234835e083f4c5 100644 --- a/be/src/io/fs/hdfs_file_writer.h +++ b/be/src/io/fs/hdfs_file_writer.h @@ -51,6 +51,10 @@ class HdfsFileWriter final : public FileWriter { size_t bytes_appended() const override { return _bytes_appended; } bool closed() const override { return _closed; } + FileCacheAllocatorBuilder* cache_builder() const override { + return _cache_builder == nullptr ? 
nullptr : _cache_builder.get(); + } + private: // Flush buffered data into HDFS client and write local file cache if enabled // **Notice**: this would clear the underlying buffer @@ -83,7 +87,6 @@ class HdfsFileWriter final : public FileWriter { std::string _batch_buffer; }; BatchBuffer _batch_buffer; - size_t _index_offset = 0; }; } // namespace io diff --git a/be/src/io/fs/local_file_writer.h b/be/src/io/fs/local_file_writer.h index 4cd6712b04e1ef..81ebb0ebd1fcb7 100644 --- a/be/src/io/fs/local_file_writer.h +++ b/be/src/io/fs/local_file_writer.h @@ -25,7 +25,7 @@ #include "util/slice.h" namespace doris::io { - +struct FileCacheAllocatorBuilder; class LocalFileWriter final : public FileWriter { public: LocalFileWriter(Path path, int fd, bool sync_data = true); @@ -38,6 +38,8 @@ class LocalFileWriter final : public FileWriter { size_t bytes_appended() const override; bool closed() const override { return _closed; } + FileCacheAllocatorBuilder* cache_builder() const override { return nullptr; } + private: void _abort(); Status _close(bool sync); diff --git a/be/src/io/fs/s3_file_bufferpool.cpp b/be/src/io/fs/s3_file_bufferpool.cpp index 9df60dc1fd3042..82493fa9514f10 100644 --- a/be/src/io/fs/s3_file_bufferpool.cpp +++ b/be/src/io/fs/s3_file_bufferpool.cpp @@ -90,24 +90,6 @@ FileBuffer::~FileBuffer() { SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->s3_file_buffer_tracker()); _inner_data.reset(); } -/** - * 0. check if file cache holder allocated - * 1. update the cache's type to index cache - */ -void UploadFileBuffer::set_index_offset(size_t offset) { - _index_offset = offset; - if (_holder) { - bool change_to_index_cache = false; - for (auto iter = _holder->file_blocks.begin(); iter != _holder->file_blocks.end(); ++iter) { - if (iter == _cur_file_block) { - change_to_index_cache = true; - } - if (change_to_index_cache) { - static_cast((*iter)->change_cache_type_self(FileCacheType::INDEX)); - } - } - } -} /** * 0. 
when there is memory preserved, directly write data to buf @@ -222,9 +204,6 @@ void UploadFileBuffer::upload_to_local_file_cache(bool is_cancelled) { size_t block_size = block->range().size(); size_t append_size = std::min(data_remain_size, block_size); if (block->state() == FileBlock::State::EMPTY) { - if (_index_offset != 0 && block->range().right >= _index_offset) { - static_cast(block->change_cache_type_self(FileCacheType::INDEX)); - } block->get_or_set_downloader(); // Another thread may have started downloading due to a query // Just skip putting to cache from UploadFileBuffer @@ -279,7 +258,7 @@ Status FileBufferBuilder::build(std::shared_ptr* buf) { if (_type == BufferType::UPLOAD) { RETURN_IF_CATCH_EXCEPTION(*buf = std::make_shared( std::move(_upload_cb), std::move(state), _offset, - std::move(_alloc_holder_cb), _index_offset)); + std::move(_alloc_holder_cb))); return Status::OK(); } if (_type == BufferType::DOWNLOAD) { diff --git a/be/src/io/fs/s3_file_bufferpool.h b/be/src/io/fs/s3_file_bufferpool.h index 189f7696967759..15d0976df6b880 100644 --- a/be/src/io/fs/s3_file_bufferpool.h +++ b/be/src/io/fs/s3_file_bufferpool.h @@ -151,18 +151,10 @@ struct DownloadFileBuffer final : public FileBuffer { struct UploadFileBuffer final : public FileBuffer { UploadFileBuffer(std::function upload_cb, OperationState state, - size_t offset, std::function alloc_holder, - size_t index_offset) + size_t offset, std::function alloc_holder) : FileBuffer(BufferType::UPLOAD, alloc_holder, offset, state), - _upload_to_remote(std::move(upload_cb)), - _index_offset(index_offset) {} + _upload_to_remote(std::move(upload_cb)) {} ~UploadFileBuffer() override = default; - /** - * set the index offset - * - * @param offset the index offset - */ - void set_index_offset(size_t offset); Status append_data(const Slice& s) override; /** * read the content from local file cache @@ -206,7 +198,6 @@ struct UploadFileBuffer final : public FileBuffer { FileBlocksHolderPtr _holder; decltype(_holder->file_blocks.begin()) _cur_file_block; size_t _append_offset {0}; - size_t _index_offset {0}; uint32_t _crc_value = 0; }; @@ -272,15 +263,6 @@ struct FileBufferBuilder { return *this; } /** - * set the index offset of the file buffer - * - * @param cb - */ - FileBufferBuilder& set_index_offset(size_t index_offset) { - _index_offset = index_offset; - return *this; - } - /** * set the callback which write the content into local file cache * * @param cb @@ -309,7 +291,6 @@ struct FileBufferBuilder { std::function _download; std::function _write_to_use_buffer; size_t _offset; - size_t _index_offset; }; } // namespace io } // namespace doris diff --git a/be/src/io/fs/s3_file_writer.cpp b/be/src/io/fs/s3_file_writer.cpp index 84487f496ac1e9..69202bd22fe98f 100644 --- a/be/src/io/fs/s3_file_writer.cpp +++ b/be/src/io/fs/s3_file_writer.cpp @@ -87,16 +87,16 @@ S3FileWriter::S3FileWriter(std::shared_ptr client, std::strin : _path(fmt::format("s3://{}/{}", bucket, key)), _bucket(std::move(bucket)), _key(std::move(key)), - _client(std::move(client)), - _expiration_time(opts ? opts->file_cache_expiration : 0), - _is_cold_data(opts ? opts->is_cold_data : true), - _write_file_cache(opts ? 
opts->write_file_cache : false) { + _client(std::move(client)) { s3_file_writer_total << 1; s3_file_being_written << 1; Aws::Http::SetCompliantRfc3986Encoding(true); - if (config::enable_file_cache && _write_file_cache) { - _cache_hash = BlockFileCache::hash(_path.filename().native()); - _cache = FileCacheFactory::instance()->get_by_path(_cache_hash); + if (config::enable_file_cache && opts != nullptr && opts->write_file_cache) { + _cache_builder = std::make_unique(FileCacheAllocatorBuilder { + opts ? opts->is_cold_data : false, opts ? opts->file_cache_expiration : 0, + BlockFileCache::hash(_path.filename().native()), + FileCacheFactory::instance()->get_by_path( + BlockFileCache::hash(_path.filename().native()))}); } } @@ -264,7 +264,6 @@ Status S3FileWriter::appendv(const Slice* data, size_t data_cnt) { _upload_one_part(part_num, buf); }) .set_file_offset(_bytes_appended) - .set_index_offset(_index_offset) .set_sync_after_complete_task([this, part_num = _cur_part_num](Status s) { bool ret = false; if (!s.ok()) [[unlikely]] { @@ -282,22 +281,16 @@ Status S3FileWriter::appendv(const Slice* data, size_t data_cnt) { return ret; }) .set_is_cancelled([this]() { return _failed.load(); }); - if (_write_file_cache) { + if (_cache_builder != nullptr) { // We would load the data into file cache asynchronously which indicates // that this instance of S3FileWriter might have been destructed when we // try to do writing into file cache, so we make the lambda capture the variable // we need by value to extend their lifetime builder.set_allocate_file_blocks_holder( - [cache = _cache, k = _cache_hash, offset = _bytes_appended, - t = _expiration_time, cold = _is_cold_data]() -> FileBlocksHolderPtr { - CacheContext ctx; - ctx.cache_type = - t == 0 ? FileCacheType::NORMAL : FileCacheType::TTL; - ctx.expiration_time = t; - ctx.is_cold_data = cold; - auto holder = cache->get_or_set(k, offset, - config::s3_write_buffer_size, ctx); - return std::make_unique(std::move(holder)); + [builder = *_cache_builder, + offset = _bytes_appended]() -> FileBlocksHolderPtr { + return builder.allocate_cache_holder(offset, + config::s3_write_buffer_size); }); } RETURN_IF_ERROR(builder.build(&_pending_buf)); diff --git a/be/src/io/fs/s3_file_writer.h b/be/src/io/fs/s3_file_writer.h index a2c2ec0422a4f1..8f27b202369444 100644 --- a/be/src/io/fs/s3_file_writer.h +++ b/be/src/io/fs/s3_file_writer.h @@ -58,6 +58,10 @@ class S3FileWriter final : public FileWriter { size_t bytes_appended() const override { return _bytes_appended; } bool closed() const override { return _closed; } + FileCacheAllocatorBuilder* cache_builder() const override { + return _cache_builder == nullptr ? 
nullptr : _cache_builder.get(); + } + private: Status _abort(); [[nodiscard]] std::string _dump_completed_part() const; @@ -73,15 +77,12 @@ class S3FileWriter final : public FileWriter { std::shared_ptr _client; std::string _upload_id; - size_t _index_offset {0}; // Current Part Num for CompletedPart int _cur_part_num = 1; std::mutex _completed_lock; std::vector> _completed_parts; - UInt128Wrapper _cache_hash; - BlockFileCache* _cache; // **Attention** call add_count() before submitting buf to async thread pool bthread::CountdownEvent _countdown_event {0}; @@ -92,9 +93,8 @@ class S3FileWriter final : public FileWriter { size_t _bytes_appended = 0; std::shared_ptr _pending_buf; - uint64_t _expiration_time; - bool _is_cold_data; - bool _write_file_cache; + std::unique_ptr + _cache_builder; // nullptr if disable write file cache }; } // namespace io diff --git a/be/src/io/fs/stream_sink_file_writer.h b/be/src/io/fs/stream_sink_file_writer.h index 2bd91075ad11a0..4a0eb955c26171 100644 --- a/be/src/io/fs/stream_sink_file_writer.h +++ b/be/src/io/fs/stream_sink_file_writer.h @@ -33,6 +33,7 @@ struct RowsetId; struct SegmentStatistics; namespace io { +struct FileCacheAllocatorBuilder; class StreamSinkFileWriter final : public FileWriter { public: StreamSinkFileWriter(std::vector> streams) @@ -57,6 +58,8 @@ class StreamSinkFileWriter final : public FileWriter { return dummy; } + FileCacheAllocatorBuilder* cache_builder() const override { return nullptr; } + private: std::vector> _streams; diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index f6be1917e57840..7a83496b7fb481 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -34,6 +34,8 @@ #include "common/status.h" #include "gutil/port.h" #include "inverted_index_fs_directory.h" +#include "io/cache/block_file_cache.h" +#include "io/cache/block_file_cache_factory.h" #include "io/fs/file_system.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" @@ -1113,6 +1115,8 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size } // write data RETURN_IF_ERROR(finalize_columns_data()); + // Get the index start before finalize_footer since this function would write new data. 
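    // The offset captured below is reused later in finalize(): when the writer has a
    // cache builder (write-to-file-cache enabled), the data is not TTL cached
    // (expiration time == 0) and the BE runs in cloud mode, the cache blocks covering
    // this range are switched to FileCacheType::INDEX, so freshly written index data
    // lands in the index cache queue instead of the normal queue.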
+ uint64_t index_start = _file_writer->bytes_appended(); // write index RETURN_IF_ERROR(finalize_columns_index(index_size)); // write footer @@ -1122,6 +1126,17 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size LOG(INFO) << "segment flush consumes a lot time_ns " << timer.elapsed_time() << ", segmemt_size " << *segment_file_size; } + // When the cache type is not ttl(expiration time == 0), the data should be split into normal cache queue + // and index cache queue + if (auto* cache_builder = _file_writer->cache_builder(); cache_builder != nullptr && + cache_builder->_expiration_time == 0 && + config::is_cloud_mode()) { + auto size = *index_size + *segment_file_size; + auto holder = cache_builder->allocate_cache_holder(index_start, size); + for (auto& segment : holder->file_blocks) { + static_cast(segment->change_cache_type_self(io::FileCacheType::INDEX)); + } + } return Status::OK(); } diff --git a/be/test/olap/tablet_cooldown_test.cpp b/be/test/olap/tablet_cooldown_test.cpp index 45186246006317..49de182610459d 100644 --- a/be/test/olap/tablet_cooldown_test.cpp +++ b/be/test/olap/tablet_cooldown_test.cpp @@ -113,6 +113,8 @@ class FileWriterMock final : public io::FileWriter { const Path& path() const override { return _local_file_writer->path(); } + io::FileCacheAllocatorBuilder* cache_builder() const override { return nullptr; } + private: std::unique_ptr _local_file_writer; }; From 6736f02f279a32d5776f3b04b9516cd5b58e76d9 Mon Sep 17 00:00:00 2001 From: zhengyu Date: Thu, 25 Apr 2024 21:14:06 +0800 Subject: [PATCH 028/163] [enhancement](cloud) add bvar to monitor s3 throughput&QPS (#34087) Signed-off-by: freemandealer --- be/src/io/fs/hdfs_file_reader.cpp | 13 +++++++++++++ be/src/io/fs/s3_file_reader.cpp | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/be/src/io/fs/hdfs_file_reader.cpp b/be/src/io/fs/hdfs_file_reader.cpp index 358663b65d0dad..a26448c90e2ce3 100644 --- a/be/src/io/fs/hdfs_file_reader.cpp +++ b/be/src/io/fs/hdfs_file_reader.cpp @@ -24,6 +24,8 @@ #include #include +#include "bvar/latency_recorder.h" +#include "bvar/reducer.h" #include "common/compiler_util.h" // IWYU pragma: keep #include "common/logging.h" #include "common/sync_point.h" @@ -33,6 +35,13 @@ #include "util/doris_metrics.h" namespace doris::io { + +bvar::Adder hdfs_bytes_read_total("hdfs_file_reader", "bytes_read"); +bvar::LatencyRecorder hdfs_bytes_per_read("hdfs_file_reader", "bytes_per_read"); // also QPS +bvar::PerSecond> hdfs_read_througthput("hdfs_file_reader", + "hdfs_read_throughput", + &hdfs_bytes_read_total); + namespace { Result get_file(const hdfsFS& fs, const Path& file, int64_t mtime, @@ -148,6 +157,8 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r has_read += loop_read; } *bytes_read = has_read; + hdfs_bytes_read_total << *bytes_read; + hdfs_bytes_per_read << *bytes_read; return Status::OK(); } @@ -206,6 +217,8 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r has_read += loop_read; } *bytes_read = has_read; + hdfs_bytes_read_total << *bytes_read; + hdfs_bytes_per_read << *bytes_read; return Status::OK(); } #endif diff --git a/be/src/io/fs/s3_file_reader.cpp b/be/src/io/fs/s3_file_reader.cpp index 2bd40fbbf43861..68acbf47eb18a1 100644 --- a/be/src/io/fs/s3_file_reader.cpp +++ b/be/src/io/fs/s3_file_reader.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,9 @@ bvar::Adder s3_file_reader_read_counter("s3_file_reader", "read_at"); 
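// Same pattern as the hdfs_file_reader.cpp hunk above: a LatencyRecorder fed with the
// byte count of every read, whose sample rate doubles as read QPS (hence the "also QPS"
// comment), plus a bvar::PerSecond window over the existing bytes_read Adder that
// exposes the per-second read throughput.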
bvar::Adder s3_file_reader_total("s3_file_reader", "total_num"); bvar::Adder s3_bytes_read_total("s3_file_reader", "bytes_read"); bvar::Adder s3_file_being_read("s3_file_reader", "file_being_read"); +bvar::LatencyRecorder s3_bytes_per_read("s3_file_reader", "bytes_per_read"); // also QPS +bvar::PerSecond> s3_read_througthput("s3_file_reader", "s3_read_throughput", + &s3_bytes_read_total); Result S3FileReader::create(std::shared_ptr client, std::string bucket, std::string key, @@ -125,6 +129,7 @@ Status S3FileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_rea _path.native(), *bytes_read, bytes_req); } s3_bytes_read_total << *bytes_read; + s3_bytes_per_read << *bytes_read; s3_file_reader_read_counter << 1; DorisMetrics::instance()->s3_bytes_read_total->increment(*bytes_read); return Status::OK(); From eed69d18d8395fd8f27bbb9278c0009bb0c2ea79 Mon Sep 17 00:00:00 2001 From: walter Date: Thu, 25 Apr 2024 21:15:35 +0800 Subject: [PATCH 029/163] [feature](merge-cloud) Add txn kv stats API (#32823) --- cloud/src/meta-service/mem_txn_kv.cpp | 24 +++++++++++- cloud/src/meta-service/mem_txn_kv.h | 16 ++++++++ cloud/src/meta-service/meta_service_txn.cpp | 30 ++------------- cloud/src/meta-service/txn_kv.cpp | 25 +++++++++++- cloud/src/meta-service/txn_kv.h | 42 ++++++++++++++++++++- 5 files changed, 107 insertions(+), 30 deletions(-) diff --git a/cloud/src/meta-service/mem_txn_kv.cpp b/cloud/src/meta-service/mem_txn_kv.cpp index e10f5dd3d3ed13..9f817fa4398b3e 100644 --- a/cloud/src/meta-service/mem_txn_kv.cpp +++ b/cloud/src/meta-service/mem_txn_kv.cpp @@ -38,7 +38,7 @@ int MemTxnKv::init() { } TxnErrorCode MemTxnKv::create_txn(std::unique_ptr* txn) { - auto t = new memkv::Transaction(this->shared_from_this()); + auto* t = new memkv::Transaction(this->shared_from_this()); txn->reset(t); return TxnErrorCode::TXN_OK; } @@ -241,6 +241,9 @@ void Transaction::put(std::string_view key, std::string_view val) { std::string v(val.data(), val.size()); writes_.insert_or_assign(k, v); op_list_.emplace_back(ModifyOpType::PUT, k, v); + ++num_put_keys_; + put_bytes_ += key.size() + val.size(); + approximate_bytes_ += key.size() + val.size(); } TxnErrorCode Transaction::get(std::string_view key, std::string* val, bool snapshot) { @@ -345,6 +348,10 @@ void Transaction::atomic_set_ver_key(std::string_view key_prefix, std::string_vi std::string v(val.data(), val.size()); unreadable_keys_.insert(k); op_list_.emplace_back(ModifyOpType::ATOMIC_SET_VER_KEY, k, v); + + ++num_put_keys_; + put_bytes_ += key_prefix.size() + val.size(); + approximate_bytes_ += key_prefix.size() + val.size(); } void Transaction::atomic_set_ver_value(std::string_view key, std::string_view value) { @@ -353,6 +360,10 @@ void Transaction::atomic_set_ver_value(std::string_view key, std::string_view va std::string v(value.data(), value.size()); unreadable_keys_.insert(k); op_list_.emplace_back(ModifyOpType::ATOMIC_SET_VER_VAL, k, v); + + ++num_put_keys_; + put_bytes_ += key.size() + value.size(); + approximate_bytes_ += key.size() + value.size(); } void Transaction::atomic_add(std::string_view key, int64_t to_add) { @@ -361,6 +372,10 @@ void Transaction::atomic_add(std::string_view key, int64_t to_add) { memcpy(v.data(), &to_add, sizeof(to_add)); std::lock_guard l(lock_); op_list_.emplace_back(ModifyOpType::ATOMIC_ADD, std::move(k), std::move(v)); + + ++num_put_keys_; + put_bytes_ += key.size() + 8; + approximate_bytes_ += key.size() + 8; } void Transaction::remove(std::string_view key) { @@ -371,6 +386,10 @@ void 
Transaction::remove(std::string_view key) { end_key.push_back(0x0); remove_ranges_.emplace_back(k, end_key); op_list_.emplace_back(ModifyOpType::REMOVE, k, ""); + + ++num_del_keys_; + delete_bytes_ += key.size(); + approximate_bytes_ += key.size(); } void Transaction::remove(std::string_view begin, std::string_view end) { @@ -387,6 +406,9 @@ void Transaction::remove(std::string_view begin, std::string_view end) { remove_ranges_.emplace_back(begin_k, end_k); op_list_.emplace_back(ModifyOpType::REMOVE_RANGE, begin_k, end_k); } + ++num_del_keys_; + delete_bytes_ += begin.size() + end.size(); + approximate_bytes_ += begin.size() + end.size(); } TxnErrorCode Transaction::commit() { diff --git a/cloud/src/meta-service/mem_txn_kv.h b/cloud/src/meta-service/mem_txn_kv.h index e385c414bc5e69..359a8dcafc84a1 100644 --- a/cloud/src/meta-service/mem_txn_kv.h +++ b/cloud/src/meta-service/mem_txn_kv.h @@ -182,6 +182,16 @@ class Transaction : public cloud::Transaction { const std::vector& keys, const BatchGetOptions& opts = BatchGetOptions()) override; + size_t approximate_bytes() const override { return approximate_bytes_; } + + size_t num_del_keys() const override { return num_del_keys_; } + + size_t num_put_keys() const override { return num_put_keys_; } + + size_t delete_bytes() const override { return delete_bytes_; } + + size_t put_bytes() const override { return put_bytes_; } + private: TxnErrorCode inner_get(const std::string& key, std::string* val, bool snapshot); @@ -201,6 +211,12 @@ class Transaction : public cloud::Transaction { int64_t committed_version_ = -1; int64_t read_version_ = -1; + + size_t approximate_bytes_ {0}; + size_t num_del_keys_ {0}; + size_t num_put_keys_ {0}; + size_t delete_bytes_ {0}; + size_t put_bytes_ {0}; }; class RangeGetIterator : public cloud::RangeGetIterator { diff --git a/cloud/src/meta-service/meta_service_txn.cpp b/cloud/src/meta-service/meta_service_txn.cpp index 0afdc31d10a095..b80f9f8619c9d8 100644 --- a/cloud/src/meta-service/meta_service_txn.cpp +++ b/cloud/src/meta-service/meta_service_txn.cpp @@ -789,10 +789,6 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, return; } - int64_t put_size = 0; - int64_t del_size = 0; - int num_put_keys = 0, num_del_keys = 0; - // Get txn info with db_id and txn_id std::string info_val; // Will be reused when saving updated txn const std::string info_key = txn_info_key({instance_id, db_id, txn_id}); @@ -1039,17 +1035,14 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, lock_values.clear(); // Save rowset meta - num_put_keys += rowsets.size(); for (auto& i : rowsets) { size_t rowset_size = i.first.size() + i.second.size(); txn->put(i.first, i.second); - put_size += rowset_size; LOG(INFO) << "xxx put rowset_key=" << hex(i.first) << " txn_id=" << txn_id << " rowset_size=" << rowset_size; } // Save versions - num_put_keys += new_versions.size(); for (auto& i : new_versions) { std::string ver_val; VersionPB version_pb; @@ -1062,7 +1055,6 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, } txn->put(i.first, ver_val); - put_size += i.first.size() + ver_val.size(); LOG(INFO) << "xxx put partition_version_key=" << hex(i.first) << " version:" << i.second << " txn_id=" << txn_id; @@ -1090,11 +1082,9 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, } // Save table versions - num_put_keys += table_id_tablet_ids.size(); for (auto& i : table_id_tablet_ids) { std::string ver_key = 
table_version_key({instance_id, db_id, i.first}); txn->atomic_add(ver_key, 1); - put_size += ver_key.size(); LOG(INFO) << "xxx atomic add table_version_key=" << hex(ver_key) << " txn_id=" << txn_id; } @@ -1126,8 +1116,6 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, return; } txn->put(info_key, info_val); - put_size += info_key.size() + info_val.size(); - ++num_put_keys; LOG(INFO) << "xxx put info_key=" << hex(info_key) << " txn_id=" << txn_id; // Update stats of affected tablet @@ -1145,14 +1133,10 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, auto& num_segs_key = kv_pool.emplace_back(); stats_tablet_num_segs_key(info, &num_segs_key); txn->atomic_add(num_segs_key, stats.num_segs); - put_size += data_size_key.size() + num_rows_key.size() + num_segs_key.size() + 24; - num_put_keys += 3; } auto& num_rowsets_key = kv_pool.emplace_back(); stats_tablet_num_rowsets_key(info, &num_rowsets_key); txn->atomic_add(num_rowsets_key, stats.num_rowsets); - put_size += num_rowsets_key.size() + 8; - ++num_put_keys; }; } else { update_tablet_stats = [&](const StatsTabletKeyInfo& info, const TabletStats& stats) { @@ -1179,8 +1163,6 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, stats_pb.set_num_segments(stats_pb.num_segments() + stats.num_segs); stats_pb.SerializeToString(&val); txn->put(key, val); - put_size += key.size() + val.size(); - ++num_put_keys; }; } for (auto& [tablet_id, stats] : tablet_stats) { @@ -1192,18 +1174,14 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, if (code != MetaServiceCode::OK) return; } // Remove tmp rowset meta - num_del_keys += tmp_rowsets_meta.size(); for (auto& [k, _] : tmp_rowsets_meta) { txn->remove(k); - del_size += k.size(); LOG(INFO) << "xxx remove tmp_rowset_key=" << hex(k) << " txn_id=" << txn_id; } const std::string running_key = txn_running_key({instance_id, db_id, txn_id}); LOG(INFO) << "xxx remove running_key=" << hex(running_key) << " txn_id=" << txn_id; txn->remove(running_key); - del_size += running_key.size(); - ++num_del_keys; std::string recycle_val; std::string recycle_key = recycle_txn_key({instance_id, db_id, txn_id}); @@ -1218,8 +1196,6 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, return; } txn->put(recycle_key, recycle_val); - put_size += recycle_key.size() + recycle_val.size(); - ++num_put_keys; if (txn_info.load_job_source_type() == LoadJobSourceTypePB::LOAD_JOB_SRC_TYPE_ROUTINE_LOAD_TASK) { @@ -1227,9 +1203,9 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, } LOG(INFO) << "xxx commit_txn put recycle_key key=" << hex(recycle_key) << " txn_id=" << txn_id; - LOG(INFO) << "commit_txn put_size=" << put_size << " del_size=" << del_size - << " num_put_keys=" << num_put_keys << " num_del_keys=" << num_del_keys - << " txn_id=" << txn_id; + LOG(INFO) << "commit_txn put_size=" << txn->put_bytes() << " del_size=" << txn->delete_bytes() + << " num_put_keys=" << txn->num_put_keys() << " num_del_keys=" << txn->num_del_keys() + << " txn_size=" << txn->approximate_bytes() << " txn_id=" << txn_id; // Finally we are done... 
err = txn->commit(); diff --git a/cloud/src/meta-service/txn_kv.cpp b/cloud/src/meta-service/txn_kv.cpp index 5afe0edb40965f..013d21f84fcd58 100644 --- a/cloud/src/meta-service/txn_kv.cpp +++ b/cloud/src/meta-service/txn_kv.cpp @@ -254,6 +254,10 @@ void Transaction::put(std::string_view key, std::string_view val) { StopWatch sw; fdb_transaction_set(txn_, (uint8_t*)key.data(), key.size(), (uint8_t*)val.data(), val.size()); g_bvar_txn_kv_put << sw.elapsed_us(); + + ++num_put_keys_; + put_bytes_ += key.size() + val.size(); + approximate_bytes_ = key.size() * 3 + val.size(); // See fdbclient/ReadYourWrites.actor.cpp } // return 0 for success otherwise error @@ -290,6 +294,7 @@ static TxnErrorCode await_future(FDBFuture* fut) { TxnErrorCode Transaction::get(std::string_view key, std::string* val, bool snapshot) { StopWatch sw; + approximate_bytes_ += key.size() * 2; // See fdbclient/ReadYourWrites.actor.cpp for details auto* fut = fdb_transaction_get(txn_, (uint8_t*)key.data(), key.size(), snapshot); auto release_fut = [fut, &sw](int*) { @@ -328,6 +333,7 @@ TxnErrorCode Transaction::get(std::string_view begin, std::string_view end, std::unique_ptr* iter, bool snapshot, int limit) { StopWatch sw; + approximate_bytes_ += begin.size() + end.size(); std::unique_ptr> defer( (int*)0x01, [&sw](int*) { g_bvar_txn_kv_range_get << sw.elapsed_us(); }); @@ -368,6 +374,9 @@ void Transaction::atomic_set_ver_key(std::string_view key_prefix, std::string_vi FDBMutationType::FDB_MUTATION_TYPE_SET_VERSIONSTAMPED_KEY); g_bvar_txn_kv_atomic_set_ver_key << sw.elapsed_us(); + ++num_put_keys_; + put_bytes_ += key_prefix.size() + val.size(); + approximate_bytes_ += key_prefix.size() * 3 + val.size(); } void Transaction::atomic_set_ver_value(std::string_view key, std::string_view value) { @@ -384,6 +393,9 @@ void Transaction::atomic_set_ver_value(std::string_view key, std::string_view va FDBMutationType::FDB_MUTATION_TYPE_SET_VERSIONSTAMPED_VALUE); g_bvar_txn_kv_atomic_set_ver_value << sw.elapsed_us(); + ++num_put_keys_; + put_bytes_ += key.size() + value.size(); + approximate_bytes_ += key.size() * 3 + value.size(); } void Transaction::atomic_add(std::string_view key, int64_t to_add) { @@ -394,12 +406,18 @@ void Transaction::atomic_add(std::string_view key, int64_t to_add) { sizeof(to_add), FDBMutationType::FDB_MUTATION_TYPE_ADD); g_bvar_txn_kv_atomic_add << sw.elapsed_us(); + ++num_put_keys_; + put_bytes_ += key.size() + 8; + approximate_bytes_ += key.size() * 3 + 8; } void Transaction::remove(std::string_view key) { StopWatch sw; fdb_transaction_clear(txn_, (uint8_t*)key.data(), key.size()); g_bvar_txn_kv_remove << sw.elapsed_us(); + ++num_del_keys_; + delete_bytes_ += key.size(); + approximate_bytes_ += key.size() * 4; // See fdbclient/ReadYourWrites.actor.cpp for details. } void Transaction::remove(std::string_view begin, std::string_view end) { @@ -407,6 +425,10 @@ void Transaction::remove(std::string_view begin, std::string_view end) { fdb_transaction_clear_range(txn_, (uint8_t*)begin.data(), begin.size(), (uint8_t*)end.data(), end.size()); g_bvar_txn_kv_range_remove << sw.elapsed_us(); + num_del_keys_ += 2; + delete_bytes_ += begin.size() + end.size(); + approximate_bytes_ += + (begin.size() + end.size()) * 2; // See fdbclient/ReadYourWrites.actor.cpp for details. 
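    // The *2/*3/*4 factors applied to key sizes in these mutators approximate the
    // bookkeeping overhead of FoundationDB's read-your-writes buffer (see the
    // ReadYourWrites.actor.cpp references above) rather than the raw payload bytes, so
    // approximate_bytes() reflects what the transaction buffer actually consumes.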
} TxnErrorCode Transaction::commit() { @@ -435,7 +457,7 @@ TxnErrorCode Transaction::commit() { TxnErrorCode Transaction::get_read_version(int64_t* version) { StopWatch sw; - auto fut = fdb_transaction_get_read_version(txn_); + auto* fut = fdb_transaction_get_read_version(txn_); std::unique_ptr> defer((int*)0x01, [fut, &sw](...) { fdb_future_destroy(fut); g_bvar_txn_kv_get_read_version << sw.elapsed_us(); @@ -512,6 +534,7 @@ TxnErrorCode Transaction::batch_get(std::vector>* res const auto& k = keys[j]; futures.emplace_back( fdb_transaction_get(txn_, (uint8_t*)k.data(), k.size(), opts.snapshot)); + approximate_bytes_ += k.size() * 2; } size_t num_futures = futures.size(); diff --git a/cloud/src/meta-service/txn_kv.h b/cloud/src/meta-service/txn_kv.h index 12ee38823f943a..0fc6d160bf94fc 100644 --- a/cloud/src/meta-service/txn_kv.h +++ b/cloud/src/meta-service/txn_kv.h @@ -20,7 +20,6 @@ #include #include -#include #include #include #include @@ -171,6 +170,31 @@ class Transaction { virtual TxnErrorCode batch_get(std::vector>* res, const std::vector& keys, const BatchGetOptions& opts = BatchGetOptions()) = 0; + + /** + * @brief return the approximate bytes consumed by the underlying transaction buffer. + **/ + virtual size_t approximate_bytes() const = 0; + + /** + * @brief return the num delete keys submitted to this txn. + **/ + virtual size_t num_del_keys() const = 0; + + /** + * @brief return the num put keys submitted to this txn. + **/ + virtual size_t num_put_keys() const = 0; + + /** + * @brief return the bytes of the delete keys consumed. + **/ + virtual size_t delete_bytes() const = 0; + + /** + * @brief return the bytes of the put key and values consumed. + **/ + virtual size_t put_bytes() const = 0; }; class RangeGetIterator { @@ -460,11 +484,27 @@ class Transaction : public cloud::Transaction { const std::vector& keys, const BatchGetOptions& opts = BatchGetOptions()) override; + size_t approximate_bytes() const override { return approximate_bytes_; } + + size_t num_del_keys() const override { return num_del_keys_; } + + size_t num_put_keys() const override { return num_put_keys_; } + + size_t delete_bytes() const override { return delete_bytes_; } + + size_t put_bytes() const override { return put_bytes_; } + private: std::shared_ptr db_ {nullptr}; bool commited_ = false; bool aborted_ = false; FDBTransaction* txn_ = nullptr; + + size_t num_del_keys_ {0}; + size_t num_put_keys_ {0}; + size_t delete_bytes_ {0}; + size_t put_bytes_ {0}; + size_t approximate_bytes_ {0}; }; } // namespace fdb From 152c586b13de34e8f842780e5239e17de25e74ec Mon Sep 17 00:00:00 2001 From: zxealous Date: Thu, 25 Apr 2024 21:32:16 +0800 Subject: [PATCH 030/163] [improve](disk) Not add disk path to broken list if check status is not IO_ERROR (#34111) --- be/src/olap/data_dir.cpp | 2 +- be/src/service/doris_main.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index d1b5185b7894ce..da7302ef605445 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -231,7 +231,7 @@ void DataDir::health_check() { // check disk if (_is_used) { Status res = _read_and_write_test_file(); - if (!res) { + if (!res && res.is()) { LOG(WARNING) << "store read/write test file occur IO Error. 
path=" << _path << ", err: " << res; _engine.add_broken_path(_path); diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 731e09c6be9fc2..98712a8168be5a 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -439,6 +439,7 @@ int main(int argc, char** argv) { it = paths.erase(it); } else { LOG(ERROR) << "read write test file failed, path=" << it->path; + // if only one disk and the disk is full, also need exit because rocksdb will open failed exit(-1); } } else { From 98e9937c9e829228ff1ca6581cdb7b2d260d7260 Mon Sep 17 00:00:00 2001 From: seawinde <149132972+seawinde@users.noreply.github.com> Date: Thu, 25 Apr 2024 22:11:14 +0800 Subject: [PATCH 031/163] [improvement](mtmv) Optimize the performance of nested materialized view rewriting (#34127) Optimize the performance of nested materialized view rewriting gracefully, future performance optimzie base on this. --- .../org/apache/doris/nereids/memo/Memo.java | 17 +++- .../doris/nereids/memo/StructInfoMap.java | 86 ++++++++++--------- .../mv/AbstractMaterializedViewRule.java | 1 + .../exploration/mv/MaterializedViewUtils.java | 13 ++- .../doris/nereids/memo/StructInfoMapTest.java | 20 ++--- 5 files changed, 77 insertions(+), 60 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java index db12c02e79a185..8793cb5be51f75 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.memo; +import org.apache.doris.catalog.MTMV; import org.apache.doris.common.IdGenerator; import org.apache.doris.common.Pair; import org.apache.doris.nereids.cost.Cost; @@ -33,6 +34,7 @@ import org.apache.doris.nereids.trees.plans.GroupPlan; import org.apache.doris.nereids.trees.plans.LeafPlan; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.algebra.CatalogRelation; import org.apache.doris.nereids.trees.plans.algebra.SetOperation; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; @@ -55,6 +57,7 @@ import java.util.Optional; import java.util.PriorityQueue; import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; @@ -69,6 +72,8 @@ public class Memo { EventChannel.getDefaultChannel().addConsumers(new LogConsumer(GroupMergeEvent.class, EventChannel.LOG))); private static long stateId = 0; private final ConnectContext connectContext; + private final Set needRefreshTableIdSet = new HashSet<>(); + private final AtomicLong refreshVersion = new AtomicLong(1); private final IdGenerator groupIdGenerator = GroupId.createGenerator(); private final Map groups = Maps.newLinkedHashMap(); // we could not use Set, because Set does not have get method. 
@@ -118,6 +123,10 @@ public int getGroupExpressionsSize() { return groupExpressions.size(); } + public long getRefreshVersion() { + return refreshVersion.get(); + } + private Plan skipProject(Plan plan, Group targetGroup) { // Some top project can't be eliminated if (plan instanceof LogicalProject && ((LogicalProject) plan).canEliminate()) { @@ -406,14 +415,15 @@ private CopyInResult doCopyIn(Plan plan, @Nullable Group targetGroup, @Nullable plan.getLogicalProperties(), targetGroup.getLogicalProperties()); throw new IllegalStateException("Insert a plan into targetGroup but differ in logicalproperties"); } + // TODO Support sync materialized view in the future + if (plan instanceof CatalogRelation && ((CatalogRelation) plan).getTable() instanceof MTMV) { + refreshVersion.incrementAndGet(); + } Optional groupExpr = plan.getGroupExpression(); if (groupExpr.isPresent()) { Preconditions.checkState(groupExpressions.containsKey(groupExpr.get())); return CopyInResult.of(false, groupExpr.get()); } - if (targetGroup != null) { - targetGroup.getstructInfoMap().setRefreshed(false); - } List childrenGroups = Lists.newArrayList(); for (int i = 0; i < plan.children().size(); i++) { // skip useless project. @@ -562,7 +572,6 @@ public void mergeGroup(Group source, Group destination, HashMap pla if (source == root) { root = destination; } - destination.getstructInfoMap().setRefreshed(false); groups.remove(source.getGroupId()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java index 4119c6f2f89967..efa2bef1792417 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java @@ -42,31 +42,34 @@ public class StructInfoMap { private final Map>> groupExpressionMap = new HashMap<>(); private final Map infoMap = new HashMap<>(); - private boolean refreshed; + private long refreshVersion = 0; /** * get struct info according to table map * - * @param mvTableMap the original table map + * @param tableMap the original table map * @param foldTableMap the fold table map * @param group the group that the mv matched * @return struct info or null if not found */ - public @Nullable StructInfo getStructInfo(BitSet mvTableMap, BitSet foldTableMap, Group group, Plan originPlan) { - if (!infoMap.containsKey(mvTableMap)) { - if ((groupExpressionMap.containsKey(foldTableMap) || groupExpressionMap.isEmpty()) - && !groupExpressionMap.containsKey(mvTableMap)) { - refresh(group); - } - if (groupExpressionMap.containsKey(mvTableMap)) { - Pair> groupExpressionBitSetPair = getGroupExpressionWithChildren( - mvTableMap); - StructInfo structInfo = constructStructInfo(groupExpressionBitSetPair.first, - groupExpressionBitSetPair.second, mvTableMap, originPlan); - infoMap.put(mvTableMap, structInfo); - } + public @Nullable StructInfo getStructInfo(Memo memo, BitSet tableMap, BitSet foldTableMap, + Group group, Plan originPlan) { + StructInfo structInfo = infoMap.get(tableMap); + if (structInfo != null) { + return structInfo; + } + if (groupExpressionMap.isEmpty() || !groupExpressionMap.containsKey(tableMap)) { + refresh(group, memo.getRefreshVersion(), foldTableMap); + group.getstructInfoMap().setRefreshVersion(memo.getRefreshVersion()); } - return infoMap.get(mvTableMap); + if (groupExpressionMap.containsKey(tableMap)) { + Pair> groupExpressionBitSetPair = getGroupExpressionWithChildren( + tableMap); + structInfo = 
constructStructInfo(groupExpressionBitSetPair.first, + groupExpressionBitSetPair.second, tableMap, originPlan); + infoMap.put(tableMap, structInfo); + } + return structInfo; } public Set getTableMaps() { @@ -81,12 +84,12 @@ public Pair> getGroupExpressionWithChildren(BitSet return groupExpressionMap.get(tableMap); } - public boolean isRefreshed() { - return refreshed; + public long getRefreshVersion() { + return refreshVersion; } - public void setRefreshed(boolean refreshed) { - this.refreshed = refreshed; + public void setRefreshVersion(long refreshVersion) { + this.refreshVersion = refreshVersion; } private StructInfo constructStructInfo(GroupExpression groupExpression, List children, @@ -114,27 +117,24 @@ private Plan constructPlan(GroupExpression groupExpression, List childre * * @param group the root group * - * @return whether groupExpressionMap is updated */ - public boolean refresh(Group group) { - Set refreshedGroup = new HashSet<>(); - int originSize = groupExpressionMap.size(); + public void refresh(Group group, long refreshVersion, BitSet targetBitSet) { + Set refreshedGroup = new HashSet<>(); for (GroupExpression groupExpression : group.getLogicalExpressions()) { - List> childrenTableMap = new ArrayList<>(); - boolean needRefresh = groupExpressionMap.isEmpty(); + List> childrenTableMap = new LinkedList<>(); if (groupExpression.children().isEmpty()) { BitSet leaf = constructLeaf(groupExpression); - groupExpressionMap.put(leaf, Pair.of(groupExpression, new ArrayList<>())); + groupExpressionMap.put(leaf, Pair.of(groupExpression, new LinkedList<>())); continue; } - for (Group child : groupExpression.children()) { - if (!refreshedGroup.contains(child) && !child.getstructInfoMap().isRefreshed()) { - StructInfoMap childStructInfoMap = child.getstructInfoMap(); - needRefresh |= childStructInfoMap.refresh(child); - childStructInfoMap.setRefreshed(true); + StructInfoMap childStructInfoMap = child.getstructInfoMap(); + if (!refreshedGroup.contains(child.getGroupId().asInt()) + && refreshVersion != childStructInfoMap.getRefreshVersion()) { + childStructInfoMap.refresh(child, refreshVersion, targetBitSet); + childStructInfoMap.setRefreshVersion(refreshVersion); } - refreshedGroup.add(child); + refreshedGroup.add(child.getGroupId().asInt()); childrenTableMap.add(child.getstructInfoMap().getTableMaps()); } // if one same groupExpression have refreshed, continue @@ -150,15 +150,14 @@ public boolean refresh(Group group) { } // if cumulative child table map is different from current // or current group expression map is empty, should update the groupExpressionMap currently - Collection>> bitSetWithChildren = cartesianProduct(childrenTableMap); - if (needRefresh) { - for (Pair> bitSetWithChild : bitSetWithChildren) { - groupExpressionMap.putIfAbsent(bitSetWithChild.first, - Pair.of(groupExpression, bitSetWithChild.second)); - } + Collection>> bitSetWithChildren = cartesianProduct(childrenTableMap, + new BitSet()); + for (Pair> bitSetWithChild : bitSetWithChildren) { + groupExpressionMap.putIfAbsent(bitSetWithChild.first, + Pair.of(groupExpression, bitSetWithChild.second)); } + } - return originSize != groupExpressionMap.size(); } private BitSet constructLeaf(GroupExpression groupExpression) { @@ -172,7 +171,8 @@ private BitSet constructLeaf(GroupExpression groupExpression) { return tableMap; } - private Collection>> cartesianProduct(List> childrenTableMap) { + private Collection>> cartesianProduct(List> childrenTableMap, + BitSet targetBitSet) { Set> cartesianLists = 
Sets.cartesianProduct(childrenTableMap); List>> resultPairSet = new LinkedList<>(); for (List bitSetList : cartesianLists) { @@ -180,6 +180,10 @@ private Collection>> cartesianProduct(List for (BitSet b : bitSetList) { bitSet.or(b); } + // filter the useless bitset which targetBitSet not contains, avoid exponential expansion + if (!targetBitSet.isEmpty() && !StructInfo.containsAll(targetBitSet, bitSet)) { + continue; + } resultPairSet.add(Pair.of(bitSet, bitSetList)); } return resultPairSet; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java index 6bf47f00c359c0..3405942c3a8817 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java @@ -142,6 +142,7 @@ public List rewrite(Plan queryPlan, CascadesContext cascadesContext) { protected List getValidQueryStructInfos(Plan queryPlan, CascadesContext cascadesContext, BitSet materializedViewTableSet) { List validStructInfos = new ArrayList<>(); + // For every materialized view we should trigger refreshing struct info map List uncheckedStructInfos = MaterializedViewUtils.extractStructInfo(queryPlan, cascadesContext, materializedViewTableSet); uncheckedStructInfos.forEach(queryStructInfo -> { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java index 46d0adde06e978..5f7dc419eafd22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java @@ -148,16 +148,23 @@ public static List extractStructInfo(Plan plan, CascadesContext casc if (plan.getGroupExpression().isPresent()) { Group ownerGroup = plan.getGroupExpression().get().getOwnerGroup(); StructInfoMap structInfoMap = ownerGroup.getstructInfoMap(); - structInfoMap.refresh(ownerGroup); + if (cascadesContext.getMemo().getRefreshVersion() != structInfoMap.getRefreshVersion() + || structInfoMap.getTableMaps().isEmpty()) { + structInfoMap.refresh(ownerGroup, cascadesContext.getMemo().getRefreshVersion(), + materializedViewTableSet); + structInfoMap.setRefreshVersion(cascadesContext.getMemo().getRefreshVersion()); + } Set queryTableSets = structInfoMap.getTableMaps(); ImmutableList.Builder structInfosBuilder = ImmutableList.builder(); if (!queryTableSets.isEmpty()) { for (BitSet queryTableSet : queryTableSets) { + // TODO As only support MatchMode.COMPLETE, so only get equaled query table struct info if (!materializedViewTableSet.isEmpty() - && !StructInfo.containsAll(materializedViewTableSet, queryTableSet)) { + && !materializedViewTableSet.equals(queryTableSet)) { continue; } - StructInfo structInfo = structInfoMap.getStructInfo(queryTableSet, queryTableSet, ownerGroup, plan); + StructInfo structInfo = structInfoMap.getStructInfo(cascadesContext.getMemo(), + queryTableSet, queryTableSet, ownerGroup, plan); if (structInfo != null) { structInfosBuilder.add(structInfo); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/StructInfoMapTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/StructInfoMapTest.java index 
13bdf35252edd2..9192f86cf3bb41 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/StructInfoMapTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/memo/StructInfoMapTest.java @@ -50,7 +50,7 @@ void testTableMap() throws Exception { Group root = c1.getMemo().getRoot(); Set tableMaps = root.getstructInfoMap().getTableMaps(); Assertions.assertTrue(tableMaps.isEmpty()); - root.getstructInfoMap().refresh(root); + root.getstructInfoMap().refresh(root, 1, new BitSet()); Assertions.assertEquals(1, tableMaps.size()); new MockUp() { @Mock @@ -76,7 +76,7 @@ public boolean isMVPartitionValid(MTMV mtmv, ConnectContext ctx) { .optimize() .printlnBestPlanTree(); root = c1.getMemo().getRoot(); - root.getstructInfoMap().refresh(root); + root.getstructInfoMap().refresh(root, 1, new BitSet()); tableMaps = root.getstructInfoMap().getTableMaps(); Assertions.assertEquals(2, tableMaps.size()); dropMvByNereids("drop materialized view mv1"); @@ -97,10 +97,8 @@ void testLazyRefresh() throws Exception { Group root = c1.getMemo().getRoot(); Set tableMaps = root.getstructInfoMap().getTableMaps(); Assertions.assertTrue(tableMaps.isEmpty()); - boolean refreshed = root.getstructInfoMap().refresh(root); - Assertions.assertTrue(refreshed); - refreshed = root.getstructInfoMap().refresh(root); - Assertions.assertFalse(refreshed); + root.getstructInfoMap().refresh(root, 1, new BitSet()); + root.getstructInfoMap().refresh(root, 1, new BitSet()); Assertions.assertEquals(1, tableMaps.size()); new MockUp() { @Mock @@ -126,10 +124,8 @@ public boolean isMVPartitionValid(MTMV mtmv, ConnectContext ctx) { .optimize() .printlnBestPlanTree(); root = c1.getMemo().getRoot(); - refreshed = root.getstructInfoMap().refresh(root); - Assertions.assertTrue(refreshed); - refreshed = root.getstructInfoMap().refresh(root); - Assertions.assertFalse(refreshed); + root.getstructInfoMap().refresh(root, 1, new BitSet()); + root.getstructInfoMap().refresh(root, 1, new BitSet()); tableMaps = root.getstructInfoMap().getTableMaps(); Assertions.assertEquals(2, tableMaps.size()); dropMvByNereids("drop materialized view mv1"); @@ -166,13 +162,13 @@ public boolean isMVPartitionValid(MTMV mtmv, ConnectContext ctx) { .rewrite() .optimize(); Group root = c1.getMemo().getRoot(); - root.getstructInfoMap().refresh(root); + root.getstructInfoMap().refresh(root, 1, new BitSet()); StructInfoMap structInfoMap = root.getstructInfoMap(); Assertions.assertEquals(2, structInfoMap.getTableMaps().size()); BitSet mvMap = structInfoMap.getTableMaps().stream() .filter(b -> b.cardinality() == 2) .collect(Collectors.toList()).get(0); - StructInfo structInfo = structInfoMap.getStructInfo(mvMap, mvMap, root, null); + StructInfo structInfo = structInfoMap.getStructInfo(c1.getMemo(), mvMap, mvMap, root, null); System.out.println(structInfo.getOriginalPlan().treeString()); BitSet bitSet = new BitSet(); structInfo.getRelations().forEach(r -> bitSet.set((int) r.getTable().getId())); From 5d5448fd25cf0cb711db6118cb4870e17072601f Mon Sep 17 00:00:00 2001 From: Mryange <59914473+Mryange@users.noreply.github.com> Date: Fri, 26 Apr 2024 01:10:07 +0800 Subject: [PATCH 032/163] [profile](scan) add projection time in scaner #34120 --- be/src/vec/exec/scan/scanner_context.cpp | 7 +++++++ be/src/vec/exec/scan/vscanner.cpp | 1 + be/src/vec/exec/scan/vscanner.h | 2 ++ 3 files changed, 10 insertions(+) diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp index 8d4a9b3a16436e..81e4dacba57c31 100644 --- 
a/be/src/vec/exec/scan/scanner_context.cpp +++ b/be/src/vec/exec/scan/scanner_context.cpp @@ -405,9 +405,11 @@ void ScannerContext::stop_scanners(RuntimeState* state) { std::stringstream scanner_statistics; std::stringstream scanner_rows_read; std::stringstream scanner_wait_worker_time; + std::stringstream scanner_projection; scanner_statistics << "["; scanner_rows_read << "["; scanner_wait_worker_time << "["; + scanner_projection << "["; // Scanners can in 3 state // state 1: in scanner context, not scheduled // state 2: in scanner worker pool's queue, scheduled but not running @@ -421,6 +423,9 @@ void ScannerContext::stop_scanners(RuntimeState* state) { scanner_statistics << PrettyPrinter::print(scanner->_scanner->get_time_cost_ns(), TUnit::TIME_NS) << ", "; + scanner_projection << PrettyPrinter::print(scanner->_scanner->projection_time(), + TUnit::TIME_NS) + << ", "; scanner_rows_read << PrettyPrinter::print(scanner->_scanner->get_rows_read(), TUnit::UNIT) << ", "; @@ -434,9 +439,11 @@ void ScannerContext::stop_scanners(RuntimeState* state) { scanner_statistics << "]"; scanner_rows_read << "]"; scanner_wait_worker_time << "]"; + scanner_projection << "]"; _scanner_profile->add_info_string("PerScannerRunningTime", scanner_statistics.str()); _scanner_profile->add_info_string("PerScannerRowsRead", scanner_rows_read.str()); _scanner_profile->add_info_string("PerScannerWaitTime", scanner_wait_worker_time.str()); + _scanner_profile->add_info_string("PerScannerProjectionTime", scanner_projection.str()); } _blocks_queue_added_cv.notify_one(); diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index f3835e6c889000..79fa4019687b42 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -187,6 +187,7 @@ Status VScanner::_filter_output_block(Block* block) { Status VScanner::_do_projections(vectorized::Block* origin_block, vectorized::Block* output_block) { SCOPED_RAW_TIMER(&_per_scanner_timer); + SCOPED_RAW_TIMER(&_projection_timer); const size_t rows = origin_block->rows(); if (rows == 0) { diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index ba953192507c8e..6e83c059706a45 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -109,6 +109,7 @@ class VScanner { int64_t get_time_cost_ns() const { return _per_scanner_timer; } + int64_t projection_time() const { return _projection_timer; } int64_t get_rows_read() const { return _num_rows_read; } bool is_init() const { return _is_init; } @@ -237,6 +238,7 @@ class VScanner { ScannerCounter _counter; int64_t _per_scanner_timer = 0; + int64_t _projection_timer = 0; bool _should_stop = false; }; From 09525e7e905db5984a11568a7ca1a63e6bdee94a Mon Sep 17 00:00:00 2001 From: StarryVerse <113903752+StarryVerse@users.noreply.github.com> Date: Fri, 26 Apr 2024 07:19:39 +0800 Subject: [PATCH 033/163] Update doris_main.cpp (#34128) * Update doris_main.cpp Log(FATAL) introduces a core dump, which is confusing for users. We should print error msg and exit without a core dump. 
* Update doris_main.cpp --- be/src/service/doris_main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 98712a8168be5a..ca7fca1ce62de3 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -169,8 +169,9 @@ auto instruction_fail_to_string(InstructionFail fail) { case InstructionFail::ARM_NEON: ret("ARM_NEON"); } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); + + LOG(ERROR) << "Unrecognized instruction fail value." << std::endl; + exit(-1); } sigjmp_buf jmpbuf; From a12b410273ec1bc2a2ed965dcccae000eb5fcb92 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Fri, 26 Apr 2024 07:20:18 +0800 Subject: [PATCH 034/163] [fix](Nereids) check after rewrite cannot handle agg in other opeator (#34114) this is a stupid mistake. we import a same name class from another package --- .../apache/doris/nereids/rules/analysis/CheckAfterRewrite.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java index 40a59697514209..3e079cd19af9d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java @@ -17,7 +17,6 @@ package org.apache.doris.nereids.rules.analysis; -import org.apache.doris.catalog.AggregateFunction; import org.apache.doris.catalog.Type; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; @@ -33,6 +32,7 @@ import org.apache.doris.nereids.trees.expressions.VirtualSlotReference; import org.apache.doris.nereids.trees.expressions.WindowExpression; import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.expressions.functions.generator.TableGeneratingFunction; import org.apache.doris.nereids.trees.expressions.functions.scalar.GroupingScalarFunction; import org.apache.doris.nereids.trees.expressions.functions.window.WindowFunction; From 132d974fa21039d43e616355a13eff442cc187b6 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Fri, 26 Apr 2024 07:21:00 +0800 Subject: [PATCH 035/163] Revert "[fix](memory) Fix Jemalloc hook failed to start BE with JDK 17 #33946" (#34107) This reverts commit 5fee7157ccfc8ed88bc3405eb404a2916b5aac3e. 
--- be/CMakeLists.txt | 5 ----- be/src/runtime/CMakeLists.txt | 2 +- build.sh | 6 ------ cloud/CMakeLists.txt | 3 --- cloud/src/common/CMakeLists.txt | 2 +- regression-test/pipeline/performance/compile.sh | 1 - 6 files changed, 2 insertions(+), 17 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index fd1f2c1db4a4cd..28b3fee115aab0 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -72,7 +72,6 @@ option(USE_LIBCPP "Use libc++" OFF) option(USE_MEM_TRACKER, "Use memory tracker" ON) option(USE_UNWIND "Use libunwind" ON) option(USE_JEMALLOC "Use jemalloc" ON) -option(USE_JEMALLOC_HOOK "Use jemalloc hook" ON) if (OS_MACOSX) set(GLIBC_COMPATIBILITY OFF) set(USE_LIBCPP ON) @@ -88,7 +87,6 @@ message(STATUS "GLIBC_COMPATIBILITY is ${GLIBC_COMPATIBILITY}") message(STATUS "USE_LIBCPP is ${USE_LIBCPP}") message(STATUS "USE_MEM_TRACKER is ${USE_MEM_TRACKER}") message(STATUS "USE_JEMALLOC is ${USE_JEMALLOC}") -message(STATUS "USE_JEMALLOC_HOOK is ${USE_JEMALLOC_HOOK}") message(STATUS "USE_UNWIND is ${USE_UNWIND}") message(STATUS "ENABLE_PCH is ${ENABLE_PCH}") @@ -348,9 +346,6 @@ endif() if (USE_JEMALLOC) add_definitions(-DUSE_JEMALLOC) endif() -if (USE_JEMALLOC_HOOK) - add_definitions(-DUSE_JEMALLOC_HOOK) -endif() # Compile with libunwind if (USE_UNWIND) diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 3bfec93edfc083..a0b3b799a764cb 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -25,7 +25,7 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/runtime") file(GLOB_RECURSE RUNTIME_FILES CONFIGURE_DEPENDS *.cpp *.cc) -if (NOT USE_JEMALLOC OR NOT USE_MEM_TRACKER OR NOT USE_JEMALLOC_HOOK) +if (NOT USE_JEMALLOC OR NOT USE_MEM_TRACKER) list(REMOVE_ITEM RUNTIME_FILES ${CMAKE_CURRENT_SOURCE_DIR}/memory/jemalloc_hook.cpp) endif() diff --git a/build.sh b/build.sh index f813999f8ba8bc..dbe6fb38b67df1 100755 --- a/build.sh +++ b/build.sh @@ -356,9 +356,6 @@ fi if [[ -z "${USE_JEMALLOC}" ]]; then USE_JEMALLOC='ON' fi -if [[ -z "${USE_JEMALLOC_HOOK}" ]]; then - USE_JEMALLOC_HOOK='OFF' -fi if [[ -z "${USE_BTHREAD_SCANNER}" ]]; then USE_BTHREAD_SCANNER='OFF' fi @@ -464,7 +461,6 @@ echo "Get params: STRIP_DEBUG_INFO -- ${STRIP_DEBUG_INFO} USE_MEM_TRACKER -- ${USE_MEM_TRACKER} USE_JEMALLOC -- ${USE_JEMALLOC} - USE_JEMALLOC_HOOK -- ${USE_JEMALLOC_HOOK} USE_BTHREAD_SCANNER -- ${USE_BTHREAD_SCANNER} ENABLE_STACKTRACE -- ${ENABLE_STACKTRACE} ENABLE_INJECTION_POINT -- ${ENABLE_INJECTION_POINT} @@ -567,7 +563,6 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then -DENABLE_PCH="${ENABLE_PCH}" \ -DUSE_MEM_TRACKER="${USE_MEM_TRACKER}" \ -DUSE_JEMALLOC="${USE_JEMALLOC}" \ - -DUSE_JEMALLOC_HOOK="${USE_JEMALLOC_HOOK}" \ -DENABLE_STACKTRACE="${ENABLE_STACKTRACE}" \ -DUSE_AVX2="${USE_AVX2}" \ -DGLIBC_COMPATIBILITY="${GLIBC_COMPATIBILITY}" \ @@ -611,7 +606,6 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then -DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \ -DUSE_DWARF="${USE_DWARF}" \ -DUSE_JEMALLOC="${USE_JEMALLOC}" \ - -DUSE_JEMALLOC_HOOK="${USE_JEMALLOC_HOOK}" \ -DEXTRA_CXX_FLAGS="${EXTRA_CXX_FLAGS}" \ -DBUILD_CHECK_META="${BUILD_CHECK_META:-OFF}" \ "${DORIS_HOME}/cloud/" diff --git a/cloud/CMakeLists.txt b/cloud/CMakeLists.txt index 9b9929ae1d5e0c..35164af9f9c546 100644 --- a/cloud/CMakeLists.txt +++ b/cloud/CMakeLists.txt @@ -188,9 +188,6 @@ endif () if (USE_JEMALLOC) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DUSE_JEMALLOC") endif() -if (USE_JEMALLOC_HOOK) - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DUSE_JEMALLOC_HOOK") -endif() if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 
7.0) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -faligned-new") diff --git a/cloud/src/common/CMakeLists.txt b/cloud/src/common/CMakeLists.txt index b18947b04a1e5b..a2abfb075bf934 100644 --- a/cloud/src/common/CMakeLists.txt +++ b/cloud/src/common/CMakeLists.txt @@ -16,7 +16,7 @@ set(COMMON_FILES network_util.cpp ) -if (USE_JEMALLOC AND USE_JEMALLOC_HOOK) +if (USE_JEMALLOC) set(COMMON_FILES ${COMMON_FILES} jemalloc_hook.cpp ) diff --git a/regression-test/pipeline/performance/compile.sh b/regression-test/pipeline/performance/compile.sh index 2a30fec0a7b494..cfeae6af5f2d36 100644 --- a/regression-test/pipeline/performance/compile.sh +++ b/regression-test/pipeline/performance/compile.sh @@ -135,7 +135,6 @@ sudo docker run -i --rm \ && export CCACHE_REMOTE_STORAGE=file:///root/ccache \ && export EXTRA_CXX_FLAGS=-O3 \ && export USE_JEMALLOC='ON' \ - && export USE_JEMALLOC_HOOK='OFF' \ && export ENABLE_PCH=OFF ${jdk17_str}\ && export CUSTOM_NPM_REGISTRY=https://registry.npmjs.org \ && bash build.sh --fe --be --clean 2>&1 | tee build.log" From 0038423c026cbe057237a25d5654fedad5cd3fd3 Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Fri, 26 Apr 2024 09:26:36 +0800 Subject: [PATCH 036/163] [fix](nereids)prevent null pointer access if translate expression fails (#33990) --- .../nereids/rules/expression/rules/FoldConstantRuleOnBE.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java index 3c2f0d546dd04a..e0e19bd19e2b06 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java @@ -215,6 +215,11 @@ private static void collectConst(Expression expr, Map constM LOG.warn("expression {} translate to legacy expr failed. ", expr, e); return; } + if (staleExpr == null) { + // just return, it's a fail-safe + LOG.warn("expression {} translate to legacy expr failed. 
", expr); + return; + } tExprMap.put(id, staleExpr.treeToThrift()); } else { for (int i = 0; i < expr.children().size(); i++) { From 837b5e83d0b30391c1847e58dfd84c40dcca6e7b Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Fri, 26 Apr 2024 09:35:47 +0800 Subject: [PATCH 037/163] [fix](planner)cast expr should do nothing in compactForLiteral method (#34047) --- .../main/java/org/apache/doris/analysis/CastExpr.java | 5 +++++ .../suites/correctness_p0/test_cast_decimal.groovy | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index 739901b79295fe..c73afac14c8502 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -592,4 +592,9 @@ public void setNotFold(boolean notFold) { public boolean isNotFold() { return this.notFold; } + + @Override + protected void compactForLiteral(Type type) { + // do nothing + } } diff --git a/regression-test/suites/correctness_p0/test_cast_decimal.groovy b/regression-test/suites/correctness_p0/test_cast_decimal.groovy index 88859ea1d529fd..21a1ab6d0c3ee4 100644 --- a/regression-test/suites/correctness_p0/test_cast_decimal.groovy +++ b/regression-test/suites/correctness_p0/test_cast_decimal.groovy @@ -34,4 +34,15 @@ suite("test_cast_decimal") { sql """select cast(32123.34212456734 as decimal(3,2));""" contains "CAST(32123.34212456734 AS DECIMALV3(3, 2))" } + + sql """drop table if exists test_ttt""" + sql """create table test_ttt(big_key bigint)DISTRIBUTED BY HASH(big_key) BUCKETS 1 PROPERTIES ("replication_num" = "1");""" + sql """set enable_nereids_planner=false;""" + sql """set enable_fold_constant_by_be = false; """ + sql """SELECT 1 + FROM test_ttt e1 + HAVING truncate(100, 2) < -2308.57 + AND cast(round(round(465.56, min(-5.987)), 2) AS DECIMAL) in + (SELECT cast(truncate(round(8990.65 - 4556.2354, 2.4652), 2)AS DECIMAL) + FROM test_ttt r2);""" } From 05e734a8e6b033d11caf9d4f03e624554e23ec7c Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Fri, 26 Apr 2024 09:36:05 +0800 Subject: [PATCH 038/163] [fix](nereids)move ReplaceVariableByLiteral rule to analyze phase (#33997) --- .../doris/nereids/jobs/executor/Analyzer.java | 12 ++++++ .../rules/analysis/VariableToLiteral.java | 39 +++++++++++++++++++ .../expression/ExpressionNormalization.java | 2 - .../suites/nereids_p0/test_user_var.groovy | 35 +++++++++++++++++ 4 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/VariableToLiteral.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index a0431e066beee8..ac0a44210717fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -44,6 +44,7 @@ import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate; import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput; import org.apache.doris.nereids.rules.analysis.SubqueryToApply; +import org.apache.doris.nereids.rules.analysis.VariableToLiteral; import org.apache.doris.nereids.rules.rewrite.MergeProjects; import 
org.apache.doris.nereids.rules.rewrite.SemiJoinCommute; import org.apache.doris.nereids.rules.rewrite.SimplifyAggGroupBy; @@ -157,6 +158,17 @@ private static List buildAnalyzeJobs(Optional c new NormalizeRepeat() ), bottomUp(new AdjustAggregateNullableForEmptySet()), + // consider sql with user defined var @t_zone + // set @t_zone='GMT'; + // SELECT + // DATE_FORMAT(convert_tz(dt, time_zone, @t_zone),'%Y-%m-%d') day + // FROM + // t + // GROUP BY + // 1; + // @t_zone must be replaced as 'GMT' before EliminateGroupByConstant and NormalizeAggregate rule. + // So need run VariableToLiteral rule before the two rules. + topDown(new VariableToLiteral()), // run CheckAnalysis before EliminateGroupByConstant in order to report error message correctly like bellow // select SUM(lo_tax) FROM lineorder group by 1; // errCode = 2, detailMessage = GROUP BY expression must not contain aggregate functions: sum(lo_tax) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/VariableToLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/VariableToLiteral.java new file mode 100644 index 00000000000000..c7ba1bfe6a7af3 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/VariableToLiteral.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.rules.expression.ExpressionRewrite; +import org.apache.doris.nereids.rules.expression.ExpressionRewriteRule; +import org.apache.doris.nereids.rules.expression.ExpressionRuleExecutor; +import org.apache.doris.nereids.rules.expression.rules.ReplaceVariableByLiteral; + +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * replace Variable To Literal + */ +public class VariableToLiteral extends ExpressionRewrite { + public static final List NORMALIZE_REWRITE_RULES = + ImmutableList.of(bottomUp(ReplaceVariableByLiteral.INSTANCE)); + + public VariableToLiteral() { + super(new ExpressionRuleExecutor(NORMALIZE_REWRITE_RULES)); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java index adf0cb90a958c1..f63ab2eee5f942 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionNormalization.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.rules.expression.rules.InPredicateDedup; import org.apache.doris.nereids.rules.expression.rules.InPredicateToEqualToRule; import org.apache.doris.nereids.rules.expression.rules.NormalizeBinaryPredicatesRule; -import org.apache.doris.nereids.rules.expression.rules.ReplaceVariableByLiteral; import org.apache.doris.nereids.rules.expression.rules.SimplifyArithmeticComparisonRule; import org.apache.doris.nereids.rules.expression.rules.SimplifyArithmeticRule; import org.apache.doris.nereids.rules.expression.rules.SimplifyCastRule; @@ -43,7 +42,6 @@ public class ExpressionNormalization extends ExpressionRewrite { // from_unixtime(timestamp, 'yyyyMMdd') to 'yyyyMMdd' public static final List NORMALIZE_REWRITE_RULES = ImmutableList.of( bottomUp( - ReplaceVariableByLiteral.INSTANCE, SupportJavaDateFormatter.INSTANCE, NormalizeBinaryPredicatesRule.INSTANCE, InPredicateDedup.INSTANCE, diff --git a/regression-test/suites/nereids_p0/test_user_var.groovy b/regression-test/suites/nereids_p0/test_user_var.groovy index d5eb164e3f9a7f..8a7db3736822eb 100644 --- a/regression-test/suites/nereids_p0/test_user_var.groovy +++ b/regression-test/suites/nereids_p0/test_user_var.groovy @@ -31,4 +31,39 @@ suite("test_user_var") { qt_boolean 'select @d1, @d2;' qt_null_literal 'select @f1, @f2;' qt_function 'select @func_1' + + multi_sql( + """ + drop table if exists dwd_login_ttt; + + CREATE TABLE `dwd_login_ttt` ( + `game_code` varchar(100) NOT NULL DEFAULT "-" , + `plat_code` varchar(100) NOT NULL DEFAULT "-" , + `userid` varchar(255) NULL DEFAULT "-" , + `dt` datetime NOT NULL, + `time_zone` varchar(100) NULL + ) ENGINE=OLAP + UNIQUE KEY(`game_code`, `plat_code`) + DISTRIBUTED BY HASH(`game_code`) BUCKETS 16 + PROPERTIES("replication_num" = "1"); + + drop view if exists dwd_login_ttt_view; + + create view dwd_login_ttt_view as + SELECT game_code,plat_code,time_zone,DATE_FORMAT(convert_tz(dt,time_zone,@t_zone),'%Y-%m-%d') day,count(distinct userid) + from dwd_login_ttt + where dt>=convert_tz(@t_day,'Asia/Shanghai',@t_zone) + and dt= '2024-01-31 16:00:00'" + } } \ No newline at end of file From e6801105349b262f4ca5eef7812d764a124dd57c Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Fri, 26 Apr 2024 09:38:10 +0800 Subject: [PATCH 
039/163] [fix](planner)date_add function should accept date type as its param (#34035) --- .../apache/doris/analysis/FunctionCallExpr.java | 16 ++++++++++++++++ gensrc/script/doris_builtins_functions.py | 2 ++ .../correctness/test_date_function_const.groovy | 5 ++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 9cfce9e67de8ad..3978c3802bc1e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -1658,6 +1658,22 @@ && collectChildReturnTypes()[0].isDecimalV3()) { } fn = getBuiltinFunction(fnName.getFunction(), argTypes, Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); + } else if (fnName.getFunction().equalsIgnoreCase("date_add") + || fnName.getFunction().equalsIgnoreCase("days_add") + || fnName.getFunction().equalsIgnoreCase("adddate") + || fnName.getFunction().equalsIgnoreCase("date_sub") + || fnName.getFunction().equalsIgnoreCase("days_sub") + || fnName.getFunction().equalsIgnoreCase("subdate")) { + Type[] childTypes = collectChildReturnTypes(); + argTypes[0] = childTypes[0]; + argTypes[1] = childTypes[1]; + if (childTypes[1] == Type.TINYINT || childTypes[1] == Type.SMALLINT) { + // be only support second param as int type + uncheckedCastChild(Type.INT, 1); + argTypes[1] = Type.INT; + } + fn = getBuiltinFunction(fnName.getFunction(), argTypes, + Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); } else { // now first find table function in table function sets if (isTableFnCall) { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 841799f0a13b85..b7912d08904775 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1080,6 +1080,8 @@ [['weeks_sub'], 'DATEV2', ['DATEV2', 'INT'], ''], [['days_add', 'date_add', 'adddate'], 'DATEV2', ['DATEV2', 'INT'], ''], [['days_sub', 'date_sub', 'subdate'], 'DATEV2', ['DATEV2', 'INT'], ''], + [['days_add', 'date_add', 'adddate'], 'DATE', ['DATE', 'INT'], ''], + [['days_sub', 'date_sub', 'subdate'], 'DATE', ['DATE', 'INT'], ''], [['hours_add'], 'DATETIMEV2', ['DATEV2', 'INT'], ''], [['hours_sub'], 'DATETIMEV2', ['DATEV2', 'INT'], ''], [['minutes_add'], 'DATETIMEV2', ['DATEV2', 'INT'], ''], diff --git a/regression-test/suites/correctness/test_date_function_const.groovy b/regression-test/suites/correctness/test_date_function_const.groovy index 85d8e5ba711370..df05738b1bf674 100644 --- a/regression-test/suites/correctness/test_date_function_const.groovy +++ b/regression-test/suites/correctness/test_date_function_const.groovy @@ -57,5 +57,8 @@ suite("test_date_function_const") { qt_select10 """ select hours_add(cast('2023-03-30 22:23:45.23452' as datetimev2(6)),8) """ - + explain { + sql("""select date_add(CURRENT_DATE(),-2);""") + notContains("00:00:00") + } } \ No newline at end of file From 04cfa16208e270ea84b4c7e4feb51c90766a0af2 Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Fri, 26 Apr 2024 09:53:06 +0800 Subject: [PATCH 040/163] [chore](ci) adjust cloud_p* be.conf (#34132) --- regression-test/pipeline/cloud_p0/conf/be_custom.conf | 1 - regression-test/pipeline/cloud_p1/conf/be_custom.conf | 1 - 2 files changed, 2 deletions(-) diff --git a/regression-test/pipeline/cloud_p0/conf/be_custom.conf b/regression-test/pipeline/cloud_p0/conf/be_custom.conf index 
9f85d1c98fac37..a84891beda8e62 100644 --- a/regression-test/pipeline/cloud_p0/conf/be_custom.conf +++ b/regression-test/pipeline/cloud_p0/conf/be_custom.conf @@ -7,7 +7,6 @@ push_worker_count_high_priority = 2 streaming_load_max_mb = 107374182400 clear_file_cache=true enable_file_cache=true -mem_limit=50% #disable_storage_page_cache = true enable_file_cache_query_limit=true file_cache_max_file_segment_size=1048576 diff --git a/regression-test/pipeline/cloud_p1/conf/be_custom.conf b/regression-test/pipeline/cloud_p1/conf/be_custom.conf index 1f4104304fcaf8..ae9efacf4883bb 100644 --- a/regression-test/pipeline/cloud_p1/conf/be_custom.conf +++ b/regression-test/pipeline/cloud_p1/conf/be_custom.conf @@ -7,7 +7,6 @@ push_worker_count_high_priority = 2 streaming_load_max_mb = 107374182400 clear_file_cache=true enable_file_cache=true -mem_limit=50% #disable_storage_page_cache = true enable_file_cache_query_limit=true file_cache_max_file_segment_size=1048576 From 09ebfafc86876a3462e7e629c9dbecefc370d2e2 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:26:16 +0800 Subject: [PATCH 041/163] [chore](variable) deprecated GROUP_BY_AND_HAVING_USE_ALIAS_FIRST (#34133) this variable intro by #15748 for backward compatibility. currently, it is used very infrequently. Nereids do not support it anymore. So, we tag it as deprecated, and will remove it in the future. --- .../src/main/java/org/apache/doris/qe/SessionVariable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 2037f5fa1efb27..579ad21c9ab1c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1338,7 +1338,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { // Default value is false, which means the group by and having clause // should first use column name not alias. According to mysql. - @VariableMgr.VarAttr(name = GROUP_BY_AND_HAVING_USE_ALIAS_FIRST) + @VariableMgr.VarAttr(name = GROUP_BY_AND_HAVING_USE_ALIAS_FIRST, varType = VariableAnnotation.DEPRECATED) public boolean groupByAndHavingUseAliasFirst = false; // Whether disable block file cache. Block cache only works when FE's query options sets disableFileCache false From d4cdd289d37dd93719d344b19cc3ca3ab66af8cd Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 26 Apr 2024 11:45:35 +0800 Subject: [PATCH 042/163] Support high priority column stats auto collection. (#33703) * fix visible column (#33023) * Collect high priority columns. (#31235) * High priority queue and map. (#31509) * Support column level health value. (#31794) * Support follower sync query columns to master. (#31859) * Support show auto analyze pending jobs. (#31926) * Check column health value earlier, show job priority. (#32064) * support window (#32094) * Refactor. (#32273) * refactor2 (#32278) * Unit test (#32398) * Support auto analyze mv (#32433) * Fix bug (#32454) * Support identical column name in different index. (#32957) * Fix visible column * Use future to block auto analyze before job finish. (#33083) * Fix ut. (#33147) * Fix ut (#33161) * fix p0 (#33210) * Improve failover logic. (#33382) * Improve waiting empty table logic. 
(#33472) * Fix pipeline (#33671) --- .../java/org/apache/doris/common/Config.java | 2 +- fe/fe-core/src/main/cup/sql_parser.cup | 4 + .../doris/analysis/AnalyzeProperties.java | 2 + .../doris/analysis/ShowAnalyzeStmt.java | 1 + .../analysis/ShowAutoAnalyzeJobsStmt.java | 210 +++++++ .../doris/analysis/ShowColumnStatsStmt.java | 4 + .../java/org/apache/doris/catalog/Env.java | 27 +- .../org/apache/doris/catalog/OlapTable.java | 38 +- .../java/org/apache/doris/catalog/Table.java | 16 +- .../org/apache/doris/catalog/TableIf.java | 13 +- .../doris/datasource/ExternalTable.java | 30 +- .../doris/datasource/InternalCatalog.java | 6 +- .../doris/nereids/jobs/executor/Rewriter.java | 4 +- .../apache/doris/nereids/rules/RuleType.java | 1 + .../expression/QueryColumnCollector.java | 215 ++++++++ .../org/apache/doris/qe/SessionVariable.java | 7 + .../org/apache/doris/qe/ShowExecutor.java | 34 ++ .../doris/service/FrontendServiceImpl.java | 8 + .../apache/doris/statistics/AnalysisInfo.java | 27 +- .../doris/statistics/AnalysisInfoBuilder.java | 29 +- .../apache/doris/statistics/AnalysisJob.java | 8 +- .../doris/statistics/AnalysisManager.java | 165 ++++-- .../statistics/AnalysisTaskExecutor.java | 7 +- .../statistics/AutoAnalysisPendingJob.java | 52 ++ .../doris/statistics/BaseAnalysisTask.java | 36 +- .../apache/doris/statistics/ColStatsMeta.java | 16 +- .../statistics/ExternalAnalysisTask.java | 9 +- .../statistics/FollowerColumnSender.java | 151 ++++++ .../doris/statistics/HistogramTask.java | 5 - .../apache/doris/statistics/JobPriority.java | 25 + .../doris/statistics/OlapAnalysisTask.java | 27 +- .../apache/doris/statistics/QueryColumn.java | 66 +++ .../doris/statistics/StatisticConstants.java | 4 +- .../statistics/StatisticsAutoCollector.java | 270 +++++---- .../doris/statistics/StatisticsCollector.java | 79 --- .../statistics/StatisticsJobAppender.java | 204 +++++++ .../statistics/StatisticsRepository.java | 4 +- .../doris/statistics/TableStatsMeta.java | 22 +- .../doris/statistics/util/StatisticsUtil.java | 83 ++- .../doris/statistics/AnalysisJobTest.java | 8 +- .../doris/statistics/AnalysisManagerTest.java | 371 ++++++++++--- .../statistics/AnalysisTaskExecutorTest.java | 5 +- .../apache/doris/statistics/AnalyzeTest.java | 6 +- .../statistics/FollowerColumnSenderTest.java | 88 +++ .../StatisticsAutoCollectorTest.java | 512 +++--------------- .../statistics/StatisticsJobAppenderTest.java | 281 ++++++++++ .../doris/statistics/TableStatsMetaTest.java | 14 +- .../statistics/util/StatisticsUtilTest.java | 149 +++++ gensrc/thrift/FrontendService.thrift | 13 + .../hive/test_hive_statistic_auto.groovy | 2 +- .../suites/statistics/analyze_stats.groovy | 2 +- 51 files changed, 2422 insertions(+), 940 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/QueryColumnCollector.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/AutoAnalysisPendingJob.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/QueryColumn.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java create mode 100644 
fe/fe-core/src/test/java/org/apache/doris/statistics/FollowerColumnSenderTest.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index c40e4c4bec32fc..e2861310faa592 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1613,7 +1613,7 @@ public class Config extends ConfigBase { "This parameter controls the time interval for automatic collection jobs to check the health of table" + "statistics and trigger automatic collection" }) - public static int auto_check_statistics_in_minutes = 5; + public static int auto_check_statistics_in_minutes = 1; /** * If set to TRUE, the compaction slower replica will be skipped when select get queryable replicas diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 4efd1cf61d4ded..fc0b116bbea967 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4568,6 +4568,10 @@ show_param ::= {: RESULT = new ShowAnalyzeStmt(tbl, parser.where, true); :} + | KW_AUTO KW_JOBS opt_table_name:tbl opt_wild_where + {: + RESULT = new ShowAutoAnalyzeJobsStmt(tbl, parser.where); + :} | KW_ANALYZE KW_TASK KW_STATUS INTEGER_LITERAL:jobId {: RESULT = new ShowAnalyzeTaskStatus(jobId); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java index 94083989ca7b83..f78c63ebea1a83 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeProperties.java @@ -44,6 +44,7 @@ public class AnalyzeProperties { public static final String PROPERTY_PERIOD_SECONDS = "period.seconds"; public static final String PROPERTY_FORCE_FULL = "force.full"; public static final String PROPERTY_PARTITION_COLUMN_FROM_SQL = "partition.column.from.sql"; + public static final String PROPERTY_USE_AUTO_ANALYZER = "use.auto.analyzer"; public static final AnalyzeProperties DEFAULT_PROP = new AnalyzeProperties(new HashMap() { { @@ -72,6 +73,7 @@ public class AnalyzeProperties { .add(PROPERTY_PERIOD_CRON) .add(PROPERTY_FORCE_FULL) .add(PROPERTY_PARTITION_COLUMN_FROM_SQL) + .add(PROPERTY_USE_AUTO_ANALYZER) .build(); public AnalyzeProperties(Map properties) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java index 9ccfd956ca5d84..f660d6eeb3c6b5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAnalyzeStmt.java @@ -62,6 +62,7 @@ public class ShowAnalyzeStmt extends ShowStmt { .add("schedule_type") .add("start_time") .add("end_time") + .add("priority") .build(); private long jobId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java new file mode 100644 index 00000000000000..560387fa5bc11c --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowAutoAnalyzeJobsStmt.java @@ -0,0 +1,210 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.UserException; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.ShowResultSetMetaData; +import org.apache.doris.statistics.JobPriority; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableList; + +/** + * ShowAutoAnalyzeJobsStmt is used to show pending auto analysis jobs. + * syntax: + * SHOW AUTO ANALYZE JOBS + * [TABLE] + * [ + * WHERE + * [PRIORITY = ["HIGH"|"MID"|"LOW"]] + * ] + */ +public class ShowAutoAnalyzeJobsStmt extends ShowStmt { + private static final String PRIORITY = "priority"; + private static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() + .add("catalog_name") + .add("db_name") + .add("tbl_name") + .add("col_list") + .add("priority") + .build(); + + private final TableName tableName; + private final Expr whereClause; + + public ShowAutoAnalyzeJobsStmt(TableName tableName, Expr whereClause) { + this.tableName = tableName; + this.whereClause = whereClause; + } + + // extract from predicate + private String jobPriority; + + public String getPriority() { + Preconditions.checkArgument(isAnalyzed(), + "The stateValue must be obtained after the parsing is complete"); + return jobPriority; + } + + public Expr getWhereClause() { + Preconditions.checkArgument(isAnalyzed(), + "The whereClause must be obtained after the parsing is complete"); + return whereClause; + } + + @Override + public void analyze(Analyzer analyzer) throws UserException { + if (!ConnectContext.get().getSessionVariable().enableStats) { + throw new UserException("Analyze function is forbidden, you should add `enable_stats=true`" + + "in your FE conf file"); + } + super.analyze(analyzer); + if (tableName != null) { + tableName.analyze(analyzer); + String catalogName = tableName.getCtl(); + String dbName = tableName.getDb(); + String tblName = tableName.getTbl(); + checkShowAnalyzePriv(catalogName, dbName, tblName); + } + + // analyze where clause if not null + if (whereClause != null) { + analyzeSubPredicate(whereClause); + } + } + + @Override + public ShowResultSetMetaData getMetaData() { + ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder(); + for (String title : TITLE_NAMES) { + builder.addColumn(new Column(title, ScalarType.createVarchar(128))); + } + return builder.build(); + } + + @Override + public RedirectStatus getRedirectStatus() { + return RedirectStatus.FORWARD_NO_SYNC; + } + + private void 
checkShowAnalyzePriv(String catalogName, String dbName, String tblName) throws AnalysisException { + if (!Env.getCurrentEnv().getAccessManager() + .checkTblPriv(ConnectContext.get(), catalogName, dbName, tblName, PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException( + ErrorCode.ERR_TABLEACCESS_DENIED_ERROR, + "SHOW ANALYZE", + ConnectContext.get().getQualifiedUser(), + ConnectContext.get().getRemoteIP(), + dbName + ": " + tblName); + } + } + + private void analyzeSubPredicate(Expr subExpr) throws AnalysisException { + if (subExpr == null) { + return; + } + + boolean valid = true; + + CHECK: { + if (subExpr instanceof BinaryPredicate) { + BinaryPredicate binaryPredicate = (BinaryPredicate) subExpr; + if (binaryPredicate.getOp() != BinaryPredicate.Operator.EQ) { + valid = false; + break CHECK; + } + } else { + valid = false; + break CHECK; + } + + // left child + if (!(subExpr.getChild(0) instanceof SlotRef)) { + valid = false; + break CHECK; + } + String leftKey = ((SlotRef) subExpr.getChild(0)).getColumnName(); + if (!PRIORITY.equalsIgnoreCase(leftKey)) { + valid = false; + break CHECK; + } + + // right child + if (!(subExpr.getChild(1) instanceof StringLiteral)) { + valid = false; + break CHECK; + } + + String value = subExpr.getChild(1).getStringValue(); + if (Strings.isNullOrEmpty(value)) { + valid = false; + break CHECK; + } + + jobPriority = value.toUpperCase(); + try { + JobPriority.valueOf(jobPriority); + } catch (Exception e) { + valid = false; + } + } + + if (!valid) { + throw new AnalysisException("Where clause should looks like: " + + "PRIORITY = \"HIGH|MID|LOW\""); + } + } + + @Override + public String toSql() { + StringBuilder sb = new StringBuilder(); + sb.append("SHOW AUTO ANALYZE"); + + if (tableName != null) { + sb.append(" "); + sb.append(tableName.toSql()); + } + + if (whereClause != null) { + sb.append(" "); + sb.append("WHERE"); + sb.append(" "); + sb.append(whereClause.toSql()); + } + + return sb.toString(); + } + + @Override + public String toString() { + return toSql(); + } + + public TableName getTableName() { + return tableName; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java index 18bb916b8bdfce..cfe2d426f7b166 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java @@ -62,6 +62,8 @@ public class ShowColumnStatsStmt extends ShowStmt { .add("trigger") .add("query_times") .add("updated_time") + .add("update_rows") + .add("last_analyze_row_count") .build(); private final TableName tableName; @@ -162,6 +164,8 @@ public ShowResultSet constructResultSet(List, ColumnSt row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.jobType)); row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.queriedTimes)); row.add(String.valueOf(p.second.updatedTime)); + row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.updatedRows)); + row.add(String.valueOf(colStatsMeta == null ? 
"N/A" : colStatsMeta.rowCount)); result.add(row); }); return new ShowResultSet(getMetaData(), result); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 0ad32f76566768..275c4d1ff42171 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -244,9 +244,11 @@ import org.apache.doris.service.ExecuteEnv; import org.apache.doris.service.FrontendOptions; import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.FollowerColumnSender; import org.apache.doris.statistics.StatisticsAutoCollector; import org.apache.doris.statistics.StatisticsCache; import org.apache.doris.statistics.StatisticsCleaner; +import org.apache.doris.statistics.StatisticsJobAppender; import org.apache.doris.statistics.query.QueryStats; import org.apache.doris.system.Backend; import org.apache.doris.system.Frontend; @@ -524,6 +526,10 @@ public class Env { private StatisticsAutoCollector statisticsAutoCollector; + private StatisticsJobAppender statisticsJobAppender; + + private FollowerColumnSender followerColumnSender; + private HiveTransactionMgr hiveTransactionMgr; private TopicPublisherThread topicPublisherThread; @@ -756,6 +762,7 @@ public Env(boolean isCheckpointCatalog) { this.analysisManager = new AnalysisManager(); this.statisticsCleaner = new StatisticsCleaner(); this.statisticsAutoCollector = new StatisticsAutoCollector(); + this.statisticsJobAppender = new StatisticsJobAppender(); this.globalFunctionMgr = new GlobalFunctionMgr(); this.workloadGroupMgr = new WorkloadGroupMgr(); this.workloadSchedPolicyMgr = new WorkloadSchedPolicyMgr(); @@ -1058,13 +1065,6 @@ public void initialize(String[] args) throws Exception { // If not using bdb, we need to notify the FE type transfer manually. notifyNewFETypeTransfer(FrontendNodeType.MASTER); } - if (statisticsCleaner != null) { - statisticsCleaner.start(); - } - if (statisticsAutoCollector != null) { - statisticsAutoCollector.start(); - } - queryCancelWorker.start(); } @@ -1715,6 +1715,10 @@ protected void startMasterOnlyDaemonThreads() { topicPublisherThread.addToTopicPublisherList(wpPublisher); topicPublisherThread.start(); + // auto analyze related threads. + statisticsCleaner.start(); + statisticsAutoCollector.start(); + statisticsJobAppender.start(); } // start threads that should run on all FE @@ -1777,6 +1781,11 @@ private void transferToNonMaster(FrontendNodeType newType) { if (analysisManager != null) { analysisManager.getStatisticsCache().preHeat(); } + + if (followerColumnSender == null) { + followerColumnSender = new FollowerColumnSender(); + followerColumnSender.start(); + } } // Set global variable 'lower_case_table_names' only when the cluster is initialized. 
@@ -6113,6 +6122,10 @@ public NereidsSqlCacheManager getSqlCacheManager() { return sqlCacheManager; } + public StatisticsJobAppender getStatisticsJobAppender() { + return statisticsJobAppender; + } + public void alterMTMVRefreshInfo(AlterMTMVRefreshInfo info) { AlterMTMV alter = new AlterMTMV(info.getMvName(), info.getRefreshInfo(), MTMVAlterOpType.ALTER_REFRESH_INFO); this.alter.processAlterMTMV(alter, false); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 3932496f661c5a..a4c79d91890c1b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -64,7 +64,6 @@ import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.HistogramTask; import org.apache.doris.statistics.OlapAnalysisTask; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; @@ -802,9 +801,20 @@ public List getSchemaByIndexId(Long indexId, boolean full) { } @Override - public List getSchemaAllIndexes(boolean full) { + public Set getSchemaAllIndexes(boolean full) { + Set columns = Sets.newHashSet(); + for (Long indexId : indexIdToMeta.keySet()) { + columns.addAll(getSchemaByIndexId(indexId, full)); + } + return columns; + } + + public List getMvColumns(boolean full) { List columns = Lists.newArrayList(); for (Long indexId : indexIdToMeta.keySet()) { + if (indexId == baseIndexId) { + continue; + } columns.addAll(getSchemaByIndexId(indexId, full)); } return columns; @@ -1323,29 +1333,9 @@ public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) { } } - public boolean needReAnalyzeTable(TableStatsMeta tblStats) { - if (tblStats == null) { - return true; - } - if (!tblStats.analyzeColumns().containsAll(getColumnIndexPairs(getSchemaAllIndexes(false) - .stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName) - .collect(Collectors.toSet())))) { - return true; - } - long rowCount = getRowCount(); - if (rowCount > 0 && tblStats.rowCount == 0) { - return true; - } - long updateRows = tblStats.updatedRows.get(); - int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows); - return tblHealth < StatisticsUtil.getTableStatsHealthThreshold(); - } - @Override - public List> getColumnIndexPairs(Set columns) { - List> ret = Lists.newArrayList(); + public Set> getColumnIndexPairs(Set columns) { + Set> ret = Sets.newHashSet(); // Check the schema of all indexes for each given column name, // If the column name exists in the index, add the pair to return list. 
for (String column : columns) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index 136a0e04f2c253..9de006cebb5c46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -34,13 +34,13 @@ import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.ColumnStatistic; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.thrift.TTableDescriptor; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.google.gson.annotations.SerializedName; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.StringUtils; @@ -391,11 +391,6 @@ public List getBaseSchema() { return getBaseSchema(Util.showHiddenColumns()); } - @Override - public List getSchemaAllIndexes(boolean full) { - return getBaseSchema(); - } - public List getBaseSchema(boolean full) { if (full) { return fullSchema; @@ -623,11 +618,6 @@ public Optional getColumnStatistic(String colName) { public void analyze(String dbName) {} - @Override - public boolean needReAnalyzeTable(TableStatsMeta tblStats) { - return true; - } - @Override public List getChunkSizes() { throw new NotImplementedException("getChunkSized not implemented"); @@ -639,8 +629,8 @@ public long fetchRowCount() { } @Override - public List> getColumnIndexPairs(Set columns) { - return Lists.newArrayList(); + public Set> getColumnIndexPairs(Set columns) { + return Sets.newHashSet(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 1ba8ee30766667..c5039660e6eeae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -31,7 +31,6 @@ import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.ColumnStatistic; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.thrift.TTableDescriptor; import com.google.common.collect.ImmutableList; @@ -120,7 +119,11 @@ default boolean tryWriteLockIfExist(long timeout, TimeUnit unit) { List getBaseSchema(); - List getSchemaAllIndexes(boolean full); + default Set getSchemaAllIndexes(boolean full) { + Set ret = Sets.newHashSet(); + ret.addAll(getBaseSchema()); + return ret; + } default List getBaseSchemaOrEmpty() { try { @@ -188,13 +191,11 @@ default long getRowCountForNereids() { Optional getColumnStatistic(String colName); - boolean needReAnalyzeTable(TableStatsMeta tblStats); - /** * @param columns Set of column names. - * @return List of pairs. Each pair is . For external table, index name is table name. + * @return Set of pairs. Each pair is . For external table, index name is table name. */ - List> getColumnIndexPairs(Set columns); + Set> getColumnIndexPairs(Set columns); // Get all the chunk sizes of this table. Now, only HMS external table implemented this interface. // For HMS external table, the return result is a list of all the files' size. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index 952b5c64cf8fd5..11226fc9d78aa9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -33,11 +33,10 @@ import org.apache.doris.statistics.AnalysisInfo; import org.apache.doris.statistics.BaseAnalysisTask; import org.apache.doris.statistics.ColumnStatistic; -import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.thrift.TTableDescriptor; -import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import com.google.gson.annotations.SerializedName; import lombok.Getter; import org.apache.commons.lang3.NotImplementedException; @@ -51,7 +50,6 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.stream.Collectors; /** * External table represent tables that are not self-managed by Doris. @@ -151,11 +149,6 @@ public List getBaseSchema() { return getFullSchema(); } - @Override - public List getSchemaAllIndexes(boolean full) { - return getBaseSchema(); - } - @Override public List getBaseSchema(boolean full) { return getFullSchema(); @@ -331,25 +324,8 @@ public void gsonPostProcess() throws IOException { } @Override - public boolean needReAnalyzeTable(TableStatsMeta tblStats) { - if (tblStats == null) { - return true; - } - if (!tblStats.analyzeColumns().containsAll(getColumnIndexPairs( - getBaseSchema() - .stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName) - .collect(Collectors.toSet())))) { - return true; - } - return System.currentTimeMillis() - - tblStats.updatedTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); - } - - @Override - public List> getColumnIndexPairs(Set columns) { - List> ret = Lists.newArrayList(); + public Set> getColumnIndexPairs(Set columns) { + Set> ret = Sets.newHashSet(); for (String column : columns) { Column col = getColumn(column); if (col == null || StatisticsUtil.isUnsupportedType(col.getType())) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index dd52fad4f7f68c..7bd0040395a174 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -3206,7 +3206,6 @@ public void truncateTable(TruncateTableStmt truncateTableStmt) throws DdlExcepti rowsToTruncate += partition.getBaseIndex().getRowCount(); } } else { - rowsToTruncate = olapTable.getRowCount(); for (Partition partition : olapTable.getPartitions()) { // If need absolutely correct, should check running txn here. // But if the txn is in prepare state, cann't known which partitions had load data. @@ -3215,6 +3214,7 @@ public void truncateTable(TruncateTableStmt truncateTableStmt) throws DdlExcepti } origPartitions.put(partition.getName(), partition.getId()); partitionsDistributionInfo.put(partition.getId(), partition.getDistributionInfo()); + rowsToTruncate += partition.getBaseIndex().getRowCount(); } } // if table currently has no partitions, this sql like empty command and do nothing, should return directly. 
@@ -3375,10 +3375,8 @@ public void truncateTable(TruncateTableStmt truncateTableStmt) throws DdlExcepti if (truncateEntireTable) { // Drop the whole table stats after truncate the entire table Env.getCurrentEnv().getAnalysisManager().dropStats(olapTable); - } else { - // Update the updated rows in table stats after truncate some partitions. - Env.getCurrentEnv().getAnalysisManager().updateUpdatedRows(updateRecords); } + Env.getCurrentEnv().getAnalysisManager().updateUpdatedRows(updateRecords); LOG.info("finished to truncate table {}, partitions: {}", tblRef.getName().toSql(), tblRef.getPartitionNames()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index c4e67d6bc1260f..51d2f4f44d0d55 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -32,6 +32,7 @@ import org.apache.doris.nereids.rules.expression.ExpressionNormalization; import org.apache.doris.nereids.rules.expression.ExpressionNormalizationAndOptimization; import org.apache.doris.nereids.rules.expression.ExpressionRewrite; +import org.apache.doris.nereids.rules.expression.QueryColumnCollector; import org.apache.doris.nereids.rules.rewrite.AddDefaultLimit; import org.apache.doris.nereids.rules.rewrite.AdjustConjunctsReturnType; import org.apache.doris.nereids.rules.rewrite.AdjustNullable; @@ -417,7 +418,8 @@ public class Rewriter extends AbstractBatchJobExecutor { new CollectFilterAboveConsumer(), new CollectProjectAboveConsumer() ) - ) + ), + topic("Collect used column", custom(RuleType.COLLECT_COLUMNS, QueryColumnCollector::new)) ); private static final List WHOLE_TREE_REWRITE_JOBS diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 10004953cdf195..f1a797f4e2bc75 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -311,6 +311,7 @@ public enum RuleType { LEADING_JOIN(RuleTypeClass.REWRITE), REWRITE_SENTINEL(RuleTypeClass.REWRITE), + COLLECT_COLUMNS(RuleTypeClass.REWRITE), // topn opts DEFER_MATERIALIZE_TOP_N_RESULT(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/QueryColumnCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/QueryColumnCollector.java new file mode 100644 index 00000000000000..ebf361de1d3a9a --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/QueryColumnCollector.java @@ -0,0 +1,215 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.expression; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.nereids.jobs.JobContext; +import org.apache.doris.nereids.rules.expression.QueryColumnCollector.CollectorContext; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; +import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.util.StatisticsUtil; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Used to collect query column. + */ +public class QueryColumnCollector extends DefaultPlanRewriter implements CustomRewriter { + + @Override + public Plan rewriteRoot(Plan plan, JobContext jobContext) { + ConnectContext connectContext = ConnectContext.get(); + if (connectContext != null && connectContext.getSessionVariable().internalSession) { + return plan; + } + CollectorContext context = new CollectorContext(); + plan.accept(this, context); + if (StatisticsUtil.enableAutoAnalyze()) { + context.midPriority.removeAll(context.highPriority); + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + analysisManager.updateHighPriorityColumn(context.highPriority); + analysisManager.updateMidPriorityColumn(context.midPriority); + } + return plan; + } + + /** + * Context. + */ + public static class CollectorContext { + public Map projects = new HashMap<>(); + + public Set highPriority = new HashSet<>(); + + public Set midPriority = new HashSet<>(); + } + + @Override + public Plan visitLogicalProject(LogicalProject project, CollectorContext context) { + project.child().accept(this, context); + List projects = project.getOutputs(); + List slots = project.computeOutput(); + for (int i = 0; i < slots.size(); i++) { + context.projects.put(slots.get(i), projects.get(i)); + } + if (project.child() instanceof LogicalCatalogRelation + || project.child() instanceof LogicalFilter + && ((LogicalFilter) project.child()).child() instanceof LogicalCatalogRelation) { + Set allUsed = project.getExpressions() + .stream().flatMap(e -> e.>collect(n -> n instanceof SlotReference).stream()) + .collect(Collectors.toSet()); + LogicalCatalogRelation scan = project.child() instanceof LogicalCatalogRelation + ? 
(LogicalCatalogRelation) project.child() + : (LogicalCatalogRelation) project.child().child(0); + List outputOfScan = scan.getOutput(); + for (Slot slot : outputOfScan) { + if (!allUsed.contains(slot)) { + context.midPriority.remove(slot); + } + } + } + return project; + } + + @Override + public Plan visitLogicalJoin(LogicalJoin join, CollectorContext context) { + join.child(0).accept(this, context); + join.child(1).accept(this, context); + context.highPriority.addAll( + (join.isMarkJoin() ? join.getLeftConditionSlot() : join.getConditionSlot()) + .stream().flatMap(s -> backtrace(s, context).stream()) + .collect(Collectors.toSet()) + ); + return join; + } + + @Override + public Plan visitLogicalAggregate(LogicalAggregate aggregate, CollectorContext context) { + aggregate.child(0).accept(this, context); + context.highPriority.addAll(aggregate.getGroupByExpressions() + .stream() + .flatMap(e -> e.>collect(n -> n instanceof SlotReference).stream()) + .flatMap(s -> backtrace(s, context).stream()) + .collect(Collectors.toSet())); + return aggregate; + } + + @Override + public Plan visitLogicalHaving(LogicalHaving having, CollectorContext context) { + having.child(0).accept(this, context); + context.highPriority.addAll( + having.getExpressions().stream() + .flatMap(e -> e.>collect(n -> n instanceof SlotReference).stream()) + .flatMap(s -> backtrace(s, context).stream()) + .collect(Collectors.toSet())); + return having; + } + + @Override + public Plan visitLogicalOlapScan(LogicalOlapScan olapScan, CollectorContext context) { + List slots = olapScan.getOutput(); + context.midPriority.addAll(slots); + return olapScan; + } + + @Override + public Plan visitLogicalFileScan(LogicalFileScan fileScan, CollectorContext context) { + List slots = fileScan.getOutput(); + context.midPriority.addAll(slots); + return fileScan; + } + + @Override + public Plan visitLogicalFilter(LogicalFilter filter, CollectorContext context) { + filter.child(0).accept(this, context); + context.highPriority.addAll(filter + .getExpressions() + .stream() + .flatMap(e -> e.>collect(n -> n instanceof SlotReference).stream()) + .flatMap(s -> backtrace(s, context).stream()) + .collect(Collectors.toSet())); + return filter; + } + + @Override + public Plan visitLogicalWindow(LogicalWindow window, CollectorContext context) { + window.child(0).accept(this, context); + context.highPriority.addAll(window + .getWindowExpressions() + .stream() + .flatMap(e -> e.>collect(n -> n instanceof SlotReference).stream()) + .flatMap(s -> backtrace(s, context).stream()) + .collect(Collectors.toSet())); + return window; + } + + private Set backtrace(Slot slot, CollectorContext context) { + return backtrace(slot, new HashSet<>(), context); + } + + private Set backtrace(Slot slot, Set path, CollectorContext context) { + if (path.contains(slot)) { + return Collections.emptySet(); + } + path.add(slot); + if (slot instanceof SlotReference) { + SlotReference slotReference = (SlotReference) slot; + Optional col = slotReference.getColumn(); + Optional table = slotReference.getTable(); + if (col.isPresent() && table.isPresent()) { + return Collections.singleton(slot); + } + } + NamedExpression namedExpression = context.projects.get(slot); + if (namedExpression == null) { + return Collections.emptySet(); + } + Set slotReferences + = namedExpression.>collect(n -> n instanceof SlotReference); + Set refCol = new HashSet<>(); + for (SlotReference slotReference : slotReferences) { + refCol.addAll(backtrace(slotReference, path, context)); + } + return refCol; + } + +} 
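The QueryColumnCollector added above resolves the slots it sees in filters, joins, aggregates, and windows back to base-table columns by walking the project map recursively (backtrace). Below is a minimal, string-keyed sketch of that idea with hypothetical names; the real code operates on Nereids Slot and NamedExpression objects and uses the same visited-path guard against cycles.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class BacktraceSketch {
    // Maps a derived "slot" to the source slots it was projected from.
    private final Map<String, Set<String>> projects = new HashMap<>();
    // Slots that belong directly to a base table.
    private final Set<String> baseColumns = new HashSet<>();

    Set<String> backtrace(String slot) {
        return backtrace(slot, new HashSet<>());
    }

    private Set<String> backtrace(String slot, Set<String> path) {
        if (!path.add(slot)) {
            return Set.of();      // cycle guard, mirrors the visited-path check above
        }
        if (baseColumns.contains(slot)) {
            return Set.of(slot);  // reached a real table column
        }
        Set<String> sources = projects.get(slot);
        if (sources == null) {
            return Set.of();      // unknown slot, nothing to report
        }
        Set<String> result = new HashSet<>();
        for (String source : sources) {
            result.addAll(backtrace(source, path));
        }
        return result;
    }

    public static void main(String[] args) {
        BacktraceSketch sketch = new BacktraceSketch();
        sketch.baseColumns.add("t.a");
        sketch.baseColumns.add("t.b");
        sketch.projects.put("expr1", Set.of("t.a", "t.b")); // expr1 = a + b
        sketch.projects.put("expr2", Set.of("expr1"));      // expr2 = expr1 * 2
        // Prints t.a and t.b (set order unspecified)
        System.out.println(sketch.backtrace("expr2"));
    }
}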
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 579ad21c9ab1c5..433997035c4f74 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -480,6 +480,8 @@ public class SessionVariable implements Serializable, Writable { public static final String FORCE_SAMPLE_ANALYZE = "force_sample_analyze"; + public static final String ENABLE_AUTO_ANALYZE_INTERNAL_CATALOG = "enable_auto_analyze_internal_catalog"; + public static final String AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = "auto_analyze_table_width_threshold"; public static final String FASTER_FLOAT_CONVERT = "faster_float_convert"; @@ -1551,6 +1553,11 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { flag = VariableMgr.GLOBAL) public boolean forceSampleAnalyze = Config.force_sample_analyze; + @VariableMgr.VarAttr(name = ENABLE_AUTO_ANALYZE_INTERNAL_CATALOG, + description = {"临时参数,收否自动收集所有内表", "Temp variable, enable to auto collect all OlapTable."}, + flag = VariableMgr.GLOBAL) + public boolean enableAutoAnalyzeInternalCatalog = true; + @VariableMgr.VarAttr(name = AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD, description = {"参与自动收集的最大表宽度,列数多于这个参数的表不参与自动收集", "Maximum table width to enable auto analyze, " diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index f56a9993459428..e6fbef298890b8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -30,6 +30,7 @@ import org.apache.doris.analysis.ShowAnalyzeStmt; import org.apache.doris.analysis.ShowAnalyzeTaskStatus; import org.apache.doris.analysis.ShowAuthorStmt; +import org.apache.doris.analysis.ShowAutoAnalyzeJobsStmt; import org.apache.doris.analysis.ShowBackendsStmt; import org.apache.doris.analysis.ShowBackupStmt; import org.apache.doris.analysis.ShowBrokerStmt; @@ -213,6 +214,7 @@ import org.apache.doris.qe.help.HelpTopic; import org.apache.doris.rpc.RpcException; import org.apache.doris.statistics.AnalysisInfo; +import org.apache.doris.statistics.AutoAnalysisPendingJob; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.ResultRow; @@ -455,6 +457,8 @@ public ShowResultSet execute() throws AnalysisException { handleShowCreateCatalog(); } else if (stmt instanceof ShowAnalyzeStmt) { handleShowAnalyze(); + } else if (stmt instanceof ShowAutoAnalyzeJobsStmt) { + handleShowAutoAnalyzePendingJobs(); } else if (stmt instanceof ShowTabletsBelongStmt) { handleShowTabletsBelong(); } else if (stmt instanceof AdminCopyTabletStmt) { @@ -2855,6 +2859,7 @@ private void handleShowAnalyze() { java.time.ZoneId.systemDefault()); row.add(startTime.format(formatter)); row.add(endTime.format(formatter)); + row.add(analysisInfo.priority.name()); resultRows.add(row); } catch (Exception e) { LOG.warn("Failed to get analyze info for table {}.{}.{}, reason: {}", @@ -2865,6 +2870,35 @@ private void handleShowAnalyze() { resultSet = new ShowResultSet(showStmt.getMetaData(), resultRows); } + private void handleShowAutoAnalyzePendingJobs() { + ShowAutoAnalyzeJobsStmt showStmt = (ShowAutoAnalyzeJobsStmt) stmt; + List jobs = Env.getCurrentEnv().getAnalysisManager().showAutoPendingJobs(showStmt); + List> resultRows = Lists.newArrayList(); + for (AutoAnalysisPendingJob job 
: jobs) { + try { + List row = new ArrayList<>(); + CatalogIf> c = StatisticsUtil.findCatalog(job.catalogName); + row.add(c.getName()); + Optional> databaseIf = c.getDb(job.dbName); + row.add(databaseIf.isPresent() ? databaseIf.get().getFullName() : "DB may get deleted"); + if (databaseIf.isPresent()) { + Optional table = databaseIf.get().getTable(job.tableName); + row.add(table.isPresent() ? table.get().getName() : "Table may get deleted"); + } else { + row.add("DB may get deleted"); + } + row.add(job.getColumnNames()); + row.add(String.valueOf(job.priority)); + resultRows.add(row); + } catch (Exception e) { + LOG.warn("Failed to get pending jobs for table {}.{}.{}, reason: {}", + job.catalogName, job.dbName, job.tableName, e.getMessage()); + continue; + } + } + resultSet = new ShowResultSet(showStmt.getMetaData(), resultRows); + } + private void handleShowTabletsBelong() { ShowTabletsBelongStmt showStmt = (ShowTabletsBelongStmt) stmt; List> rows = new ArrayList<>(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 6b60e01d1c02a5..c26712af0bd345 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -225,6 +225,7 @@ import org.apache.doris.thrift.TStreamLoadMultiTablePutResult; import org.apache.doris.thrift.TStreamLoadPutRequest; import org.apache.doris.thrift.TStreamLoadPutResult; +import org.apache.doris.thrift.TSyncQueryColumns; import org.apache.doris.thrift.TTableIndexQueryStats; import org.apache.doris.thrift.TTableMetadataNameIds; import org.apache.doris.thrift.TTableQueryStats; @@ -3775,4 +3776,11 @@ public TShowUserResult showUser(TShowUserRequest request) { result.setUserinfoList(userInfo); return result; } + + public TStatus syncQueryColumns(TSyncQueryColumns request) throws TException { + Env.getCurrentEnv().getAnalysisManager().mergeFollowerQueryColumns(request.highPriorityColumns, + request.midPriorityColumns); + return new TStatus(TStatusCode.OK); + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index c167db2228d8cc..e0fd91d1100354 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -96,7 +96,7 @@ public enum ScheduleType { public final long tblId; // Pair - public final List> jobColumns; + public final Set> jobColumns; public final Set partitionNames; @@ -188,8 +188,11 @@ public enum ScheduleType { @SerializedName("endTime") public long endTime; - @SerializedName("emptyJob") - public final boolean emptyJob; + @SerializedName("rowCount") + public final long rowCount; + + @SerializedName("updateRows") + public final long updateRows; /** * * Used to store the newest partition version of tbl when creating this job. 
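The AnalysisInfo hunk above replaces the emptyJob flag with two persisted counters, rowCount and updateRows, both annotated with Gson's @SerializedName. A minimal sketch with a hypothetical holder class (not AnalysisInfo itself) showing how those annotations determine the persisted keys:

import com.google.gson.Gson;
import com.google.gson.annotations.SerializedName;

class SerializedNameSketch {
    static class JobSnapshot {
        @SerializedName("rowCount")
        final long rowCount;
        @SerializedName("updateRows")
        final long updateRows;
        JobSnapshot(long rowCount, long updateRows) {
            this.rowCount = rowCount;
            this.updateRows = updateRows;
        }
    }

    public static void main(String[] args) {
        // Prints something like {"rowCount":1000,"updateRows":42}
        System.out.println(new Gson().toJson(new JobSnapshot(1000L, 42L)));
    }
}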
@@ -197,16 +200,21 @@ public enum ScheduleType { */ public final long tblUpdateTime; + @SerializedName("userInject") public final boolean userInject; + @SerializedName("priority") + public final JobPriority priority; + public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, long dbId, long tblId, - List> jobColumns, Set partitionNames, String colName, Long indexId, + Set> jobColumns, Set partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, int samplePercent, long sampleRows, int maxBucketNum, long periodTimeInMs, String message, long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType, boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition, boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull, - boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject) { + boolean usingSqlForPartitionColumn, long tblUpdateTime, long rowCount, boolean userInject, + long updateRows, JobPriority priority) { this.jobId = jobId; this.taskId = taskId; this.taskIds = taskIds; @@ -242,8 +250,10 @@ public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, this.forceFull = forceFull; this.usingSqlForPartitionColumn = usingSqlForPartitionColumn; this.tblUpdateTime = tblUpdateTime; - this.emptyJob = emptyJob; + this.rowCount = rowCount; this.userInject = userInject; + this.updateRows = updateRows; + this.priority = priority; } @Override @@ -285,7 +295,10 @@ public String toString() { } sj.add("forceFull: " + forceFull); sj.add("usingSqlForPartitionColumn: " + usingSqlForPartitionColumn); - sj.add("emptyJob: " + emptyJob); + sj.add("rowCount: " + rowCount); + sj.add("userInject: " + userInject); + sj.add("updateRows: " + updateRows); + sj.add("priority: " + priority.name()); return sj.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 00cf9f7b1bc560..83da112d33a366 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -36,7 +36,7 @@ public class AnalysisInfoBuilder { private long catalogId; private long dbId; private long tblId; - private List> jobColumns; + private Set> jobColumns; private Set partitionNames; private String colName; private long indexId = -1L; @@ -62,8 +62,10 @@ public class AnalysisInfoBuilder { private boolean forceFull; private boolean usingSqlForPartitionColumn; private long tblUpdateTime; - private boolean emptyJob; + private long rowCount; private boolean userInject; + private long updateRows; + private JobPriority priority; public AnalysisInfoBuilder() { } @@ -101,8 +103,10 @@ public AnalysisInfoBuilder(AnalysisInfo info) { forceFull = info.forceFull; usingSqlForPartitionColumn = info.usingSqlForPartitionColumn; tblUpdateTime = info.tblUpdateTime; - emptyJob = info.emptyJob; + rowCount = info.rowCount; userInject = info.userInject; + updateRows = info.updateRows; + priority = info.priority; } public AnalysisInfoBuilder setJobId(long jobId) { @@ -135,7 +139,7 @@ public AnalysisInfoBuilder setTblId(long tblId) { return this; } - public AnalysisInfoBuilder setJobColumns(List> jobColumns) { + public AnalysisInfoBuilder setJobColumns(Set> jobColumns) { this.jobColumns = 
jobColumns; return this; } @@ -265,8 +269,8 @@ public AnalysisInfoBuilder setTblUpdateTime(long tblUpdateTime) { return this; } - public AnalysisInfoBuilder setEmptyJob(boolean emptyJob) { - this.emptyJob = emptyJob; + public AnalysisInfoBuilder setRowCount(long rowCount) { + this.rowCount = rowCount; return this; } @@ -275,12 +279,23 @@ public AnalysisInfoBuilder setUserInject(boolean userInject) { return this; } + public AnalysisInfoBuilder setUpdateRows(long updateRows) { + this.updateRows = updateRows; + return this; + } + + public AnalysisInfoBuilder setPriority(JobPriority priority) { + this.priority = priority; + return this; + } + public AnalysisInfo build() { return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, jobColumns, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount, - cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject); + cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, rowCount, userInject, updateRows, + priority); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java index 5fd5e43be53f2b..0bc0a437898c71 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java @@ -84,14 +84,12 @@ public synchronized void rowCountDone(BaseAnalysisTask task) { protected void markOneTaskDone() { if (queryingTask.isEmpty()) { try { - writeBuf(); - updateTaskState(AnalysisState.FINISHED, "Cost time in sec: " - + (System.currentTimeMillis() - start) / 1000); + flushBuffer(); } finally { deregisterJob(); } } else if (buf.size() >= StatisticsUtil.getInsertMergeCount()) { - writeBuf(); + flushBuffer(); } } @@ -115,7 +113,7 @@ public void updateTaskState(AnalysisState state, String msg) { } } - protected void writeBuf() { + protected void flushBuffer() { if (killed) { return; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index a2b30cce2ac5f4..03314fe7748a13 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -25,6 +25,7 @@ import org.apache.doris.analysis.DropStatsStmt; import org.apache.doris.analysis.KillAnalysisJobStmt; import org.apache.doris.analysis.ShowAnalyzeStmt; +import org.apache.doris.analysis.ShowAutoAnalyzeJobsStmt; import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; @@ -48,6 +49,8 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.persist.AnalyzeDeletionLog; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.qe.ConnectContext; @@ -63,6 +66,7 @@ import org.apache.doris.system.Frontend; import org.apache.doris.system.SystemInfoService; import 
org.apache.doris.thrift.TInvalidateFollowerStatsCacheRequest; +import org.apache.doris.thrift.TQueryColumn; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -83,15 +87,19 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NavigableMap; +import java.util.Objects; import java.util.Optional; +import java.util.Queue; import java.util.Set; import java.util.StringJoiner; import java.util.TreeMap; +import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; @@ -104,6 +112,14 @@ public class AnalysisManager implements Writable { private static final Logger LOG = LogManager.getLogger(AnalysisManager.class); + public static final int COLUMN_QUEUE_SIZE = 1000; + public final Queue highPriorityColumns = new ArrayBlockingQueue<>(COLUMN_QUEUE_SIZE); + public final Queue midPriorityColumns = new ArrayBlockingQueue<>(COLUMN_QUEUE_SIZE); + // Map>> + public final Map>> highPriorityJobs = new LinkedHashMap<>(); + public final Map>> midPriorityJobs = new LinkedHashMap<>(); + public final Map>> lowPriorityJobs = new LinkedHashMap<>(); + // Tracking running manually submitted async tasks, keep in mem only protected final ConcurrentMap> analysisJobIdToTaskMap = new ConcurrentHashMap<>(); @@ -154,13 +170,8 @@ public void createAnalyze(AnalyzeStmt analyzeStmt, boolean proxy) throws DdlExce } } - public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt, boolean proxy) throws DdlException, AnalysisException { + public void createAnalysisJobs(AnalyzeDBStmt analyzeDBStmt, boolean proxy) throws AnalysisException { DatabaseIf db = analyzeDBStmt.getDb(); - // Using auto analyzer if user specifies. - if (analyzeDBStmt.getAnalyzeProperties().getProperties().containsKey("use.auto.analyzer")) { - Env.getCurrentEnv().getStatisticsAutoCollector().analyzeDb(db); - return; - } List analysisInfos = buildAnalysisInfosForDB(db, analyzeDBStmt.getAnalyzeProperties()); if (!analyzeDBStmt.isSync()) { sendJobId(analysisInfos, proxy); @@ -178,9 +189,8 @@ public List buildAnalysisInfosForDB(DatabaseIf db, Analyz if (table instanceof View) { continue; } - TableName tableName = new TableName(db.getCatalog().getName(), db.getFullName(), - table.getName()); - // columnNames null means to add all visitable columns. + TableName tableName = new TableName(db.getCatalog().getName(), db.getFullName(), table.getName()); + // columnNames null means to add all visible columns. // Will get all the visible columns in analyzeTblStmt.check() AnalyzeTblStmt analyzeTblStmt = new AnalyzeTblStmt(analyzeProperties, tableName, null, db.getId(), table); @@ -208,6 +218,13 @@ public List buildAnalysisInfosForDB(DatabaseIf db, Analyz // Each analyze stmt corresponding to an analysis job. public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException { + // Using auto analyzer if user specifies. 
+ if ("true".equalsIgnoreCase(stmt.getAnalyzeProperties().getProperties().get("use.auto.analyzer"))) { + Env.getCurrentEnv().getStatisticsAutoCollector() + .processOneJob(stmt.getTable(), + stmt.getTable().getColumnIndexPairs(stmt.getColumnNames()), JobPriority.HIGH); + return; + } AnalysisInfo jobInfo = buildAndAssignJob(stmt); if (jobInfo == null) { return; @@ -219,8 +236,9 @@ public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlExce @VisibleForTesting protected AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlException { AnalysisInfo jobInfo = buildAnalysisJobInfo(stmt); - if (jobInfo.jobColumns.isEmpty()) { + if (jobInfo.jobColumns == null || jobInfo.jobColumns.isEmpty()) { // No statistics need to be collected or updated + LOG.info("Job columns are empty, skip analyze table {}", stmt.getTblName().toString()); return null; } // Only OlapTable and Hive HMSExternalTable support sample analyze. @@ -295,7 +313,7 @@ private void sendJobId(List analysisInfos, boolean proxy) { // Make sure colName of job has all the column as this AnalyzeStmt specified, no matter whether it will be analyzed // or not. @VisibleForTesting - public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException { + public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) { AnalysisInfoBuilder infoBuilder = new AnalysisInfoBuilder(); long jobId = Env.getCurrentEnv().getNextId(); TableIf table = stmt.getTable(); @@ -329,7 +347,6 @@ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlExceptio infoBuilder.setAnalysisMode(analysisMode); infoBuilder.setAnalysisMethod(analysisMethod); infoBuilder.setScheduleType(scheduleType); - infoBuilder.setLastExecTimeInMs(0); infoBuilder.setCronExpression(cronExpression); infoBuilder.setForceFull(stmt.forceFull()); infoBuilder.setUsingSqlForPartitionColumn(stmt.usingSqlForPartitionColumn()); @@ -346,7 +363,7 @@ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlExceptio long periodTimeInMs = stmt.getPeriodTimeInMs(); infoBuilder.setPeriodTimeInMs(periodTimeInMs); - List> jobColumns = table.getColumnIndexPairs(columnNames); + Set> jobColumns = table.getColumnIndexPairs(columnNames); infoBuilder.setJobColumns(jobColumns); StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); for (Pair pair : jobColumns) { @@ -355,8 +372,10 @@ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlExceptio infoBuilder.setColName(stringJoiner.toString()); infoBuilder.setTaskIds(Lists.newArrayList()); infoBuilder.setTblUpdateTime(table.getUpdateTime()); - infoBuilder.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0 - && analysisMethod.equals(AnalysisMethod.SAMPLE)); + infoBuilder.setRowCount(StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : table.getRowCount()); + TableStatsMeta tableStatsStatus = findTableStatsStatus(table.getId()); + infoBuilder.setUpdateRows(tableStatsStatus == null ? 
0 : tableStatsStatus.updatedRows.get()); + infoBuilder.setPriority(JobPriority.MANUAL); return infoBuilder.build(); } @@ -372,7 +391,7 @@ public void recordAnalysisJob(AnalysisInfo jobInfo) { public void createTaskForEachColumns(AnalysisInfo jobInfo, Map analysisTasks, boolean isSync) throws DdlException { - List> jobColumns = jobInfo.jobColumns; + Set> jobColumns = jobInfo.jobColumns; TableIf table = jobInfo.getTable(); for (Pair pair : jobColumns) { AnalysisInfoBuilder colTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo); @@ -505,7 +524,7 @@ public void updateTableStats(AnalysisInfo jobInfo) { } TableStatsMeta tableStats = findTableStatsStatus(tbl.getId()); if (tableStats == null) { - updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : tbl.getRowCount(), jobInfo, tbl)); + updateTableStatsStatus(new TableStatsMeta(jobInfo.rowCount, jobInfo, tbl)); } else { tableStats.update(jobInfo, tbl); logCreateTableStats(tableStats); @@ -529,6 +548,39 @@ public void updateTableStatsForAlterStats(AnalysisInfo jobInfo, TableIf tbl) { } } + public List showAutoPendingJobs(ShowAutoAnalyzeJobsStmt stmt) { + TableName tblName = stmt.getTableName(); + String priority = stmt.getPriority(); + List result = Lists.newArrayList(); + if (priority == null || priority.isEmpty()) { + result.addAll(getPendingJobs(highPriorityJobs, JobPriority.HIGH, tblName)); + result.addAll(getPendingJobs(midPriorityJobs, JobPriority.MID, tblName)); + result.addAll(getPendingJobs(lowPriorityJobs, JobPriority.LOW, tblName)); + } else if (priority.equals(JobPriority.HIGH.name())) { + result.addAll(getPendingJobs(highPriorityJobs, JobPriority.HIGH, tblName)); + } else if (priority.equals(JobPriority.MID.name())) { + result.addAll(getPendingJobs(midPriorityJobs, JobPriority.MID, tblName)); + } else if (priority.equals(JobPriority.LOW.name())) { + result.addAll(getPendingJobs(lowPriorityJobs, JobPriority.LOW, tblName)); + } + return result; + } + + protected List getPendingJobs(Map>> jobMap, + JobPriority priority, TableName tblName) { + List result = Lists.newArrayList(); + synchronized (jobMap) { + for (Entry>> entry : jobMap.entrySet()) { + TableName table = entry.getKey(); + if (tblName == null || tblName.equals(table)) { + result.add(new AutoAnalysisPendingJob(table.getCtl(), + table.getDb(), table.getTbl(), entry.getValue(), priority)); + } + } + } + return result; + } + public List showAnalysisJob(ShowAnalyzeStmt stmt) { return findShowAnalyzeResult(stmt); } @@ -555,7 +607,7 @@ private List findShowAnalyzeResult(ShowAnalyzeStmt stmt) { public String getJobProgress(long jobId) { List tasks = findTasksByTaskIds(jobId); - if (tasks == null) { + if (tasks == null || tasks.isEmpty()) { return "N/A"; } int finished = 0; @@ -674,6 +726,7 @@ public void invalidateLocalStats(long catalogId, long dbId, long tableId, } tableStats.updatedTime = 0; tableStats.userInjected = false; + tableStats.rowCount = table.getRowCount(); } public void invalidateRemoteStats(long catalogId, long dbId, long tableId, @@ -751,7 +804,7 @@ private BaseAnalysisTask createTask(AnalysisInfo analysisInfo) throws DdlExcepti analysisInfo.dbId, analysisInfo.tblId); return table.createAnalysisTask(analysisInfo); } catch (Throwable t) { - LOG.warn("Failed to find table", t); + LOG.warn("Failed to create task.", t); throw new DdlException("Failed to create task", t); } } @@ -854,7 +907,7 @@ public List findTasks(long jobId) { public List findTasksByTaskIds(long jobId) { AnalysisInfo jobInfo = analysisJobInfoMap.get(jobId); if (jobInfo != null && 
jobInfo.taskIds != null) { - return jobInfo.taskIds.stream().map(analysisTaskInfoMap::get).filter(i -> i != null) + return jobInfo.taskIds.stream().map(analysisTaskInfoMap::get).filter(Objects::nonNull) .collect(Collectors.toList()); } return null; @@ -871,7 +924,7 @@ public void removeAll(List analysisInfos) { public void dropAnalyzeJob(DropAnalyzeJobStmt analyzeJobStmt) throws DdlException { AnalysisInfo jobInfo = analysisJobInfoMap.get(analyzeJobStmt.getJobId()); if (jobInfo == null) { - throw new DdlException(String.format("Analyze job [%d] not exists", jobInfo.jobId)); + throw new DdlException(String.format("Analyze job [%d] not exists", analyzeJobStmt.getJobId())); } checkPriv(jobInfo); long jobId = analyzeJobStmt.getJobId(); @@ -911,15 +964,12 @@ public static boolean needAbandon(AnalysisInfo analysisInfo) { if (analysisInfo == null) { return true; } - if (analysisInfo.scheduleType == null || analysisInfo.scheduleType == null || analysisInfo.jobType == null) { - return true; - } - if ((AnalysisState.PENDING.equals(analysisInfo.state) || AnalysisState.RUNNING.equals(analysisInfo.state)) - && ScheduleType.ONCE.equals(analysisInfo.scheduleType) - && JobType.MANUAL.equals(analysisInfo.jobType)) { + if (analysisInfo.scheduleType == null || analysisInfo.jobType == null) { return true; } - return false; + return (AnalysisState.PENDING.equals(analysisInfo.state) || AnalysisState.RUNNING.equals(analysisInfo.state)) + && ScheduleType.ONCE.equals(analysisInfo.scheduleType) + && JobType.MANUAL.equals(analysisInfo.jobType); } private static void readIdToTblStats(DataInput in, Map map) throws IOException { @@ -1075,17 +1125,66 @@ public void removeJob(long id) { /** * Only OlapTable and Hive HMSExternalTable can sample for now. - * @param table + * @param table Table to check * @return Return true if the given table can do sample analyze. False otherwise. */ public boolean canSample(TableIf table) { if (table instanceof OlapTable) { return true; } - if (table instanceof HMSExternalTable - && ((HMSExternalTable) table).getDlaType().equals(HMSExternalTable.DLAType.HIVE)) { - return true; + return table instanceof HMSExternalTable + && ((HMSExternalTable) table).getDlaType().equals(HMSExternalTable.DLAType.HIVE); + } + + + public void updateHighPriorityColumn(Set slotReferences) { + updateColumn(slotReferences, highPriorityColumns); + } + + public void updateMidPriorityColumn(Collection slotReferences) { + updateColumn(slotReferences, midPriorityColumns); + } + + protected void updateColumn(Collection slotReferences, Queue queue) { + for (Slot s : slotReferences) { + if (!(s instanceof SlotReference)) { + return; + } + Optional optionalColumn = ((SlotReference) s).getColumn(); + Optional optionalTable = ((SlotReference) s).getTable(); + if (optionalColumn.isPresent() && optionalTable.isPresent() + && !StatisticsUtil.isUnsupportedType(optionalColumn.get().getType())) { + TableIf table = optionalTable.get(); + DatabaseIf database = table.getDatabase(); + if (database != null) { + CatalogIf catalog = database.getCatalog(); + if (catalog != null) { + queue.offer(new QueryColumn(catalog.getId(), database.getId(), + table.getId(), optionalColumn.get().getName())); + if (LOG.isDebugEnabled()) { + LOG.debug("Offer column " + table.getName() + "(" + table.getId() + ")." 
+ + optionalColumn.get().getName()); + } + } + } + } + } + } + + public void mergeFollowerQueryColumns(Collection highColumns, + Collection midColumns) { + LOG.info("Received {} high columns and {} mid columns", highColumns.size(), midColumns.size()); + for (TQueryColumn c : highColumns) { + if (!highPriorityColumns.offer(new QueryColumn(Long.parseLong(c.catalogId), Long.parseLong(c.dbId), + Long.parseLong(c.tblId), c.colName))) { + break; + } + } + for (TQueryColumn c : midColumns) { + if (!midPriorityColumns.offer(new QueryColumn(Long.parseLong(c.catalogId), Long.parseLong(c.dbId), + Long.parseLong(c.tblId), c.colName))) { + break; + } } - return false; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java index 3bdccaca047954..d787794534a7c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java @@ -27,6 +27,7 @@ import java.util.Comparator; import java.util.concurrent.BlockingQueue; +import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; @@ -52,7 +53,7 @@ public AnalysisTaskExecutor(int simultaneouslyRunningTaskNum, int taskQueueSize) simultaneouslyRunningTaskNum, simultaneouslyRunningTaskNum, 0, TimeUnit.DAYS, new LinkedBlockingQueue<>(taskQueueSize), - new BlockedPolicy("Analysis Job Executor", Integer.MAX_VALUE), + new BlockedPolicy("Analysis Job Executor Block Policy", Integer.MAX_VALUE), "Analysis Job Executor", true); cancelExpiredTask(); } else { @@ -88,9 +89,9 @@ protected void tryToCancel() { } } - public void submitTask(BaseAnalysisTask task) { + public Future submitTask(BaseAnalysisTask task) { AnalysisTaskWrapper taskWrapper = new AnalysisTaskWrapper(this, task); - executors.submit(taskWrapper); + return executors.submit(taskWrapper); } public void putJob(AnalysisTaskWrapper wrapper) throws Exception { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AutoAnalysisPendingJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AutoAnalysisPendingJob.java new file mode 100644 index 00000000000000..e349e4fcb3f2e8 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AutoAnalysisPendingJob.java @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.statistics; + +import org.apache.doris.common.Pair; + +import java.util.Set; +import java.util.StringJoiner; + +public class AutoAnalysisPendingJob { + + public final String catalogName; + public final String dbName; + public final String tableName; + public final Set> columns; + public final JobPriority priority; + + public AutoAnalysisPendingJob(String catalogName, String dbName, String tableName, + Set> columns, JobPriority priority) { + this.catalogName = catalogName; + this.dbName = dbName; + this.tableName = tableName; + this.columns = columns; + this.priority = priority; + } + + public String getColumnNames() { + if (columns == null) { + return ""; + } + StringJoiner stringJoiner = new StringJoiner(","); + for (Pair col : columns) { + stringJoiner.add(col.toString()); + } + return stringJoiner.toString(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index f871e8761a5e55..d35e45987b8501 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -39,7 +39,6 @@ import java.text.MessageFormat; import java.util.Collections; -import java.util.concurrent.TimeUnit; public abstract class BaseAnalysisTask { @@ -48,7 +47,7 @@ public abstract class BaseAnalysisTask { public static final long LIMIT_SIZE = 1024 * 1024 * 1024; // 1GB public static final double LIMIT_FACTOR = 1.2; - protected static final String COLLECT_COL_STATISTICS = + protected static final String FULL_ANALYZE_TEMPLATE = "SELECT CONCAT(${tblId}, '-', ${idxId}, '-', '${colId}') AS `id`, " + " ${catalogId} AS `catalog_id`, " + " ${dbId} AS `db_id`, " @@ -194,9 +193,9 @@ protected void init(AnalysisInfo info) { } } - public void execute() { + public void execute() throws Exception { prepareExecution(); - executeWithRetry(); + doExecute(); afterExecution(); } @@ -204,29 +203,6 @@ protected void prepareExecution() { setTaskStateToRunning(); } - protected void executeWithRetry() { - int retriedTimes = 0; - while (retriedTimes < StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { - if (killed) { - break; - } - try { - doExecute(); - break; - } catch (Throwable t) { - if (killed) { - throw new RuntimeException(t); - } - LOG.warn("Failed to execute analysis task, retried times: {}", retriedTimes++, t); - if (retriedTimes >= StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { - job.taskFailed(this, t.getMessage()); - throw new RuntimeException(t); - } - StatisticsUtil.sleep(TimeUnit.SECONDS.toMillis(2 ^ retriedTimes) * 10); - } - } - } - public abstract void doExecute() throws Exception; protected void afterExecution() {} @@ -284,9 +260,8 @@ protected String getNdvFunction(String totalRows) { // (https://github.com/postgres/postgres/blob/master/src/backend/commands/analyze.c) // (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.93.8637&rep=rep1&type=pdf) // sample_row * count_distinct / ( sample_row - once_count + once_count * sample_row / total_row) - String fn = MessageFormat.format("{0} * {1} / ({0} - {2} + {2} * {0} / {3})", sampleRows, + return MessageFormat.format("{0} * {1} / ({0} - {2} + {2} * {0} / {3})", sampleRows, countDistinct, onceCount, totalRows); - return fn; } // Max value is not accurate while sample, so set it to NULL to avoid optimizer generate bad plan. 
@@ -336,6 +311,9 @@ protected void runQuery(String sql) { Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData); queryId = DebugUtil.printId(stmtExecutor.getContext().queryId()); job.appendBuf(this, Collections.singletonList(colStatsData)); + } catch (Exception e) { + LOG.warn("Failed to execute sql {}", sql); + throw e; } finally { if (LOG.isDebugEnabled()) { LOG.debug("End cost time in millisec: " + (System.currentTimeMillis() - startTime) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java index 445641b2505610..7e317d67bd740f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsMeta.java @@ -43,16 +43,20 @@ public class ColStatsMeta { @SerializedName("trigger") public JobType jobType; - public ColStatsMeta(long updatedTime, AnalysisMethod analysisMethod, - AnalysisType analysisType, JobType jobType, long queriedTimes) { + @SerializedName("updatedRows") + public long updatedRows; + + @SerializedName("rowCount") + public long rowCount; + + public ColStatsMeta(long updatedTime, AnalysisMethod analysisMethod, AnalysisType analysisType, JobType jobType, + long queriedTimes, long rowCount, long updatedRows) { this.updatedTime = updatedTime; this.analysisMethod = analysisMethod; this.analysisType = analysisType; this.jobType = jobType; this.queriedTimes.addAndGet(queriedTimes); - } - - public void clear() { - updatedTime = 0; + this.updatedRows = updatedRows; + this.rowCount = rowCount; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java index 287941be526635..7d3c9af254800d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java @@ -73,6 +73,8 @@ protected void setTable(ExternalTable table) { */ private void getTableStats() { Map params = buildStatsParams(null); + Pair sampleInfo = getSampleInfo(); + params.put("scaleFactor", String.valueOf(sampleInfo.first)); List columnResult = StatisticsUtil.execStatisticQuery(new StringSubstitutor(params) .replace(ANALYZE_TABLE_COUNT_TEMPLATE)); @@ -98,7 +100,7 @@ protected void getColumnStats() throws Exception { if (LOG.isDebugEnabled()) { LOG.debug("Will do full collection for column {}", col.getName()); } - sb.append(COLLECT_COL_STATISTICS); + sb.append(FULL_ANALYZE_TEMPLATE); } else { // Do sample analyze if (LOG.isDebugEnabled()) { @@ -254,9 +256,6 @@ protected boolean needLimit(long sizeToRead, double factor) { } target = columnSize * tableSample.getSampleValue(); } - if (sizeToRead > LIMIT_SIZE && sizeToRead > target * LIMIT_FACTOR) { - return true; - } - return false; + return sizeToRead > LIMIT_SIZE && sizeToRead > target * LIMIT_FACTOR; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java new file mode 100644 index 00000000000000..0e66c7f8a75be7 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/FollowerColumnSender.java @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.ClientPool; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.MasterDaemon; +import org.apache.doris.ha.FrontendNodeType; +import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.system.Frontend; +import org.apache.doris.thrift.FrontendService; +import org.apache.doris.thrift.TNetworkAddress; +import org.apache.doris.thrift.TQueryColumn; +import org.apache.doris.thrift.TSyncQueryColumns; + +import com.google.common.collect.Sets; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Queue; +import java.util.Set; + +public class FollowerColumnSender extends MasterDaemon { + + private static final Logger LOG = LogManager.getLogger(FollowerColumnSender.class); + + public static final long INTERVAL = 60000; + + public FollowerColumnSender() { + super("Follower Column Sender", INTERVAL); + } + + @Override + protected void runAfterCatalogReady() { + if (!StatisticsUtil.enableAutoAnalyze()) { + return; + } + if (Env.getCurrentEnv().isMaster()) { + return; + } + if (Env.isCheckpointThread()) { + return; + } + send(); + } + + protected void send() { + if (Env.getCurrentEnv().isMaster()) { + return; + } + Env currentEnv = Env.getCurrentEnv(); + AnalysisManager analysisManager = currentEnv.getAnalysisManager(); + if (analysisManager.highPriorityColumns.isEmpty() && analysisManager.midPriorityColumns.isEmpty()) { + return; + } + Set highs = getNeedAnalyzeColumns(analysisManager.highPriorityColumns); + Set mids = getNeedAnalyzeColumns(analysisManager.midPriorityColumns); + mids.removeAll(highs); + TSyncQueryColumns queryColumns = new TSyncQueryColumns(); + queryColumns.highPriorityColumns = new ArrayList<>(highs); + queryColumns.midPriorityColumns = new ArrayList<>(mids); + Frontend master = null; + try { + InetSocketAddress masterAddress = currentEnv.getHaProtocol().getLeader(); + for (Frontend fe : currentEnv.getFrontends(FrontendNodeType.FOLLOWER)) { + InetSocketAddress socketAddress = new InetSocketAddress(fe.getHost(), fe.getEditLogPort()); + if (socketAddress.equals(masterAddress)) { + master = fe; + break; + } + } + } catch (Exception e) { + LOG.warn("Failed to find master FE.", e); + return; + } + + if (master == null) { + LOG.warn("No master found in cluster."); + return; + } + TNetworkAddress address = new TNetworkAddress(master.getHost(), master.getRpcPort()); + FrontendService.Client client = null; + try { + client = ClientPool.frontendPool.borrowObject(address); + client.syncQueryColumns(queryColumns); + LOG.info("Send {} high 
priority columns and {} mid priority columns to master.", + highs.size(), mids.size()); + } catch (Throwable t) { + LOG.warn("Failed to sync stats to master: {}", address, t); + } finally { + if (client != null) { + ClientPool.frontendPool.returnObject(address, client); + } + } + } + + protected Set getNeedAnalyzeColumns(Queue columnQueue) { + Set ret = Sets.newHashSet(); + TableIf table; + int size = columnQueue.size(); + for (int i = 0; i < size; i++) { + QueryColumn column = columnQueue.poll(); + if (column == null) { + continue; + } + try { + table = StatisticsUtil.findTable(column.catalogId, column.dbId, column.tblId); + } catch (Exception e) { + LOG.warn("Failed to find table for column {}", column.colName, e); + continue; + } + if (StatisticsUtil.isUnsupportedType(table.getColumn(column.colName).getType())) { + continue; + } + Set> columnIndexPairs = table.getColumnIndexPairs( + Collections.singleton(column.colName)); + for (Pair pair : columnIndexPairs) { + if (StatisticsUtil.needAnalyzeColumn(table, pair)) { + ret.add(column.toThrift()); + break; + } + } + } + return ret; + } + + protected List convertSetToList(Set set) { + return new ArrayList<>(set); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java index 60da8f4d2a0803..26ef561ddf6a27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java @@ -79,11 +79,6 @@ public void doExecute() throws Exception { tbl.getDatabase().getCatalog().getId(), tbl.getDatabase().getId(), tbl.getId(), -1, col.getName()); } - @Override - protected void afterExecution() { - // DO NOTHING - } - private String getSampleRateFunction() { if (info.analysisMethod == AnalysisMethod.FULL) { return "0"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java new file mode 100644 index 00000000000000..c3656b929279e6 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JobPriority.java @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.statistics; + +public enum JobPriority { + HIGH, + MID, + LOW, + MANUAL; +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 60bfcab6157377..ce7982d4f1ad58 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -37,7 +37,6 @@ import java.security.SecureRandom; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -65,11 +64,11 @@ public OlapAnalysisTask(AnalysisInfo info) { } public void doExecute() throws Exception { - List> columnList = info.jobColumns; - if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) || columnList == null || columnList.isEmpty()) { + // For empty table, write empty result directly, no need to run SQL to collect stats. + if (info.rowCount == 0 && tableSample != null) { StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId, info.tblId, info.indexId, info.colName, null); - job.appendBuf(this, Arrays.asList(new ColStatsData(statsId))); + job.appendBuf(this, Collections.singletonList(new ColStatsData(statsId))); return; } if (tableSample != null) { @@ -84,7 +83,7 @@ public void doExecute() throws Exception { * 2. estimate partition stats * 3. insert col stats and partition stats */ - protected void doSample() throws Exception { + protected void doSample() { if (LOG.isDebugEnabled()) { LOG.debug("Will do sample collection for column {}", col.getName()); } @@ -209,7 +208,7 @@ protected ResultRow collectBasicStat(AutoCloseConnectContext context) { * 2. insert partition in batch * 3. calculate column stats based on partition stats */ - protected void doFull() throws Exception { + protected void doFull() { if (LOG.isDebugEnabled()) { LOG.debug("Will do full collection for column {}", col.getName()); } @@ -228,8 +227,7 @@ protected void doFull() throws Exception { params.put("tblName", String.valueOf(tbl.getName())); params.put("index", getIndex()); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); - String collectColStats = stringSubstitutor.replace(COLLECT_COL_STATISTICS); - runQuery(collectColStats); + runQuery(stringSubstitutor.replace(FULL_ANALYZE_TEMPLATE)); } protected String getIndex() { @@ -316,10 +314,7 @@ protected boolean needLimit() { return false; } // Partition column need to scan tablets from all partitions. 
- if (tbl.isPartitionColumn(col.getName())) { - return false; - } - return true; + return !tbl.isPartitionColumn(col.getName()); } /** @@ -382,12 +377,6 @@ protected boolean isSingleUniqueKey() { } protected String concatColumnStatsId() { - StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append(info.tblId); - stringBuilder.append("-"); - stringBuilder.append(info.indexId); - stringBuilder.append("-"); - stringBuilder.append(info.colName); - return stringBuilder.toString(); + return info.tblId + "-" + info.indexId + "-" + info.colName; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/QueryColumn.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/QueryColumn.java new file mode 100644 index 00000000000000..df91ea7f4c0582 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/QueryColumn.java @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.thrift.TQueryColumn; + +import java.util.Objects; + +public class QueryColumn { + + public final long catalogId; + public final long dbId; + public final long tblId; + public final String colName; + + public QueryColumn(long catalogId, long dbId, long tblId, String colName) { + this.catalogId = catalogId; + this.dbId = dbId; + this.tblId = tblId; + this.colName = colName; + } + + @Override + public int hashCode() { + return Objects.hash(catalogId, dbId, tblId, colName); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof QueryColumn)) { + return false; + } + QueryColumn otherCriticalColumn = (QueryColumn) other; + return this.catalogId == otherCriticalColumn.catalogId + && this.dbId == otherCriticalColumn.dbId + && this.tblId == otherCriticalColumn.tblId + && this.colName.equals(otherCriticalColumn.colName); + } + + public TQueryColumn toThrift() { + TQueryColumn tQueryColumn = new TQueryColumn(); + tQueryColumn.catalogId = String.valueOf(catalogId); + tQueryColumn.dbId = String.valueOf(dbId); + tQueryColumn.tblId = String.valueOf(tblId); + tQueryColumn.colName = colName; + return tQueryColumn; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 74c7bd7c9db127..a5bd18946e8376 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -64,8 +64,6 @@ public class StatisticConstants { public static List SYSTEM_DBS = new ArrayList<>(); - public static int ANALYZE_TASK_RETRY_TIMES = 5; - public static final String DB_NAME = FeConstants.INTERNAL_DB_NAME; 
public static final String FULL_QUALIFIED_STATS_TBL_NAME = InternalCatalog.INTERNAL_CATALOG_NAME @@ -95,7 +93,7 @@ public class StatisticConstants { public static final int ANALYZE_TIMEOUT_IN_SEC = 43200; - public static final int TASK_QUEUE_CAP = 10; + public static final int TASK_QUEUE_CAP = 1; public static final int AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = 100; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 9ca971845b7e64..479610ccea25ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -17,17 +17,16 @@ package org.apache.doris.statistics; +import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; -import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; +import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.common.util.TimeUtils; -import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.JobType; @@ -40,200 +39,199 @@ import java.time.LocalTime; import java.util.ArrayList; -import java.util.List; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; import java.util.Set; import java.util.StringJoiner; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -public class StatisticsAutoCollector extends StatisticsCollector { +public class StatisticsAutoCollector extends MasterDaemon { private static final Logger LOG = LogManager.getLogger(StatisticsAutoCollector.class); + protected final AnalysisTaskExecutor analysisTaskExecutor; + public StatisticsAutoCollector() { - super("Automatic Analyzer", - TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes), - new AnalysisTaskExecutor(Config.auto_analyze_simultaneously_running_task_num, - StatisticConstants.TASK_QUEUE_CAP)); + super("Automatic Analyzer", TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes)); + this.analysisTaskExecutor = new AnalysisTaskExecutor(Config.auto_analyze_simultaneously_running_task_num, + StatisticConstants.TASK_QUEUE_CAP); } @Override - protected void collect() { - if (canCollect()) { - analyzeAll(); + protected void runAfterCatalogReady() { + if (!Env.getCurrentEnv().isMaster()) { + return; } + if (!StatisticsUtil.statsTblAvailable()) { + LOG.info("Stats table not available, skip"); + return; + } + if (Env.isCheckpointThread()) { + return; + } + collect(); } - protected boolean canCollect() { - return StatisticsUtil.enableAutoAnalyze() - && StatisticsUtil.inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId())); - } - - protected void analyzeAll() { - List catalogs = getCatalogsInOrder(); - for (CatalogIf ctl : catalogs) { - if (!canCollect()) { - analysisTaskExecutor.clear(); + protected void collect() { + while (canCollect()) { + Pair>>, JobPriority> job = getJob(); + if (job == null) { + // 
No more job to process, break and sleep. break; } - if (!ctl.enableAutoAnalyze()) { - continue; - } - List dbs = getDatabasesInOrder(ctl); - for (DatabaseIf databaseIf : dbs) { - if (!canCollect()) { - analysisTaskExecutor.clear(); - break; - } - if (StatisticConstants.SYSTEM_DBS.contains(databaseIf.getFullName())) { - continue; - } - try { - analyzeDb(databaseIf); - } catch (Throwable t) { - LOG.warn("Failed to analyze database {}.{}", ctl.getName(), databaseIf.getFullName(), t); + try { + TableName tblName = job.first.getKey(); + TableIf table = StatisticsUtil.findTable(tblName.getCtl(), tblName.getDb(), tblName.getTbl()); + if (!supportAutoAnalyze(table)) { continue; } + processOneJob(table, job.first.getValue(), job.second); + } catch (Exception e) { + LOG.warn("Failed to analyze table {} with columns [{}]", job.first.getKey().getTbl(), + job.first.getValue().stream().map(Pair::toString).collect(Collectors.joining(",")), e); } } } - public List getCatalogsInOrder() { - return Env.getCurrentEnv().getCatalogMgr().getCopyOfCatalog().stream() - .sorted((c1, c2) -> (int) (c1.getId() - c2.getId())).collect(Collectors.toList()); - } - - public List> getDatabasesInOrder(CatalogIf catalog) { - return catalog.getAllDbs().stream() - .sorted((d1, d2) -> (int) (d1.getId() - d2.getId())).collect(Collectors.toList()); + protected boolean canCollect() { + return StatisticsUtil.enableAutoAnalyze() + && StatisticsUtil.inAnalyzeTime(LocalTime.now(TimeUtils.getTimeZone().toZoneId())); } - public List getTablesInOrder(DatabaseIf db) { - return db.getTables().stream() - .sorted((t1, t2) -> (int) (t1.getId() - t2.getId())).collect(Collectors.toList()); + protected Pair>>, JobPriority> getJob() { + AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); + Optional>>> job = fetchJobFromMap(manager.highPriorityJobs); + if (job.isPresent()) { + return Pair.of(job.get(), JobPriority.HIGH); + } + job = fetchJobFromMap(manager.midPriorityJobs); + if (job.isPresent()) { + return Pair.of(job.get(), JobPriority.MID); + } + job = fetchJobFromMap(manager.lowPriorityJobs); + return job.map(entry -> Pair.of(entry, JobPriority.LOW)).orElse(null); } - public void analyzeDb(DatabaseIf databaseIf) throws DdlException { - List analysisInfos = constructAnalysisInfo(databaseIf); - for (AnalysisInfo analysisInfo : analysisInfos) { - try { - if (!canCollect()) { - analysisTaskExecutor.clear(); - break; - } - analysisInfo = getNeedAnalyzeColumns(analysisInfo); - if (analysisInfo == null) { - continue; - } - createSystemAnalysisJob(analysisInfo); - } catch (Throwable t) { - analysisInfo.message = t.getMessage(); - LOG.warn("Failed to auto analyze table {}.{}, reason {}", - databaseIf.getFullName(), analysisInfo.tblId, analysisInfo.message, t); - continue; - } + protected Optional>>> fetchJobFromMap( + Map>> jobMap) { + synchronized (jobMap) { + Optional>>> first = jobMap.entrySet().stream().findFirst(); + first.ifPresent(entry -> jobMap.remove(entry.getKey())); + return first; } } - protected List constructAnalysisInfo(DatabaseIf db) { - List analysisInfos = new ArrayList<>(); - for (TableIf table : getTablesInOrder(db)) { - try { - if (skip(table)) { - continue; - } - createAnalyzeJobForTbl(db, analysisInfos, table); - } catch (Throwable t) { - LOG.warn("Failed to analyze table {}.{}.{}", - db.getCatalog().getName(), db.getFullName(), table.getName(), t); - continue; + protected void processOneJob(TableIf table, Set> columns, + JobPriority priority) throws DdlException { + // appendMvColumn(table, columns); + 
appendPartitionColumns(table, columns); + columns = columns.stream().filter(c -> StatisticsUtil.needAnalyzeColumn(table, c)).collect(Collectors.toSet()); + if (columns.isEmpty()) { + return; + } + AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority); + LOG.debug("Auto analyze job : {}", analyzeJob.toString()); + try { + executeSystemAnalysisJob(analyzeJob); + } catch (Exception e) { + StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); + for (Pair pair : columns) { + stringJoiner.add(pair.toString()); } + LOG.warn("Fail to auto analyze table {}, columns [{}]", table.getName(), stringJoiner.toString()); } - return analysisInfos; } - // return true if skip auto analyze this time. - protected boolean skip(TableIf table) { - if (!(table instanceof OlapTable || table instanceof HMSExternalTable)) { - return true; + protected void appendPartitionColumns(TableIf table, Set> columns) throws DdlException { + if (!(table instanceof OlapTable)) { + return; } - // For now, only support Hive HMS table auto collection. - if (table instanceof HMSExternalTable - && !((HMSExternalTable) table).getDlaType().equals(HMSExternalTable.DLAType.HIVE)) { - return true; + AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); + if (tableStatsStatus != null && tableStatsStatus.newPartitionLoaded.get()) { + OlapTable olapTable = (OlapTable) table; + columns.addAll(olapTable.getColumnIndexPairs(olapTable.getPartitionColumnNames())); } - if (table.getDataSize(true) < StatisticsUtil.getHugeTableLowerBoundSizeInBytes() * 5) { - return false; + } + + protected void appendMvColumn(TableIf table, Set columns) { + if (!(table instanceof OlapTable)) { + return; } - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(table.getId()); - // means it's never got analyzed or new partition loaded data. - if (tableStats == null || tableStats.newPartitionLoaded.get()) { + OlapTable olapTable = (OlapTable) table; + Set mvColumns = olapTable.getMvColumns(false).stream().map(Column::getName).collect(Collectors.toSet()); + columns.addAll(mvColumns); + } + + protected boolean supportAutoAnalyze(TableIf tableIf) { + if (tableIf == null) { return false; } - if (tableStats.userInjected) { - return true; - } - return System.currentTimeMillis() - - tableStats.updatedTime < StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis(); + return tableIf instanceof OlapTable + || tableIf instanceof HMSExternalTable + && ((HMSExternalTable) tableIf).getDlaType().equals(HMSExternalTable.DLAType.HIVE); } - protected void createAnalyzeJobForTbl(DatabaseIf db, - List analysisInfos, TableIf table) { + protected AnalysisInfo createAnalyzeJobForTbl( + TableIf table, Set> jobColumns, JobPriority priority) { AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; - AnalysisInfo jobInfo = new AnalysisInfoBuilder() + AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); + long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 
0 : table.getRowCount(); + StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); + for (Pair pair : jobColumns) { + stringJoiner.add(pair.toString()); + } + return new AnalysisInfoBuilder() .setJobId(Env.getCurrentEnv().getNextId()) - .setCatalogId(db.getCatalog().getId()) - .setDBId(db.getId()) + .setCatalogId(table.getDatabase().getCatalog().getId()) + .setDBId(table.getDatabase().getId()) .setTblId(table.getId()) - .setColName(null) + .setColName(stringJoiner.toString()) + .setJobColumns(jobColumns) .setAnalysisType(AnalysisInfo.AnalysisType.FUNDAMENTALS) .setAnalysisMode(AnalysisInfo.AnalysisMode.INCREMENTAL) .setAnalysisMethod(analysisMethod) .setSampleRows(analysisMethod.equals(AnalysisMethod.SAMPLE) - ? StatisticsUtil.getHugeTableSampleRows() : -1) + ? StatisticsUtil.getHugeTableSampleRows() : -1) .setScheduleType(ScheduleType.AUTOMATIC) .setState(AnalysisState.PENDING) .setTaskIds(new ArrayList<>()) .setLastExecTimeInMs(System.currentTimeMillis()) .setJobType(JobType.SYSTEM) .setTblUpdateTime(table.getUpdateTime()) - .setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0 - && analysisMethod.equals(AnalysisMethod.SAMPLE)) + .setRowCount(rowCount) + .setUpdateRows(tableStatsStatus == null ? 0 : tableStatsStatus.updatedRows.get()) + .setPriority(priority) .build(); - analysisInfos.add(jobInfo); } + // Analysis job created by the system @VisibleForTesting - protected AnalysisInfo getNeedAnalyzeColumns(AnalysisInfo jobInfo) { - TableIf table = StatisticsUtil.findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); - // Skip tables that are too wide. - if (table.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) { - return null; - } - - AnalysisManager analysisManager = Env.getServingEnv().getAnalysisManager(); - TableStatsMeta tblStats = analysisManager.findTableStatsStatus(table.getId()); - - List> needRunColumns = null; - if (table.needReAnalyzeTable(tblStats)) { - needRunColumns = table.getColumnIndexPairs(table.getSchemaAllIndexes(false) - .stream().map(Column::getName).collect(Collectors.toSet())); - } else if (table instanceof OlapTable && tblStats.newPartitionLoaded.get()) { - OlapTable olapTable = (OlapTable) table; - Set partitionNames = olapTable.getAllPartitions().stream() - .map(Partition::getName).collect(Collectors.toSet()); - needRunColumns = olapTable.getColumnIndexPairs(partitionNames); + protected void executeSystemAnalysisJob(AnalysisInfo jobInfo) + throws DdlException, ExecutionException, InterruptedException { + Map analysisTasks = new HashMap<>(); + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false); + if (StatisticsUtil.isExternalTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId) + && jobInfo.priority.equals(JobPriority.LOW)) { + analysisManager.createTableLevelTaskForExternalTable(jobInfo, analysisTasks, false); } - - if (needRunColumns == null || needRunColumns.isEmpty()) { - return null; + Env.getCurrentEnv().getAnalysisManager().constructJob(jobInfo, analysisTasks.values()); + Env.getCurrentEnv().getAnalysisManager().registerSysJob(jobInfo, analysisTasks); + Future[] futures = new Future[analysisTasks.values().size()]; + int i = 0; + for (BaseAnalysisTask task : analysisTasks.values()) { + futures[i++] = analysisTaskExecutor.submitTask(task); } - StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); - for (Pair pair : needRunColumns) { - stringJoiner.add(pair.toString()); + for (Future future : futures) { 
+ future.get(); } - return new AnalysisInfoBuilder(jobInfo) - .setColName(stringJoiner.toString()).setJobColumns(needRunColumns).build(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java deleted file mode 100644 index ec187fe893af49..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.statistics; - -import org.apache.doris.catalog.Env; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.statistics.util.StatisticsUtil; - -import org.apache.hudi.common.util.VisibleForTesting; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import java.util.HashMap; -import java.util.Map; - -public abstract class StatisticsCollector extends MasterDaemon { - - private static final Logger LOG = LogManager.getLogger(StatisticsCollector.class); - - protected final AnalysisTaskExecutor analysisTaskExecutor; - - public StatisticsCollector(String name, long intervalMs, AnalysisTaskExecutor analysisTaskExecutor) { - super(name, intervalMs); - this.analysisTaskExecutor = analysisTaskExecutor; - } - - @Override - protected void runAfterCatalogReady() { - if (!Env.getCurrentEnv().isMaster()) { - return; - } - if (!StatisticsUtil.statsTblAvailable()) { - LOG.info("Stats table not available, skip"); - return; - } - if (Env.isCheckpointThread()) { - return; - } - collect(); - } - - protected abstract void collect(); - - // Analysis job created by the system - @VisibleForTesting - protected void createSystemAnalysisJob(AnalysisInfo jobInfo) - throws DdlException { - if (jobInfo.jobColumns.isEmpty()) { - // No statistics need to be collected or updated - return; - } - Map analysisTasks = new HashMap<>(); - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false); - if (StatisticsUtil.isExternalTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId)) { - analysisManager.createTableLevelTaskForExternalTable(jobInfo, analysisTasks, false); - } - Env.getCurrentEnv().getAnalysisManager().constructJob(jobInfo, analysisTasks.values()); - Env.getCurrentEnv().getAnalysisManager().registerSysJob(jobInfo, analysisTasks); - analysisTasks.values().forEach(analysisTaskExecutor::submitTask); - } - -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java new file mode 100644 index 
00000000000000..74484a06afa99f --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsJobAppender.java @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.analysis.TableName; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.MasterDaemon; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.statistics.util.StatisticsUtil; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class StatisticsJobAppender extends MasterDaemon { + + private static final Logger LOG = LogManager.getLogger(StatisticsJobAppender.class); + + public static final long INTERVAL = 1000; + public static final int JOB_MAP_SIZE = 1000; + public static final int TABLE_BATCH_SIZE = 100; + + private long currentDbId = 0; + private long currentTableId = 0; + private long lastRoundFinishTime = 0; + private final long lowJobIntervalMs = TimeUnit.MINUTES.toMillis(1); + + public StatisticsJobAppender() { + super("Statistics Job Appender", INTERVAL); + } + + @Override + protected void runAfterCatalogReady() { + if (!StatisticsUtil.enableAutoAnalyze()) { + return; + } + if (!Env.getCurrentEnv().isMaster()) { + return; + } + if (Env.isCheckpointThread()) { + return; + } + appendJobs(); + } + + protected void appendJobs() { + AnalysisManager manager = Env.getCurrentEnv().getAnalysisManager(); + appendColumnsToJobs(manager.highPriorityColumns, manager.highPriorityJobs); + appendColumnsToJobs(manager.midPriorityColumns, manager.midPriorityJobs); + if (StatisticsUtil.enableAutoAnalyzeInternalCatalog()) { + appendToLowJobs(manager.lowPriorityJobs); + } + } + + protected void appendColumnsToJobs(Queue columnQueue, Map>> jobs) { + int size = columnQueue.size(); + int processed = 0; + for (int i = 0; i < size; i++) { + QueryColumn column = columnQueue.poll(); + if (column == null) { + continue; + } + TableIf table; + try { + table = StatisticsUtil.findTable(column.catalogId, column.dbId, column.tblId); + } catch (Exception e) { + LOG.warn("Fail to find table {}.{}.{} for column {}", + column.catalogId, column.dbId, column.tblId, column.colName, e); + continue; + } + if 
(StatisticConstants.SYSTEM_DBS.contains(table.getDatabase().getFullName())) { + continue; + } + Column col = table.getColumn(column.colName); + if (col == null || !col.isVisible() || StatisticsUtil.isUnsupportedType(col.getType())) { + continue; + } + Set> columnIndexPairs = table.getColumnIndexPairs( + Collections.singleton(column.colName)).stream() + .filter(p -> StatisticsUtil.needAnalyzeColumn(table, p)) + .collect(Collectors.toSet()); + if (columnIndexPairs.isEmpty()) { + continue; + } + TableName tableName = new TableName(table.getDatabase().getCatalog().getName(), + table.getDatabase().getFullName(), table.getName()); + synchronized (jobs) { + // If job map reach the upper limit, stop putting new jobs. + if (!jobs.containsKey(tableName) && jobs.size() >= JOB_MAP_SIZE) { + LOG.info("High or mid job map full."); + break; + } + if (jobs.containsKey(tableName)) { + jobs.get(tableName).addAll(columnIndexPairs); + } else { + jobs.put(tableName, columnIndexPairs); + } + } + processed++; + } + if (size > 0 && LOG.isDebugEnabled()) { + LOG.debug("{} of {} columns append to jobs", processed, size); + } + } + + protected void appendToLowJobs(Map>> jobs) { + if (System.currentTimeMillis() - lastRoundFinishTime < lowJobIntervalMs) { + return; + } + InternalCatalog catalog = Env.getCurrentInternalCatalog(); + List sortedDbs = catalog.getDbIds().stream().sorted().collect(Collectors.toList()); + int processed = 0; + for (long dbId : sortedDbs) { + if (dbId < currentDbId || catalog.getDbNullable(dbId) == null + || StatisticConstants.SYSTEM_DBS.contains(catalog.getDbNullable(dbId).getFullName())) { + continue; + } + currentDbId = dbId; + Optional db = catalog.getDb(dbId); + if (!db.isPresent()) { + continue; + } + List tables = db.get().getTables().stream() + .sorted((t1, t2) -> (int) (t1.getId() - t2.getId())).collect(Collectors.toList()); + for (Table t : tables) { + if (!(t instanceof OlapTable) || t.getId() <= currentTableId) { + continue; + } + if (t.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) { + continue; + } + Set> columnIndexPairs = t.getColumnIndexPairs( + t.getSchemaAllIndexes(false).stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName).collect(Collectors.toSet())) + .stream().filter(p -> StatisticsUtil.needAnalyzeColumn(t, p)) + .collect(Collectors.toSet()); + if (columnIndexPairs.isEmpty()) { + continue; + } + TableName tableName = new TableName(t.getDatabase().getCatalog().getName(), + t.getDatabase().getFullName(), t.getName()); + synchronized (jobs) { + // If job map reach the upper limit, stop adding new jobs. + if (!jobs.containsKey(tableName) && jobs.size() >= JOB_MAP_SIZE) { + LOG.info("Low job map full."); + return; + } + if (jobs.containsKey(tableName)) { + jobs.get(tableName).addAll(columnIndexPairs); + } else { + jobs.put(tableName, columnIndexPairs); + } + } + currentTableId = t.getId(); + if (++processed >= TABLE_BATCH_SIZE) { + return; + } + } + } + // All tables have been processed once, reset for the next loop. + if (LOG.isDebugEnabled()) { + LOG.debug("All low priority internal tables are appended once."); + } + currentDbId = 0; + currentTableId = 0; + lastRoundFinishTime = System.currentTimeMillis(); + } + + // For unit test only. 
+ public void setLastRoundFinishTime(long value) { + lastRoundFinishTime = value; + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 87ce90c5300a2b..8ec5582af5c93f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -29,8 +29,8 @@ import org.apache.doris.statistics.util.DBObjects; import org.apache.doris.statistics.util.StatisticsUtil; -import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import org.apache.commons.text.StringSubstitutor; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -337,7 +337,7 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder() .setTblUpdateTime(System.currentTimeMillis()) .setColName("") - .setJobColumns(Lists.newArrayList()) + .setJobColumns(Sets.newHashSet()) .setUserInject(true) .setJobType(AnalysisInfo.JobType.MANUAL) .build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 3b9b1e2bead005..a5073a922143f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -25,7 +25,9 @@ import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.JobType; +import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; import com.google.gson.annotations.SerializedName; @@ -129,30 +131,34 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) { for (Pair colPair : analyzedJob.jobColumns) { ColStatsMeta colStatsMeta = colToColStatsMeta.get(colPair); if (colStatsMeta == null) { - colToColStatsMeta.put(colPair, new ColStatsMeta(updatedTime, - analyzedJob.analysisMethod, analyzedJob.analysisType, analyzedJob.jobType, 0)); + colToColStatsMeta.put(colPair, new ColStatsMeta(updatedTime, analyzedJob.analysisMethod, + analyzedJob.analysisType, analyzedJob.jobType, 0, analyzedJob.rowCount, + analyzedJob.updateRows)); } else { colStatsMeta.updatedTime = updatedTime; colStatsMeta.analysisType = analyzedJob.analysisType; colStatsMeta.analysisMethod = analyzedJob.analysisMethod; colStatsMeta.jobType = analyzedJob.jobType; + colStatsMeta.updatedRows = analyzedJob.updateRows; + colStatsMeta.rowCount = analyzedJob.rowCount; } } jobType = analyzedJob.jobType; if (tableIf != null) { if (tableIf instanceof OlapTable) { - rowCount = analyzedJob.emptyJob ? 
0 : tableIf.getRowCount(); + rowCount = analyzedJob.rowCount; } - if (analyzedJob.emptyJob) { + if (rowCount == 0 && AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) { return; } if (analyzedJob.jobColumns.containsAll( tableIf.getColumnIndexPairs( - tableIf.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet())))) { - updatedRows.set(0); + tableIf.getSchemaAllIndexes(false).stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName).collect(Collectors.toSet())))) { newPartitionLoaded.set(false); - } - if (tableIf instanceof OlapTable) { + userInjected = false; + } else if (tableIf instanceof OlapTable) { PartitionInfo partitionInfo = ((OlapTable) tableIf).getPartitionInfo(); if (partitionInfo != null && analyzedJob.jobColumns .containsAll(tableIf.getColumnIndexPairs(partitionInfo.getPartitionColumns().stream() diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 44ff7dafe64cdf..12e4e8ff63abce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -56,6 +56,7 @@ import org.apache.doris.datasource.ExternalTable; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; import org.apache.doris.nereids.trees.expressions.literal.DateTimeLiteral; import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral; import org.apache.doris.qe.AutoCloseConnectContext; @@ -65,11 +66,14 @@ import org.apache.doris.qe.StmtExecutor; import org.apache.doris.qe.VariableMgr; import org.apache.doris.statistics.AnalysisInfo; +import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; import org.apache.doris.statistics.Histogram; import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.StatisticConstants; +import org.apache.doris.statistics.TableStatsMeta; import org.apache.doris.system.Frontend; import com.google.common.base.Preconditions; @@ -790,6 +794,16 @@ public static boolean enableAutoAnalyze() { return false; } + public static boolean enableAutoAnalyzeInternalCatalog() { + try { + return findConfigFromGlobalSessionVar( + SessionVariable.ENABLE_AUTO_ANALYZE_INTERNAL_CATALOG).enableAutoAnalyzeInternalCatalog; + } catch (Exception e) { + LOG.warn("Fail to get value of enable auto analyze internal catalog, return false by default", e); + } + return true; + } + public static int getInsertMergeCount() { try { return findConfigFromGlobalSessionVar(SessionVariable.STATS_INSERT_MERGE_ITEM_COUNT) @@ -898,7 +912,7 @@ public static boolean isMvColumn(TableIf table, String columnName) { } public static boolean isEmptyTable(TableIf table, AnalysisInfo.AnalysisMethod method) { - int waitRowCountReportedTime = 90; + int waitRowCountReportedTime = 75; if (!(table instanceof OlapTable) || method.equals(AnalysisInfo.AnalysisMethod.FULL)) { return false; } @@ -921,4 +935,71 @@ public static boolean isEmptyTable(TableIf table, AnalysisInfo.AnalysisMethod me return true; } + public static boolean needAnalyzeColumn(TableIf table, Pair column) { + if (column == null) { + return false; + } + AnalysisManager 
manager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); + // Table never been analyzed, need analyze. + if (tableStatsStatus == null) { + return true; + } + // User injected column stats, don't do auto analyze, avoid overwrite user injected stats. + if (tableStatsStatus.userInjected) { + return false; + } + ColStatsMeta columnStatsMeta = tableStatsStatus.findColumnStatsMeta(column.first, column.second); + // Column never been analyzed, need analyze. + if (columnStatsMeta == null) { + return true; + } + if (table instanceof OlapTable) { + OlapTable olapTable = (OlapTable) table; + // 0. Check new partition first time loaded flag. + if (olapTable.isPartitionColumn(column.second) && tableStatsStatus.newPartitionLoaded.get()) { + return true; + } + // 1. Check row count. + // TODO: One conner case. Last analyze row count is 0, but actually it's not 0 because isEmptyTable waiting. + long currentRowCount = olapTable.getRowCount(); + long lastAnalyzeRowCount = columnStatsMeta.rowCount; + // 1.1 Empty table -> non-empty table. Need analyze. + if (currentRowCount != 0 && lastAnalyzeRowCount == 0) { + return true; + } + // 1.2 Non-empty table -> empty table. Need analyze; + if (currentRowCount == 0 && lastAnalyzeRowCount != 0) { + return true; + } + // 1.3 Table is still empty. Not need to analyze. lastAnalyzeRowCount == 0 is always true here. + if (currentRowCount == 0) { + return false; + } + // 1.4 If row count changed more than the threshold, need analyze. + // lastAnalyzeRowCount == 0 is always false here. + double changeRate = + ((double) Math.abs(currentRowCount - lastAnalyzeRowCount) / lastAnalyzeRowCount) * 100.0; + if (changeRate > StatisticsUtil.getTableStatsHealthThreshold()) { + return true; + } + // 2. Check update rows. + long currentUpdatedRows = tableStatsStatus.updatedRows.get(); + long lastAnalyzeUpdateRows = columnStatsMeta.updatedRows; + changeRate = ((double) Math.abs(currentUpdatedRows - lastAnalyzeUpdateRows) / lastAnalyzeRowCount) * 100.0; + return changeRate > StatisticsUtil.getTableStatsHealthThreshold(); + } else { + // Now, we only support Hive external table auto analyze. + if (!(table instanceof HMSExternalTable)) { + return false; + } + HMSExternalTable hmsTable = (HMSExternalTable) table; + if (!hmsTable.getDlaType().equals(DLAType.HIVE)) { + return false; + } + // External is hard to calculate change rate, use time interval to control analyze frequency. 
+ return System.currentTimeMillis() + - tableStatsStatus.updatedTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); + } + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java index 1bf2041bb4f12c..8a163523eebabb 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java @@ -80,7 +80,7 @@ public void testAppendBufTest2(@Mocked AnalysisInfo analysisInfo, @Mocked OlapAn new MockUp() { @Mock - protected void writeBuf() { + protected void flushBuffer() { writeBufInvokeTimes.incrementAndGet(); } @@ -111,7 +111,7 @@ public void testAppendBufTest3(@Mocked AnalysisInfo analysisInfo, @Mocked OlapAn new MockUp() { @Mock - protected void writeBuf() { + protected void flushBuffer() { writeBufInvokeTimes.incrementAndGet(); } @@ -184,7 +184,7 @@ protected void executeWithExceptionOnFail(StmtExecutor stmtExecutor) throws Exce protected void syncLoadStats() { } }; - job.writeBuf(); + job.flushBuffer(); Assertions.assertEquals(0, job.queryFinished.size()); } @@ -210,7 +210,7 @@ protected void syncLoadStats() { job.buf.add(new ColStatsData()); job.queryFinished = new HashSet<>(); job.queryFinished.add(task2); - job.writeBuf(); + job.flushBuffer(); Assertions.assertEquals(0, job.queryFinished.size()); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 674456b0b46891..9c6580ee5cc2af 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -21,20 +21,31 @@ import org.apache.doris.analysis.AnalyzeTblStmt; import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.ShowAnalyzeStmt; +import org.apache.doris.analysis.ShowAutoAnalyzeJobsStmt; +import org.apache.doris.analysis.StatementBase; import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Table; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; +import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.nereids.trees.expressions.ExprId; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.types.IntegerType; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.AnalysisInfo.ScheduleType; import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.thrift.TQueryColumn; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; import mockit.Expectations; import mockit.Injectable; import mockit.Mock; @@ -46,8 +57,10 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; // CHECKSTYLE OFF 
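A minimal stand-alone sketch of the row-count change-rate check that the new StatisticsUtil.needAnalyzeColumn hunk above applies to OlapTable columns. Only the arithmetic mirrors the patch; the class name, parameter names, and the 20% threshold used in main are illustrative stand-ins for the real TableStatsMeta/ColStatsMeta inputs and the table stats health threshold.

public class ChangeRateSketch {
    // Compare the current row count and updated-row counter against the values recorded
    // at the last analyze; re-analyze when the relative change exceeds the threshold (%).
    static boolean needAnalyze(long currentRows, long lastAnalyzeRows,
                               long currentUpdatedRows, long lastAnalyzeUpdatedRows,
                               double healthThresholdPercent) {
        if (currentRows != 0 && lastAnalyzeRows == 0) {
            return true;  // empty -> non-empty, need analyze
        }
        if (currentRows == 0 && lastAnalyzeRows != 0) {
            return true;  // non-empty -> empty, need analyze
        }
        if (currentRows == 0) {
            return false; // still empty, skip
        }
        double rowChange = Math.abs(currentRows - lastAnalyzeRows) * 100.0 / lastAnalyzeRows;
        if (rowChange > healthThresholdPercent) {
            return true;
        }
        // As in the hunk above, the updated-row delta is also measured against the
        // row count of the last analyze rather than the last updated-row counter.
        double updateChange =
                Math.abs(currentUpdatedRows - lastAnalyzeUpdatedRows) * 100.0 / lastAnalyzeRows;
        return updateChange > healthThresholdPercent;
    }

    public static void main(String[] args) {
        // 1000 rows at last analyze, 1250 now: 25% change, above a 20% threshold -> analyze.
        System.out.println(needAnalyze(1250, 1000, 0, 0, 20));  // true
        // 1000 -> 1050 rows (5%) and 30 newly updated rows (3%): below 20% -> skip.
        System.out.println(needAnalyze(1050, 1000, 30, 0, 20)); // false
    }
}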
@@ -110,7 +123,7 @@ public String toString() { // test build sync job @Test public void testBuildAndAssignJob1() throws Exception { - AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()).build(); + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobColumns(new HashSet<>()).build(); new MockUp() { @Mock @@ -187,7 +200,7 @@ public void updateTableStats(AnalysisInfo jobInfo) { // test build async job @Test public void testBuildAndAssignJob2(@Injectable OlapAnalysisTask analysisTask) throws Exception { - AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobColumns(new HashSet<>()) .setScheduleType(ScheduleType.PERIOD) .build(); new MockUp() { @@ -261,69 +274,6 @@ public void logCreateAnalysisJob(AnalysisInfo analysisJob) { }; } - @Test - public void testReAnalyze() { - new MockUp() { - - final Column c = new Column("col1", PrimitiveType.INT); - @Mock - public List getBaseSchema() { - return Lists.newArrayList(c); - } - - @Mock - public List getColumns() { return Lists.newArrayList(c); } - - @Mock - public List> getColumnIndexPairs(Set columns) { - List> jobList = Lists.newArrayList(); - jobList.add(Pair.of("1", "1")); - jobList.add(Pair.of("2", "2")); - jobList.add(Pair.of("3", "3")); - return jobList; - } - }; - OlapTable olapTable = new OlapTable(); - List> jobList = Lists.newArrayList(); - jobList.add(Pair.of("1", "1")); - jobList.add(Pair.of("2", "2")); - TableStatsMeta stats0 = new TableStatsMeta( - 0, new AnalysisInfoBuilder().setJobColumns(jobList) - .setColName("col1").build(), olapTable); - Assertions.assertTrue(olapTable.needReAnalyzeTable(stats0)); - - new MockUp() { - int count = 0; - int[] rowCount = new int[]{100, 100, 200, 200, 1, 1}; - - @Mock - public long getRowCount() { - return rowCount[count++]; - } - @Mock - public List> getColumnIndexPairs(Set columns) { - List> jobList = Lists.newArrayList(); - return jobList; - } - }; - TableStatsMeta stats1 = new TableStatsMeta( - 50, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) - .setColName("col1").build(), olapTable); - stats1.updatedRows.addAndGet(50); - - Assertions.assertTrue(olapTable.needReAnalyzeTable(stats1)); - TableStatsMeta stats2 = new TableStatsMeta( - 190, new AnalysisInfoBuilder() - .setJobColumns(new ArrayList<>()).setColName("col1").build(), olapTable); - stats2.updatedRows.addAndGet(20); - Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2)); - - TableStatsMeta stats3 = new TableStatsMeta(0, new AnalysisInfoBuilder() - .setJobColumns(new ArrayList<>()).setEmptyJob(true).setColName("col1").build(), olapTable); - Assertions.assertTrue(olapTable.needReAnalyzeTable(stats3)); - - } - @Test public void testRecordLimit1() { Config.analyze_record_limit = 2; @@ -399,4 +349,293 @@ public void testShowAutoTasks(@Injectable ShowAnalyzeStmt stmt) { Assertions.assertEquals(AnalysisState.FINISHED, analysisInfos.get(1).getState()); Assertions.assertEquals(AnalysisState.FAILED, analysisInfos.get(2).getState()); } + + @Test + public void testAddQuerySlotToQueue() throws DdlException { + AnalysisManager analysisManager = new AnalysisManager(); + InternalCatalog testCatalog = new InternalCatalog(); + Database db = new Database(100, "testDb"); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + Column column2 = new Column("placeholder", PrimitiveType.INT); + Column column3 = new Column("test", PrimitiveType.INT); + List schema 
= new ArrayList<>(); + schema.add(column1); + OlapTable table = new OlapTable(200, "testTable", schema, null, null, null); + db.createTableWithLock(table, true, false); + + new MockUp
() { + @Mock + public DatabaseIf getDatabase() { + return db; + } + }; + + new MockUp() { + @Mock + public CatalogIf getCatalog() { + return testCatalog; + } + }; + + SlotReference slot1 = new SlotReference(new ExprId(1), "slot1", IntegerType.INSTANCE, true, + new ArrayList<>(), table, column1, Optional.empty(), null); + SlotReference slot2 = new SlotReference(new ExprId(2), "slot2", IntegerType.INSTANCE, true, + new ArrayList<>(), table, column2, Optional.empty(), null); + SlotReference slot3 = new SlotReference(new ExprId(3), "slot3", IntegerType.INSTANCE, true, + new ArrayList<>(), table, column3, Optional.empty(), null); + Set set1 = new HashSet<>(); + set1.add(slot1); + set1.add(slot2); + analysisManager.updateHighPriorityColumn(set1); + Assertions.assertEquals(2, analysisManager.highPriorityColumns.size()); + QueryColumn result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + Assertions.assertEquals(0, analysisManager.highPriorityColumns.size()); + Set set2 = new HashSet<>(); + set2.add(slot3); + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE / 2 - 1; i++) { + analysisManager.updateHighPriorityColumn(set1); + } + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 2, analysisManager.highPriorityColumns.size()); + analysisManager.updateHighPriorityColumn(set2); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 1, analysisManager.highPriorityColumns.size()); + analysisManager.updateHighPriorityColumn(set2); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.highPriorityColumns.size()); + analysisManager.updateHighPriorityColumn(set2); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.highPriorityColumns.size()); + + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 2; i++) { + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + } + Assertions.assertEquals(2, analysisManager.highPriorityColumns.size()); + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("test", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + + Assertions.assertEquals(1, analysisManager.highPriorityColumns.size()); + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("test", result.colName); + Assertions.assertEquals(testCatalog.getId(), result.catalogId); + Assertions.assertEquals(db.getId(), result.dbId); + Assertions.assertEquals(table.getId(), result.tblId); + + result = analysisManager.highPriorityColumns.poll(); + Assertions.assertNull(result); + } + + @Test + public void testMergeFollowerColumn() throws DdlException { + AnalysisManager analysisManager = new AnalysisManager(); + QueryColumn 
placeholder = new QueryColumn(1, 2, 3, "placeholder"); + QueryColumn high1 = new QueryColumn(10, 20, 30, "high1"); + QueryColumn high2 = new QueryColumn(11, 21, 31, "high2"); + QueryColumn mid1 = new QueryColumn(100, 200, 300, "mid1"); + QueryColumn mid2 = new QueryColumn(101, 201, 301, "mid2"); + List highColumns = new ArrayList<>(); + highColumns.add(high1.toThrift()); + highColumns.add(high2.toThrift()); + List midColumns = new ArrayList<>(); + midColumns.add(mid1.toThrift()); + midColumns.add(mid2.toThrift()); + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 1; i++) { + analysisManager.highPriorityColumns.offer(placeholder); + } + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 2; i++) { + analysisManager.midPriorityColumns.offer(placeholder); + } + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 1, analysisManager.highPriorityColumns.size()); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE - 2, analysisManager.midPriorityColumns.size()); + analysisManager.mergeFollowerQueryColumns(highColumns, midColumns); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.highPriorityColumns.size()); + Assertions.assertEquals(AnalysisManager.COLUMN_QUEUE_SIZE, analysisManager.midPriorityColumns.size()); + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 1; i++) { + QueryColumn poll = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("placeholder", poll.colName); + Assertions.assertEquals(1, poll.catalogId); + Assertions.assertEquals(2, poll.dbId); + Assertions.assertEquals(3, poll.tblId); + } + QueryColumn poll = analysisManager.highPriorityColumns.poll(); + Assertions.assertEquals("high1", poll.colName); + Assertions.assertEquals(10, poll.catalogId); + Assertions.assertEquals(20, poll.dbId); + Assertions.assertEquals(30, poll.tblId); + Assertions.assertEquals(0, analysisManager.highPriorityColumns.size()); + + for (int i = 0; i < AnalysisManager.COLUMN_QUEUE_SIZE - 2; i++) { + QueryColumn pol2 = analysisManager.midPriorityColumns.poll(); + Assertions.assertEquals("placeholder", pol2.colName); + Assertions.assertEquals(1, pol2.catalogId); + Assertions.assertEquals(2, pol2.dbId); + Assertions.assertEquals(3, pol2.tblId); + } + QueryColumn pol2 = analysisManager.midPriorityColumns.poll(); + Assertions.assertEquals("mid1", pol2.colName); + Assertions.assertEquals(100, pol2.catalogId); + Assertions.assertEquals(200, pol2.dbId); + Assertions.assertEquals(300, pol2.tblId); + + pol2 = analysisManager.midPriorityColumns.poll(); + Assertions.assertEquals("mid2", pol2.colName); + Assertions.assertEquals(101, pol2.catalogId); + Assertions.assertEquals(201, pol2.dbId); + Assertions.assertEquals(301, pol2.tblId); + Assertions.assertEquals(0, analysisManager.midPriorityColumns.size()); + } + + @Test + public void testShowAutoJobs() { + AnalysisManager manager = new AnalysisManager(); + TableName high1 = new TableName("catalog1", "db1", "high1"); + TableName high2 = new TableName("catalog2", "db2", "high2"); + TableName mid1 = new TableName("catalog3", "db3", "mid1"); + TableName mid2 = new TableName("catalog4", "db4", "mid2"); + TableName low1 = new TableName("catalog5", "db5", "low1"); + + manager.highPriorityJobs.put(high1, new HashSet<>()); + manager.highPriorityJobs.get(high1).add(Pair.of("index1", "col1")); + manager.highPriorityJobs.get(high1).add(Pair.of("index2", "col2")); + manager.highPriorityJobs.put(high2, new HashSet<>()); + manager.highPriorityJobs.get(high2).add(Pair.of("index1", "col3")); + 
manager.midPriorityJobs.put(mid1, new HashSet<>()); + manager.midPriorityJobs.get(mid1).add(Pair.of("index1", "col4")); + manager.midPriorityJobs.put(mid2, new HashSet<>()); + manager.midPriorityJobs.get(mid2).add(Pair.of("index1", "col5")); + manager.lowPriorityJobs.put(low1, new HashSet<>()); + manager.lowPriorityJobs.get(low1).add(Pair.of("index1", "col6")); + manager.lowPriorityJobs.get(low1).add(Pair.of("index1", "col7")); + + new MockUp() { + @Mock + public boolean isAnalyzed() { + return true; + } + }; + ShowAutoAnalyzeJobsStmt stmt = new ShowAutoAnalyzeJobsStmt(null, null); + List autoAnalysisPendingJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(5, autoAnalysisPendingJobs.size()); + AutoAnalysisPendingJob job = autoAnalysisPendingJobs.get(0); + Assertions.assertEquals("catalog1", job.catalogName); + Assertions.assertEquals("db1", job.dbName); + Assertions.assertEquals("high1", job.tableName); + Assertions.assertEquals(2, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col1"))); + Assertions.assertTrue(job.columns.contains(Pair.of("index2", "col2"))); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + job = autoAnalysisPendingJobs.get(1); + Assertions.assertEquals("catalog2", job.catalogName); + Assertions.assertEquals("db2", job.dbName); + Assertions.assertEquals("high2", job.tableName); + Assertions.assertEquals(1, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col3"))); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + job = autoAnalysisPendingJobs.get(2); + Assertions.assertEquals("catalog3", job.catalogName); + Assertions.assertEquals("db3", job.dbName); + Assertions.assertEquals("mid1", job.tableName); + Assertions.assertEquals(1, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col4"))); + Assertions.assertEquals(JobPriority.MID, job.priority); + + job = autoAnalysisPendingJobs.get(3); + Assertions.assertEquals("catalog4", job.catalogName); + Assertions.assertEquals("db4", job.dbName); + Assertions.assertEquals("mid2", job.tableName); + Assertions.assertEquals(1, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col5"))); + Assertions.assertEquals(JobPriority.MID, job.priority); + + job = autoAnalysisPendingJobs.get(4); + Assertions.assertEquals("catalog5", job.catalogName); + Assertions.assertEquals("db5", job.dbName); + Assertions.assertEquals("low1", job.tableName); + Assertions.assertEquals(2, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col6"))); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col7"))); + Assertions.assertEquals(JobPriority.LOW, job.priority); + + new MockUp() { + @Mock + public String getPriority() { + return JobPriority.HIGH.name().toUpperCase(); + } + }; + List highJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(2, highJobs.size()); + job = highJobs.get(0); + Assertions.assertEquals("catalog1", job.catalogName); + Assertions.assertEquals("db1", job.dbName); + Assertions.assertEquals("high1", job.tableName); + Assertions.assertEquals(2, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col1"))); + Assertions.assertTrue(job.columns.contains(Pair.of("index2", "col2"))); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + job = highJobs.get(1); + Assertions.assertEquals("catalog2", job.catalogName); + Assertions.assertEquals("db2", job.dbName); + 
Assertions.assertEquals("high2", job.tableName); + Assertions.assertEquals(1, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col3"))); + Assertions.assertEquals(JobPriority.HIGH, job.priority); + + new MockUp() { + @Mock + public String getPriority() { + return JobPriority.MID.name().toUpperCase(); + } + }; + List midJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(2, midJobs.size()); + job = midJobs.get(0); + Assertions.assertEquals("catalog3", job.catalogName); + Assertions.assertEquals("db3", job.dbName); + Assertions.assertEquals("mid1", job.tableName); + Assertions.assertEquals(1, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col4"))); + Assertions.assertEquals(JobPriority.MID, job.priority); + + job = midJobs.get(1); + Assertions.assertEquals("catalog4", job.catalogName); + Assertions.assertEquals("db4", job.dbName); + Assertions.assertEquals("mid2", job.tableName); + Assertions.assertEquals(1, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col5"))); + Assertions.assertEquals(JobPriority.MID, job.priority); + + new MockUp() { + @Mock + public String getPriority() { + return JobPriority.LOW.name().toUpperCase(); + } + }; + List lowJobs = manager.showAutoPendingJobs(stmt); + Assertions.assertEquals(1, lowJobs.size()); + job = lowJobs.get(0); + Assertions.assertEquals("catalog5", job.catalogName); + Assertions.assertEquals("db5", job.dbName); + Assertions.assertEquals("low1", job.tableName); + Assertions.assertEquals(2, job.columns.size()); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col6"))); + Assertions.assertTrue(job.columns.contains(Pair.of("index1", "col7"))); + Assertions.assertEquals(JobPriority.LOW, job.priority); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java index 5698f0e9b20e63..29e04b1ef4fd94 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java @@ -34,7 +34,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.utframe.TestWithFeService; -import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import mockit.Mock; import mockit.MockUp; import mockit.Mocked; @@ -44,6 +44,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; @@ -157,7 +158,7 @@ public void syncLoadColStats(long tableId, long idxId, String colName) { }; AnalysisTaskExecutor analysisTaskExecutor = new AnalysisTaskExecutor(1); - List> columns = Lists.newArrayList(); + Set> columns = Sets.newHashSet(); columns.add(Pair.of("col1", "t1")); AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobId(0).setTaskId(0) .setCatalogId(0).setDBId(0).setTblId(0) diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java index bf6ce32e155f42..250a796cee4cd5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java @@ -36,7 +36,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import 
org.apache.doris.utframe.TestWithFeService; -import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import mockit.Expectations; import mockit.Mock; import mockit.MockUp; @@ -48,6 +48,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; public class AnalyzeTest extends TestWithFeService { @@ -159,7 +160,7 @@ public void execSQLs(List partitionAnalysisSQLs, Map par @Mock protected void runQuery(String sql) {} }; - List> colList = Lists.newArrayList(); + Set> colList = Sets.newHashSet(); colList.add(Pair.of("col1", "index1")); AnalysisInfo analysisJobInfo = new AnalysisInfoBuilder().setJobId(0).setTaskId(0) .setCatalogId(0) @@ -171,6 +172,7 @@ protected void runQuery(String sql) {} .setAnalysisType(AnalysisType.FUNDAMENTALS) .setJobColumns(colList) .setState(AnalysisState.RUNNING) + .setRowCount(10) .build(); new OlapAnalysisTask(analysisJobInfo).doExecute(); new Expectations() { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/FollowerColumnSenderTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/FollowerColumnSenderTest.java new file mode 100644 index 00000000000000..2a5ae531d1e6dd --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/FollowerColumnSenderTest.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
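The new FollowerColumnSenderTest added below asserts that getNeedAnalyzeColumns drains a queue of reported columns, keeps only those StatisticsUtil.needAnalyzeColumn still flags as needing analysis, and deduplicates the survivors. A minimal stand-alone sketch of that drain-filter-deduplicate shape; ReportedColumn and the staleness predicate are hypothetical stand-ins for QueryColumn/TQueryColumn and the real check.

import java.util.ArrayDeque;
import java.util.HashSet;
import java.util.Queue;
import java.util.Set;
import java.util.function.Predicate;

public class ColumnQueueSketch {
    // Stand-in for a reported column; the record's equals/hashCode is what lets the Set
    // collapse a column that was reported more than once into a single entry.
    record ReportedColumn(long tableId, String colName) {}

    // Drain the queue, keep only columns the predicate accepts, collect into a Set.
    static Set<ReportedColumn> needAnalyzeColumns(Queue<ReportedColumn> queue,
                                                  Predicate<ReportedColumn> needAnalyze) {
        Set<ReportedColumn> result = new HashSet<>();
        ReportedColumn col;
        while ((col = queue.poll()) != null) {
            if (needAnalyze.test(col)) {
                result.add(col);
            }
        }
        return result;
    }

    public static void main(String[] args) {
        Queue<ReportedColumn> queue = new ArrayDeque<>();
        queue.add(new ReportedColumn(3, "col1"));
        queue.add(new ReportedColumn(3, "col2"));
        queue.add(new ReportedColumn(3, "col4"));
        queue.add(new ReportedColumn(3, "col4")); // same column reported twice
        // Pretend only col2 and col4 need analysis.
        Set<ReportedColumn> out = needAnalyzeColumns(queue, c -> !c.colName().equals("col1"));
        System.out.println(out.size()); // 2
    }
}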
+ +package org.apache.doris.statistics; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.Pair; +import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.thrift.TQueryColumn; + +import mockit.Mock; +import mockit.MockUp; +import org.eclipse.jetty.util.BlockingArrayQueue; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Collections; +import java.util.Queue; +import java.util.Set; + +public class FollowerColumnSenderTest { + + @Test + public void testGetNeedAnalyzeColumns() { + new MockUp() { + @Mock + public Column getColumn(String name) { + return new Column("col", PrimitiveType.INT); + } + + @Mock + public Set> getColumnIndexPairs(Set columns) { + return Collections.singleton(Pair.of("mockIndex", "mockCol")); + } + }; + + new MockUp() { + boolean[] result = {false, true, false, true, true}; + int i = 0; + @Mock + public boolean needAnalyzeColumn(TableIf table, Pair column) { + return result[i++]; + } + + @Mock + public TableIf findTable(long catalogId, long dbId, long tblId) { + return new OlapTable(); + } + }; + QueryColumn column1 = new QueryColumn(1, 2, 3, "col1"); + QueryColumn column2 = new QueryColumn(1, 2, 3, "col2"); + QueryColumn column3 = new QueryColumn(1, 2, 3, "col3"); + QueryColumn column4 = new QueryColumn(1, 2, 3, "col4"); + Queue queue = new BlockingArrayQueue<>(); + queue.add(column1); + queue.add(column2); + queue.add(column3); + queue.add(column4); + queue.add(column4); + Assertions.assertEquals(5, queue.size()); + + FollowerColumnSender sender = new FollowerColumnSender(); + Set needAnalyzeColumns = sender.getNeedAnalyzeColumns(queue); + Assertions.assertEquals(2, needAnalyzeColumns.size()); + Assertions.assertFalse(needAnalyzeColumns.contains(column1.toThrift())); + Assertions.assertTrue(needAnalyzeColumns.contains(column2.toThrift())); + Assertions.assertFalse(needAnalyzeColumns.contains(column3.toThrift())); + Assertions.assertTrue(needAnalyzeColumns.contains(column4.toThrift())); + } + +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index f7b75261cc54fa..6324624abac6a6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -17,457 +17,125 @@ package org.apache.doris.statistics; +import org.apache.doris.analysis.TableName; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.EnvFactory; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; -import org.apache.doris.catalog.Table; -import org.apache.doris.catalog.TableIf; -import org.apache.doris.catalog.Type; -import org.apache.doris.catalog.View; -import org.apache.doris.common.Config; -import org.apache.doris.common.DdlException; -import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; -import org.apache.doris.datasource.CatalogIf; -import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.statistics.util.StatisticsUtil; +import org.apache.doris.datasource.ExternalTable; +import 
org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; +import org.apache.doris.datasource.jdbc.JdbcExternalTable; -import com.google.common.collect.Lists; -import mockit.Expectations; -import mockit.Injectable; import mockit.Mock; import mockit.MockUp; -import mockit.Mocked; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.time.LocalTime; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; +import java.util.HashSet; import java.util.List; -import java.util.Map; +import java.util.Map.Entry; import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; public class StatisticsAutoCollectorTest { @Test - public void testAnalyzeAll(@Injectable AnalysisInfo analysisInfo) { - new MockUp() { - @Mock - public Collection getAllDbs() { - Database db1 = new Database(1, FeConstants.INTERNAL_DB_NAME); - Database db2 = new Database(2, "anyDB"); - List databaseIfs = new ArrayList<>(); - databaseIfs.add(db1); - databaseIfs.add(db2); - return databaseIfs; - } - }; - new MockUp() { - @Mock - public List constructAnalysisInfo(DatabaseIf db) { - return Arrays.asList(analysisInfo, analysisInfo); - } - - int count = 0; - - @Mock - public AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { - return count++ == 0 ? null : jobInfo; - } - - @Mock - public void createSystemAnalysisJob(AnalysisInfo jobInfo) - throws DdlException { - - } - }; - - StatisticsAutoCollector saa = new StatisticsAutoCollector(); - saa.runAfterCatalogReady(); - new Expectations() { - { - try { - saa.createSystemAnalysisJob((AnalysisInfo) any); - times = 1; - } catch (Exception e) { - throw new RuntimeException(e); - } - } - }; - } - - @Test - public void testConstructAnalysisInfo( - @Injectable OlapTable o2, @Injectable View v) { - new MockUp() { - @Mock - public List
getTables() { - List
tableIfs = new ArrayList<>(); - tableIfs.add(o2); - tableIfs.add(v); - return tableIfs; - } - - @Mock - public String getFullName() { - return "anyDb"; - } - }; - - new MockUp() { - @Mock - public String getName() { - return "anytable"; - } - - @Mock - public List getSchemaAllIndexes(boolean full) { - List columns = new ArrayList<>(); - columns.add(new Column("c1", PrimitiveType.INT)); - columns.add(new Column("c2", PrimitiveType.HLL)); - return columns; - } - }; - StatisticsAutoCollector saa = new StatisticsAutoCollector(); - List analysisInfoList = saa.constructAnalysisInfo(new Database(1, "anydb")); - Assertions.assertEquals(1, analysisInfoList.size()); - Assertions.assertNull(analysisInfoList.get(0).colName); - } - - @Test - public void testSkipWideTable() { - - TableIf tableIf = new OlapTable(); - - new MockUp() { - @Mock - public List getBaseSchema() { - return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); - } - - @Mock - public List> getColumnIndexPairs(Set columns) { - ArrayList> list = Lists.newArrayList(); - list.add(Pair.of("1", "1")); - return list; - } - }; - - new MockUp() { - int count = 0; - int[] thresholds = {1, 10}; - - @Mock - public TableIf findTable(long catalogName, long dbName, long tblName) { - return tableIf; - } - - @Mock - public int getAutoAnalyzeTableWidthThreshold() { - return thresholds[count++]; - } - }; - - AnalysisInfo analysisInfo = new AnalysisInfoBuilder().build(); - StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector(); - Assertions.assertNull(statisticsAutoCollector.getNeedAnalyzeColumns(analysisInfo)); - Assertions.assertNotNull(statisticsAutoCollector.getNeedAnalyzeColumns(analysisInfo)); - } - - @Test - public void testLoop() { - AtomicBoolean timeChecked = new AtomicBoolean(); - AtomicBoolean switchChecked = new AtomicBoolean(); - new MockUp() { - - @Mock - public boolean inAnalyzeTime(LocalTime now) { - timeChecked.set(true); - return true; - } - - @Mock - public boolean enableAutoAnalyze() { - switchChecked.set(true); - return true; - } - }; - StatisticsAutoCollector autoCollector = new StatisticsAutoCollector(); - autoCollector.collect(); - Assertions.assertTrue(timeChecked.get() && switchChecked.get()); - - } - - @Test - public void checkAvailableThread() { - StatisticsAutoCollector autoCollector = new StatisticsAutoCollector(); - Assertions.assertEquals(Config.auto_analyze_simultaneously_running_task_num, - autoCollector.analysisTaskExecutor.executors.getMaximumPoolSize()); - } - - @Test - public void testSkip(@Mocked OlapTable olapTable, @Mocked TableStatsMeta stats, @Mocked TableIf anyOtherTable) { - new MockUp() { - - @Mock - public long getDataSize(boolean singleReplica) { - return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() * 5 + 1000000000; - } - }; - - new MockUp() { - - @Mock - public TableStatsMeta findTableStatsStatus(long tblId) { - return stats; - } - }; - // A very huge table has been updated recently, so we should skip it this time - stats.updatedTime = System.currentTimeMillis() - 1000; - stats.newPartitionLoaded = new AtomicBoolean(); - stats.newPartitionLoaded.set(true); - StatisticsAutoCollector autoCollector = new StatisticsAutoCollector(); - // Test new partition loaded data for the first time. Not skip. 
- Assertions.assertFalse(autoCollector.skip(olapTable)); - stats.newPartitionLoaded.set(false); - // Assertions.assertTrue(autoCollector.skip(olapTable)); - // The update of this huge table is long time ago, so we shouldn't skip it this time - stats.updatedTime = System.currentTimeMillis() - - StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis() - 10000; - Assertions.assertFalse(autoCollector.skip(olapTable)); - new MockUp() { - - @Mock - public TableStatsMeta findTableStatsStatus(long tblId) { - return null; - } - }; - // can't find table stats meta, which means this table never get analyzed, so we shouldn't skip it this time - Assertions.assertFalse(autoCollector.skip(olapTable)); - new MockUp() { - - @Mock - public TableStatsMeta findTableStatsStatus(long tblId) { - return stats; - } - }; - stats.userInjected = true; - Assertions.assertTrue(autoCollector.skip(olapTable)); - // this is not olap table nor external table, so we should skip it this time - Assertions.assertTrue(autoCollector.skip(anyOtherTable)); + public void testFetchJob() { + AnalysisManager manager = new AnalysisManager(); + TableName high1 = new TableName("catalog", "db", "high1"); + TableName high2 = new TableName("catalog", "db", "high2"); + TableName mid1 = new TableName("catalog", "db", "mid1"); + TableName mid2 = new TableName("catalog", "db", "mid2"); + TableName low1 = new TableName("catalog", "db", "low1"); + + manager.highPriorityJobs.put(high1, new HashSet<>()); + manager.highPriorityJobs.get(high1).add(Pair.of("index1", "col1")); + manager.highPriorityJobs.get(high1).add(Pair.of("index1", "col2")); + manager.highPriorityJobs.put(high2, new HashSet<>()); + manager.highPriorityJobs.get(high2).add(Pair.of("index1", "col3")); + manager.midPriorityJobs.put(mid1, new HashSet<>()); + manager.midPriorityJobs.get(mid1).add(Pair.of("index1", "col4")); + manager.midPriorityJobs.put(mid2, new HashSet<>()); + manager.midPriorityJobs.get(mid2).add(Pair.of("index1", "col5")); + manager.lowPriorityJobs.put(low1, new HashSet<>()); + manager.lowPriorityJobs.get(low1).add(Pair.of("index1", "col6")); + manager.lowPriorityJobs.get(low1).add(Pair.of("index1", "col7")); + + + new MockUp() { + @Mock + public AnalysisManager getAnalysisManager() { + return manager; + } + }; + StatisticsAutoCollector collector = new StatisticsAutoCollector(); + Pair>>, JobPriority> job = collector.getJob(); + Assertions.assertEquals(high1, job.first.getKey()); + Assertions.assertEquals(2, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains(Pair.of("index1", "col1"))); + Assertions.assertTrue(job.first.getValue().contains(Pair.of("index1", "col2"))); + Assertions.assertEquals(JobPriority.HIGH, job.second); + + job = collector.getJob(); + Assertions.assertEquals(high2, job.first.getKey()); + Assertions.assertEquals(1, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains(Pair.of("index1", "col3"))); + Assertions.assertEquals(JobPriority.HIGH, job.second); + + job = collector.getJob(); + Assertions.assertEquals(mid1, job.first.getKey()); + Assertions.assertEquals(1, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains(Pair.of("index1", "col4"))); + Assertions.assertEquals(JobPriority.MID, job.second); + + job = collector.getJob(); + Assertions.assertEquals(mid2, job.first.getKey()); + Assertions.assertEquals(1, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains(Pair.of("index1", "col5"))); + 
Assertions.assertEquals(JobPriority.MID, job.second); + + job = collector.getJob(); + Assertions.assertEquals(low1, job.first.getKey()); + Assertions.assertEquals(2, job.first.getValue().size()); + Assertions.assertTrue(job.first.getValue().contains(Pair.of("index1", "col6"))); + Assertions.assertTrue(job.first.getValue().contains(Pair.of("index1", "col7"))); + Assertions.assertEquals(JobPriority.LOW, job.second); + + job = collector.getJob(); + Assertions.assertNull(job); } - // For small table, use full @Test - public void testCreateAnalyzeJobForTbl1( - @Injectable OlapTable t1, - @Injectable Database db - ) throws Exception { - new MockUp() { + public void testSupportAutoAnalyze() { + StatisticsAutoCollector collector = new StatisticsAutoCollector(); + Assertions.assertFalse(collector.supportAutoAnalyze(null)); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(200, "testTable", schema, null, null, null); + Assertions.assertTrue(collector.supportAutoAnalyze(table1)); - @Mock - public CatalogIf getCatalog() { - return Env.getCurrentInternalCatalog(); - } + ExternalTable externalTable = new JdbcExternalTable(1, "jdbctable", "jdbcdb", null); + Assertions.assertFalse(collector.supportAutoAnalyze(externalTable)); + new MockUp() { @Mock - public long getId() { - return 0; + public DLAType getDlaType() { + return DLAType.ICEBERG; } }; - new MockUp() { - - int count = 0; + ExternalTable icebergExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", null); + Assertions.assertFalse(collector.supportAutoAnalyze(icebergExternalTable)); + new MockUp() { @Mock - public List getBaseSchema() { - return Lists.newArrayList(new Column("test", PrimitiveType.INT)); - } - - @Mock - public long getDataSize(boolean singleReplica) { - return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1; - } - - @Mock - public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) { - return new OlapAnalysisTask(info); - } - - @Mock - public List getMvColumnIndexIds(String columnName) { - ArrayList objects = new ArrayList<>(); - objects.add(-1L); - return objects; + public DLAType getDlaType() { + return DLAType.HIVE; } }; - - new MockUp() { - @Mock - public TableIf findTable(long catalogId, long dbId, long tblId) { - return t1; - } - }; - - StatisticsAutoCollector sac = new StatisticsAutoCollector(); - List jobInfos = new ArrayList<>(); - sac.createAnalyzeJobForTbl(db, jobInfos, t1); - AnalysisInfo jobInfo = jobInfos.get(0); - List> columnNames = Lists.newArrayList(); - columnNames.add(Pair.of("test", "t1")); - jobInfo = new AnalysisInfoBuilder(jobInfo).setJobColumns(columnNames).build(); - Map analysisTasks = new HashMap<>(); - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false); - Assertions.assertEquals(1, analysisTasks.size()); - for (BaseAnalysisTask task : analysisTasks.values()) { - Assertions.assertNull(task.getTableSample()); - } - } - - // for big table, use sample - @Test - public void testCreateAnalyzeJobForTbl2( - @Injectable OlapTable t1, - @Injectable Database db - ) throws Exception { - new MockUp() { - - @Mock - public CatalogIf getCatalog() { - return Env.getCurrentInternalCatalog(); - } - - @Mock - public long getId() { - return 0; - } - }; - new MockUp() { - - int count = 0; - - @Mock - public List getBaseSchema() { - return Lists.newArrayList(new Column("test", PrimitiveType.INT)); - } - - 
@Mock - public long getDataSize(boolean singleReplica) { - return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() * 2; - } - - @Mock - public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) { - return new OlapAnalysisTask(info); - } - - @Mock - public List getMvColumnIndexIds(String columnName) { - ArrayList objects = new ArrayList<>(); - objects.add(-1L); - return objects; - } - }; - - new MockUp() { - @Mock - public TableIf findTable(long catalogId, long dbId, long tblId) { - return t1; - } - }; - - StatisticsAutoCollector sac = new StatisticsAutoCollector(); - List jobInfos = new ArrayList<>(); - sac.createAnalyzeJobForTbl(db, jobInfos, t1); - AnalysisInfo jobInfo = jobInfos.get(0); - List> colNames = Lists.newArrayList(); - colNames.add(Pair.of("test", "1")); - jobInfo = new AnalysisInfoBuilder(jobInfo).setJobColumns(colNames).build(); - Map analysisTasks = new HashMap<>(); - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false); - Assertions.assertEquals(1, analysisTasks.size()); - for (BaseAnalysisTask task : analysisTasks.values()) { - Assertions.assertNotNull(task.getTableSample()); - } - } - - @Test - public void testDisableAuto1() throws Exception { - InternalCatalog catalog1 = EnvFactory.getInstance().createInternalCatalog(); - List catalogs = Lists.newArrayList(); - catalogs.add(catalog1); - - new MockUp() { - @Mock - public List getCatalogsInOrder() { - return catalogs; - } - - @Mock - protected boolean canCollect() { - return false; - } - - }; - - StatisticsAutoCollector sac = new StatisticsAutoCollector(); - new Expectations(catalog1) {{ - catalog1.enableAutoAnalyze(); - times = 0; - }}; - - sac.analyzeAll(); - } - - @Test - public void testDisableAuto2() throws Exception { - InternalCatalog catalog1 = EnvFactory.getInstance().createInternalCatalog(); - List catalogs = Lists.newArrayList(); - catalogs.add(catalog1); - - Database db1 = new Database(); - List> dbs = Lists.newArrayList(); - dbs.add(db1); - - new MockUp() { - int count = 0; - boolean[] canCollectReturn = {true, false}; - @Mock - public List getCatalogsInOrder() { - return catalogs; - } - - @Mock - public List> getDatabasesInOrder(CatalogIf catalog) { - return dbs; - } - - @Mock - protected boolean canCollect() { - return canCollectReturn[count++]; - } - - }; - - StatisticsAutoCollector sac = new StatisticsAutoCollector(); - new Expectations(catalog1, db1) {{ - catalog1.enableAutoAnalyze(); - result = true; - times = 1; - db1.getFullName(); - times = 0; - }}; - - sac.analyzeAll(); + ExternalTable hiveExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", null); + Assertions.assertTrue(collector.supportAutoAnalyze(hiveExternalTable)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java new file mode 100644 index 00000000000000..e3255ab23a0381 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsJobAppenderTest.java @@ -0,0 +1,281 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.analysis.TableName; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.Pair; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.statistics.util.StatisticsUtil; + +import com.google.common.collect.Lists; +import mockit.Mock; +import mockit.MockUp; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ArrayBlockingQueue; + +public class StatisticsJobAppenderTest { + + @Test + public void testAppendQueryColumnToHighAndMidJobMap() throws DdlException { + InternalCatalog testCatalog = new InternalCatalog(); + Database db = new Database(100, "testDb"); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(200, "testTable", schema, null, null, null); + OlapTable table2 = new OlapTable(200, "testTable2", schema, null, null, null); + OlapTable table3 = new OlapTable(200, "testTable3", schema, null, null, null); + new MockUp() { + int i = 0; + Table[] tables = {table1, table2, table1, table3, table2}; + + @Mock + public boolean needAnalyzeColumn(TableIf table, Pair column) { + return true; + } + + @Mock + public TableIf findTable(long catalogId, long dbId, long tblId) { + return tables[i++]; + } + }; + + new MockUp
() { + @Mock + public DatabaseIf getDatabase() { + return db; + } + + @Mock + public Column getColumn(String name) { + return new Column("mockCol", Type.INT); + } + }; + + new MockUp() { + @Mock + public Set> getColumnIndexPairs(Set columns) { + String column = columns.iterator().next(); + return Collections.singleton(Pair.of("mockIndex", column)); + } + }; + + Queue testQueue = new ArrayBlockingQueue<>(100); + Map>> testMap = new HashMap<>(); + QueryColumn high1 = new QueryColumn(10, 20, 30, "high1"); + testQueue.add(high1); + + StatisticsJobAppender appender = new StatisticsJobAppender(); + appender.appendColumnsToJobs(testQueue, testMap); + Assertions.assertEquals(1, testMap.size()); + Assertions.assertEquals(1, testMap.values().size()); + Assertions.assertTrue(testMap.get(new TableName("internal", "testDb", "testTable")).contains(Pair.of("mockIndex", "high1"))); + + QueryColumn high2 = new QueryColumn(10, 20, 30, "high2"); + QueryColumn high3 = new QueryColumn(10, 20, 30, "high3"); + testQueue.add(high2); + testQueue.add(high3); + appender.appendColumnsToJobs(testQueue, testMap); + Assertions.assertEquals(2, testMap.size()); + + Set> table1Column = testMap.get(new TableName("internal", "testDb", "testTable")); + Assertions.assertEquals(2, table1Column.size()); + Assertions.assertTrue(table1Column.contains(Pair.of("mockIndex", "high1"))); + Assertions.assertTrue(table1Column.contains(Pair.of("mockIndex", "high3"))); + + Set> table2Column = testMap.get(new TableName("internal", "testDb", "testTable2")); + Assertions.assertEquals(1, table2Column.size()); + Assertions.assertTrue(table2Column.contains(Pair.of("mockIndex", "high2"))); + + for (int i = 0; i < StatisticsJobAppender.JOB_MAP_SIZE - 2; i++) { + testMap.put(new TableName("a", "b", UUID.randomUUID().toString()), new HashSet<>()); + } + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testMap.size()); + + QueryColumn high4 = new QueryColumn(10, 20, 30, "high4"); + testQueue.add(high4); + appender.appendColumnsToJobs(testQueue, testMap); + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testMap.size()); + + QueryColumn high5 = new QueryColumn(10, 20, 30, "high5"); + testQueue.add(high5); + appender.appendColumnsToJobs(testQueue, testMap); + table2Column = testMap.get(new TableName("internal", "testDb", "testTable2")); + Assertions.assertEquals(2, table2Column.size()); + Assertions.assertTrue(table2Column.contains(Pair.of("mockIndex", "high2"))); + Assertions.assertTrue(table2Column.contains(Pair.of("mockIndex", "high5"))); + } + + @Test + public void testAppendQueryColumnToLowJobMap() throws DdlException { + InternalCatalog testCatalog = new InternalCatalog(); + int id = 10; + for (int i = 0; i < 70; i++) { + Database db = new Database(id++, "testDb" + i); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + db.createTableWithLock(table1, true, false); + db.createTableWithLock(table2, true, false); + } + + new MockUp() { + @Mock + public InternalCatalog getCurrentInternalCatalog() { + return testCatalog; + } + }; + + new MockUp() { + @Mock + public List getBaseSchema() { + return Lists.newArrayList(); + } + + @Mock + public Set> getColumnIndexPairs(Set columns) { + return Collections.singleton(Pair.of("mockIndex", 
"mockColumn")); + } + }; + + Map>> testMap = new HashMap<>(); + StatisticsJobAppender appender = new StatisticsJobAppender(); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(100, testMap.size()); + testMap.clear(); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(40, testMap.size()); + + for (int i = 0; i < StatisticsJobAppender.JOB_MAP_SIZE; i++) { + Database db = new Database(id++, "testDb" + i); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + OlapTable table2 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + db.createTableWithLock(table1, true, false); + db.createTableWithLock(table2, true, false); + } + + testMap.clear(); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(StatisticsJobAppender.JOB_MAP_SIZE, testMap.size()); + } + + @Test + public void testSkipWideTable() throws DdlException { + InternalCatalog testCatalog = new InternalCatalog(); + int id = 10; + Database db = new Database(id++, "testDb"); + testCatalog.unprotectCreateDb(db); + Column column1 = new Column("placeholder", PrimitiveType.INT); + List schema = new ArrayList<>(); + schema.add(column1); + OlapTable table1 = new OlapTable(id++, "testTable" + id + "_1", schema, null, null, null); + db.createTableWithLock(table1, true, false); + new MockUp() { + @Mock + public InternalCatalog getCurrentInternalCatalog() { + return testCatalog; + } + }; + new MockUp() { + @Mock + public List getBaseSchema() { + return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); + } + + @Mock + public Set> getColumnIndexPairs(Set columns) { + return Collections.singleton(Pair.of("1", "1")); + } + }; + + new MockUp() { + int count = 0; + int[] thresholds = {1, 10}; + + @Mock + public int getAutoAnalyzeTableWidthThreshold() { + return thresholds[count++]; + } + }; + Map>> testMap = new HashMap<>(); + StatisticsJobAppender appender = new StatisticsJobAppender(); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(0, testMap.size()); + appender.setLastRoundFinishTime(0); + appender.appendToLowJobs(testMap); + Assertions.assertEquals(1, testMap.size()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java index 94eab9e00cc501..10e1973aa3318d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java @@ -19,27 +19,19 @@ import org.apache.doris.catalog.OlapTable; -import mockit.Mock; -import mockit.MockUp; import mockit.Mocked; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.ArrayList; +import java.util.HashSet; class TableStatsMetaTest { @Test void update(@Mocked OlapTable table) { - new MockUp() { - @Mock - public long getRowCount() { - return 4; - } - }; TableStatsMeta tableStatsMeta = new TableStatsMeta(); - AnalysisInfo jobInfo = new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) - .setColName("col1").build(); + AnalysisInfo jobInfo = new AnalysisInfoBuilder().setRowCount(4) + .setJobColumns(new HashSet<>()).setColName("col1").build(); tableStatsMeta.update(jobInfo, table); Assertions.assertEquals(4, tableStatsMeta.rowCount); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 724e0363833305..275471a66982b6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -17,10 +17,21 @@ package org.apache.doris.statistics.util; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Pair; +import org.apache.doris.datasource.ExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; +import org.apache.doris.datasource.jdbc.JdbcExternalTable; import org.apache.doris.qe.SessionVariable; +import org.apache.doris.statistics.AnalysisManager; +import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ResultRow; +import org.apache.doris.statistics.TableStatsMeta; import com.google.common.collect.Lists; import mockit.Mock; @@ -33,6 +44,7 @@ import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Base64; +import java.util.List; class StatisticsUtilTest { @Test @@ -150,4 +162,141 @@ void testEscape() { // \\''"" Assertions.assertEquals("\\\\''\"", StatisticsUtil.escapeSQL(origin)); } + + @Test + void testNeedAnalyzeColumn() { + Column column = new Column("testColumn", PrimitiveType.INT); + List schema = new ArrayList<>(); + schema.add(column); + OlapTable table = new OlapTable(200, "testTable", schema, null, null, null); + // Test table stats meta is null. + new MockUp() { + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return null; + } + }; + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test user injected flag is set. + TableStatsMeta tableMeta = new TableStatsMeta(); + tableMeta.userInjected = true; + new MockUp() { + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return tableMeta; + } + }; + Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test column meta is null. 
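+        // No column-level stats meta has been recorded on the fresh TableStatsMeta yet, so once the user-injected flag is cleared the column should again be reported as needing analysis.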
+ tableMeta.userInjected = false; + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + new MockUp() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return new ColStatsMeta(0, null, null, null, 0, 0, 0); + } + }; + + // Test not supported external table type. + ExternalTable externalTable = new JdbcExternalTable(1, "jdbctable", "jdbcdb", null); + Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(externalTable, Pair.of("index", column.getName()))); + + // Test hms external table not hive type. + new MockUp() { + @Mock + public DLAType getDlaType() { + return DLAType.ICEBERG; + } + }; + ExternalTable hmsExternalTable = new HMSExternalTable(1, "hmsTable", "hmsDb", null); + Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(hmsExternalTable, Pair.of("index", column.getName()))); + + // Test partition first load. + new MockUp() { + @Mock + public boolean isPartitionColumn(String columnName) { + return true; + } + }; + tableMeta.newPartitionLoaded.set(true); + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test empty table to non-empty table. + new MockUp() { + @Mock + public long getRowCount() { + return 100; + } + }; + tableMeta.newPartitionLoaded.set(false); + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test non-empty table to empty table. + new MockUp() { + @Mock + public long getRowCount() { + return 0; + } + }; + new MockUp() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return new ColStatsMeta(0, null, null, null, 0, 100, 0); + } + }; + tableMeta.newPartitionLoaded.set(false); + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test table still empty. + new MockUp() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return new ColStatsMeta(0, null, null, null, 0, 0, 0); + } + }; + tableMeta.newPartitionLoaded.set(false); + Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test row count changed more than threshold. + new MockUp() { + @Mock + public long getRowCount() { + return 1000; + } + }; + new MockUp() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return new ColStatsMeta(0, null, null, null, 0, 500, 0); + } + }; + tableMeta.newPartitionLoaded.set(false); + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test update rows changed more than threshold. 
+ new MockUp() { + @Mock + public long getRowCount() { + return 120; + } + }; + new MockUp() { + @Mock + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return new ColStatsMeta(0, null, null, null, 0, 100, 80); + } + }; + tableMeta.newPartitionLoaded.set(false); + tableMeta.updatedRows.set(200); + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test update rows changed less than threshold + tableMeta.newPartitionLoaded.set(false); + tableMeta.updatedRows.set(100); + Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + } } diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index f8e53af077dd85..6ed7c23ec3c2f8 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -1468,6 +1468,18 @@ struct TReportCommitTxnResultRequest { 4: optional binary payload } +struct TQueryColumn { + 1: optional string catalogId + 2: optional string dbId + 3: optional string tblId + 4: optional string colName +} + +struct TSyncQueryColumns { + 1: optional list highPriorityColumns; + 2: optional list midPriorityColumns; +} + service FrontendService { TGetDbsResult getDbNames(1: TGetDbsParams params) TGetTablesResult getTableNames(1: TGetTablesParams params) @@ -1558,4 +1570,5 @@ service FrontendService { TShowProcessListResult showProcessList(1: TShowProcessListRequest request) Status.TStatus reportCommitTxnResult(1: TReportCommitTxnResultRequest request) TShowUserResult showUser(1: TShowUserRequest request) + Status.TStatus syncQueryColumns(1: TSyncQueryColumns request) } diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy index 8a7591daeb1b1e..eddf0bd8e7c16c 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy +++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic_auto.groovy @@ -32,7 +32,7 @@ suite("test_hive_statistic_auto", "p2,external,hive,external_remote,external_rem logger.info("catalog " + catalog_name + " created") // Test analyze table without init. 
- sql """analyze database ${catalog_name}.statistics PROPERTIES("use.auto.analyzer"="true")""" + sql """analyze table ${catalog_name}.statistics.statistics PROPERTIES("use.auto.analyzer"="true")""" sql """use ${catalog_name}.statistics""" for (int i = 0; i < 10; i++) { diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index 7c6231aa48fd02..bcbec86b1a77b9 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -2774,7 +2774,7 @@ PARTITION `p599` VALUES IN (599) // Test auto analyze with job type SYSTEM sql """drop stats trigger_test""" - sql """analyze database trigger PROPERTIES("use.auto.analyzer"="true")""" + sql """analyze table trigger_test PROPERTIES("use.auto.analyzer"="true")""" int i = 0; for (0; i < 10; i++) { result = sql """show column stats trigger_test""" From f34114d35fe17e859ce12189f8d63d948b7173dd Mon Sep 17 00:00:00 2001 From: feiniaofeiafei <53502832+feiniaofeiafei@users.noreply.github.com> Date: Fri, 26 Apr 2024 12:34:24 +0800 Subject: [PATCH 043/163] [Fix](nereids) fix rule merge_aggregate when has project (#33892) --- .../doris/nereids/jobs/executor/Rewriter.java | 4 +- .../nereids/rules/rewrite/MergeAggregate.java | 23 +++--- .../merge_aggregate/merge_aggregate.out | 51 ++++++++++++ .../merge_aggregate/merge_aggregate.groovy | 80 +++++++++++++++++++ 4 files changed, 148 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 51d2f4f44d0d55..335de322c614ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -304,7 +304,9 @@ public class Rewriter extends AbstractBatchJobExecutor { topic("Eliminate GroupBy", topDown(new EliminateGroupBy(), - new MergeAggregate()) + new MergeAggregate(), + // need to adjust min/max/sum nullable attribute after merge aggregate + new AdjustAggregateNullableForEmptySet()) ), topic("Eager aggregation", diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java index 9a0b9f8b5e0353..a2c23dd9b412b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergeAggregate.java @@ -34,10 +34,12 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -87,15 +89,14 @@ private Plan mergeTwoAggregate(LogicalAggregate> outerAgg private Plan mergeAggProjectAgg(LogicalAggregate>> outerAgg) { LogicalProject> project = outerAgg.child(); LogicalAggregate innerAgg = project.child(); - + List outputExpressions = outerAgg.getOutputExpressions(); + List replacedOutputExpressions = PlanUtils.replaceExpressionByProjections( + project.getProjects(), (List) outputExpressions); // rewrite agg function. e.g. 
max(max) - List aggFunc = outerAgg.getOutputExpressions().stream() + List replacedAggFunc = replacedOutputExpressions.stream() .filter(expr -> (expr instanceof Alias) && (expr.child(0) instanceof AggregateFunction)) .map(e -> rewriteAggregateFunction(e, innerAggExprIdToAggFunc)) .collect(Collectors.toList()); - // rewrite agg function directly refer to the slot below the project - List replacedAggFunc = PlanUtils.replaceExpressionByProjections(project.getProjects(), - (List) aggFunc); // replace groupByKeys directly refer to the slot below the project List replacedGroupBy = PlanUtils.replaceExpressionByProjections(project.getProjects(), outerAgg.getGroupByExpressions()); @@ -138,13 +139,17 @@ private NamedExpression rewriteAggregateFunction(NamedExpression e, } boolean commonCheck(LogicalAggregate outerAgg, LogicalAggregate innerAgg, - boolean sameGroupBy) { + boolean sameGroupBy, Optional projectOptional) { innerAggExprIdToAggFunc = innerAgg.getOutputExpressions().stream() .filter(expr -> (expr instanceof Alias) && (expr.child(0) instanceof AggregateFunction)) .collect(Collectors.toMap(NamedExpression::getExprId, value -> (AggregateFunction) value.child(0), (existValue, newValue) -> existValue)); Set aggregateFunctions = outerAgg.getAggregateFunctions(); - for (AggregateFunction outerFunc : aggregateFunctions) { + List replacedAggFunctions = projectOptional.map(project -> + (List) PlanUtils.replaceExpressionByProjections( + projectOptional.get().getProjects(), new ArrayList<>(aggregateFunctions))) + .orElse(new ArrayList<>(aggregateFunctions)); + for (AggregateFunction outerFunc : replacedAggFunctions) { if (!(ALLOW_MERGE_AGGREGATE_FUNCTIONS.contains(outerFunc.getName()))) { return false; } @@ -188,7 +193,7 @@ private boolean canMergeAggregateWithoutProject(LogicalAggregate>> outerAgg) { @@ -206,6 +211,6 @@ private boolean canMergeAggregateWithProject(LogicalAggregate Date: Fri, 26 Apr 2024 12:37:08 +0800 Subject: [PATCH 044/163] [fix](Nereids) column pruning should prune map in cte consumer (#34079) we save bi-map in cte consumer to get the maping between producer and consumer. the consumer's output is decided by the map in it. 
so, cte consumer should be output prunable, and should remove useless entry from map when do column pruning --- .../apache/doris/nereids/CascadesContext.java | 26 +--- .../doris/nereids/StatementContext.java | 41 ++--- .../translator/PhysicalPlanTranslator.java | 6 +- .../doris/nereids/jobs/executor/Rewriter.java | 4 +- .../apache/doris/nereids/rules/RuleType.java | 3 +- .../rewrite/CollectCteConsumerOutput.java | 41 +++++ .../rewrite/CollectProjectAboveConsumer.java | 81 ---------- .../nereids/rules/rewrite/ColumnPruning.java | 31 ++-- .../rules/rewrite/RewriteCteChildren.java | 17 ++- .../plans/logical/LogicalCTEConsumer.java | 22 ++- .../nereids_hint_tpcds_p0/shape/query1.out | 3 +- .../nereids_hint_tpcds_p0/shape/query24.out | 3 +- .../data/nereids_p0/hint/multi_leading.out | 6 +- .../shape/query1.out | 3 +- .../shape/query23.out | 12 +- .../shape/query24.out | 3 +- .../shape/query30.out | 3 +- .../shape/query31.out | 48 +++--- .../shape/query39.out | 10 +- .../shape/query47.out | 5 +- .../shape/query57.out | 5 +- .../shape/query59.out | 19 ++- .../shape/query70.out | 1 + .../shape/query81.out | 3 +- .../shape/query95.out | 6 +- .../constraints/query23.out | 12 +- .../noStatsRfPrune/query1.out | 3 +- .../noStatsRfPrune/query23.out | 12 +- .../noStatsRfPrune/query24.out | 3 +- .../noStatsRfPrune/query30.out | 3 +- .../noStatsRfPrune/query31.out | 40 ++--- .../noStatsRfPrune/query39.out | 10 +- .../noStatsRfPrune/query47.out | 5 +- .../noStatsRfPrune/query57.out | 5 +- .../noStatsRfPrune/query59.out | 19 ++- .../noStatsRfPrune/query70.out | 1 + .../noStatsRfPrune/query81.out | 3 +- .../noStatsRfPrune/query95.out | 6 +- .../no_stats_shape/query1.out | 3 +- .../no_stats_shape/query23.out | 12 +- .../no_stats_shape/query24.out | 3 +- .../no_stats_shape/query30.out | 3 +- .../no_stats_shape/query31.out | 40 ++--- .../no_stats_shape/query39.out | 10 +- .../no_stats_shape/query47.out | 5 +- .../no_stats_shape/query57.out | 5 +- .../no_stats_shape/query59.out | 19 ++- .../no_stats_shape/query81.out | 3 +- .../no_stats_shape/query95.out | 6 +- .../rf_prune/query1.out | 3 +- .../rf_prune/query23.out | 12 +- .../rf_prune/query24.out | 3 +- .../rf_prune/query30.out | 3 +- .../rf_prune/query31.out | 48 +++--- .../rf_prune/query39.out | 10 +- .../rf_prune/query47.out | 5 +- .../rf_prune/query57.out | 5 +- .../rf_prune/query59.out | 19 ++- .../rf_prune/query70.out | 1 + .../rf_prune/query81.out | 3 +- .../rf_prune/query95.out | 6 +- .../shape/query1.out | 3 +- .../shape/query23.out | 12 +- .../shape/query24.out | 3 +- .../shape/query30.out | 3 +- .../shape/query31.out | 48 +++--- .../shape/query39.out | 10 +- .../shape/query47.out | 5 +- .../shape/query57.out | 5 +- .../shape/query59.out | 19 ++- .../shape/query81.out | 3 +- .../shape/query95.out | 6 +- .../cte/test_cte_column_pruning.groovy | 143 ++++++++++++++++++ 73 files changed, 513 insertions(+), 498 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectCteConsumerOutput.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectProjectAboveConsumer.java create mode 100644 regression-test/suites/nereids_p0/cte/test_cte_column_pruning.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java index dd569ef8f7519a..3b9ba912383e2f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java @@ -50,7 +50,6 @@ import org.apache.doris.nereids.rules.exploration.mv.MaterializationContext; import org.apache.doris.nereids.trees.expressions.CTEId; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SubqueryExpr; import org.apache.doris.nereids.trees.plans.Plan; @@ -102,7 +101,7 @@ public class CascadesContext implements ScheduleContext { private Optional currentRootRewriteJobContext; // in optimize stage, the plan will storage in the memo private Memo memo; - private StatementContext statementContext; + private final StatementContext statementContext; private final CTEContext cteContext; private final RuleSet ruleSet; @@ -616,16 +615,6 @@ public void putCTEIdToConsumer(LogicalCTEConsumer cteConsumer) { consumers.add(cteConsumer); } - public void putCTEIdToProject(CTEId cteId, NamedExpression p) { - Set projects = this.statementContext.getCteIdToProjects() - .computeIfAbsent(cteId, k -> new HashSet<>()); - projects.add(p); - } - - public Set getProjectForProducer(CTEId cteId) { - return this.statementContext.getCteIdToProjects().get(cteId); - } - public Map> getCteIdToConsumers() { return this.statementContext.getCteIdToConsumers(); } @@ -639,17 +628,6 @@ public Map> getConsumerIdToFilters() { return this.statementContext.getConsumerIdToFilters(); } - public void markConsumerUnderProject(LogicalCTEConsumer cteConsumer) { - Set consumerIds = this.statementContext.getCteIdToConsumerUnderProjects() - .computeIfAbsent(cteConsumer.getCteId(), k -> new HashSet<>()); - consumerIds.add(cteConsumer.getRelationId()); - } - - public boolean couldPruneColumnOnProducer(CTEId cteId) { - Set consumerIds = this.statementContext.getCteIdToConsumerUnderProjects().get(cteId); - return consumerIds.size() == this.statementContext.getCteIdToConsumers().get(cteId).size(); - } - public void addCTEConsumerGroup(CTEId cteId, Group g, Map producerSlotToConsumerSlot) { List, Group>> consumerGroups = this.statementContext.getCteIdToConsumerGroup().computeIfAbsent(cteId, k -> new ArrayList<>()); @@ -746,7 +724,7 @@ public void printPlanProcess() { public static void printPlanProcess(List planProcesses) { for (PlanProcess row : planProcesses) { - LOG.info("RULE: " + row.ruleName + "\nBEFORE:\n" + row.beforeShape + "\nafter:\n" + row.afterShape); + LOG.info("RULE: {}\nBEFORE:\n{}\nafter:\n{}", row.ruleName, row.beforeShape, row.afterShape); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 819ff032cd6878..3274233c16be75 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -27,7 +27,6 @@ import org.apache.doris.nereids.trees.expressions.CTEId; import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.ObjectId; @@ -54,7 +53,6 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; -import java.util.Comparator; import 
java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -103,9 +101,8 @@ public class StatementContext implements Closeable { private final IdGenerator cteIdGenerator = CTEId.createGenerator(); private final Map> cteIdToConsumers = new HashMap<>(); - private final Map> cteIdToProjects = new HashMap<>(); + private final Map> cteIdToOutputIds = new HashMap<>(); private final Map> consumerIdToFilters = new HashMap<>(); - private final Map> cteIdToConsumerUnderProjects = new HashMap<>(); // Used to update consumer's stats private final Map, Group>>> cteIdToConsumerGroup = new HashMap<>(); private final Map rewrittenCteProducer = new HashMap<>(); @@ -134,12 +131,13 @@ public class StatementContext implements Closeable { private BitSet disableRules; // table locks - private Stack plannerResources = new Stack<>(); + private final Stack plannerResources = new Stack<>(); // for create view support in nereids // key is the start and end position of the sql substring that needs to be replaced, // and value is the new string used for replacement. - private TreeMap, String> indexInSqlToString = new TreeMap<>(new Pair.PairComparator<>()); + private final TreeMap, String> indexInSqlToString + = new TreeMap<>(new Pair.PairComparator<>()); public StatementContext() { this(ConnectContext.get(), null, 0); @@ -216,10 +214,6 @@ public Optional getSqlCacheContext() { return Optional.ofNullable(sqlCacheContext); } - public int getMaxContinuousJoin() { - return joinCount; - } - public Set getAllPathsSlots() { Set allSlotReferences = Sets.newHashSet(); for (Map, SlotReference> slotReferenceMap : subColumnSlotRefMap.values()) { @@ -240,19 +234,16 @@ public Slot getRewrittenSlotRefByOriginalExpr(Expression originalExpr) { * Add a slot ref attached with paths in context to avoid duplicated slot */ public void addPathSlotRef(Slot root, List paths, SlotReference slotRef, Expression originalExpr) { - subColumnSlotRefMap.computeIfAbsent(root, k -> Maps.newTreeMap(new Comparator>() { - @Override - public int compare(List lst1, List lst2) { - Iterator it1 = lst1.iterator(); - Iterator it2 = lst2.iterator(); - while (it1.hasNext() && it2.hasNext()) { - int result = it1.next().compareTo(it2.next()); - if (result != 0) { - return result; - } + subColumnSlotRefMap.computeIfAbsent(root, k -> Maps.newTreeMap((lst1, lst2) -> { + Iterator it1 = lst1.iterator(); + Iterator it2 = lst2.iterator(); + while (it1.hasNext() && it2.hasNext()) { + int result = it1.next().compareTo(it2.next()); + if (result != 0) { + return result; } - return Integer.compare(lst1.size(), lst2.size()); } + return Integer.compare(lst1.size(), lst2.size()); })); subColumnSlotRefMap.get(root).put(paths, slotRef); subColumnOriginalExprMap.put(slotRef, originalExpr); @@ -349,18 +340,14 @@ public Map> getCteIdToConsumers() { return cteIdToConsumers; } - public Map> getCteIdToProjects() { - return cteIdToProjects; + public Map> getCteIdToOutputIds() { + return cteIdToOutputIds; } public Map> getConsumerIdToFilters() { return consumerIdToFilters; } - public Map> getCteIdToConsumerUnderProjects() { - return cteIdToConsumerUnderProjects; - } - public Map, Group>>> getCteIdToConsumerGroup() { return cteIdToConsumerGroup; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 98bc581fe8ccd8..3184937e2e051d 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -1123,9 +1123,13 @@ public PlanFragment visitPhysicalCTEConsumer(PhysicalCTEConsumer cteConsumer, // update expr to slot mapping TupleDescriptor tupleDescriptor = null; for (Slot producerSlot : cteProducer.getOutput()) { - Slot consumerSlot = cteConsumer.getProducerToConsumerSlotMap().get(producerSlot); SlotRef slotRef = context.findSlotRef(producerSlot.getExprId()); tupleDescriptor = slotRef.getDesc().getParent(); + Slot consumerSlot = cteConsumer.getProducerToConsumerSlotMap().get(producerSlot); + // consumerSlot could be null if we prune partial consumers' columns + if (consumerSlot == null) { + continue; + } context.addExprIdSlotRefPair(consumerSlot.getExprId(), slotRef); } CTEScanNode cteScanNode = new CTEScanNode(tupleDescriptor); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 335de322c614ed..cf0d8a638d1b24 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -44,8 +44,8 @@ import org.apache.doris.nereids.rules.rewrite.CheckMatchExpression; import org.apache.doris.nereids.rules.rewrite.CheckMultiDistinct; import org.apache.doris.nereids.rules.rewrite.CheckPrivileges; +import org.apache.doris.nereids.rules.rewrite.CollectCteConsumerOutput; import org.apache.doris.nereids.rules.rewrite.CollectFilterAboveConsumer; -import org.apache.doris.nereids.rules.rewrite.CollectProjectAboveConsumer; import org.apache.doris.nereids.rules.rewrite.ColumnPruning; import org.apache.doris.nereids.rules.rewrite.ConvertInnerOrCrossJoin; import org.apache.doris.nereids.rules.rewrite.CountDistinctRewrite; @@ -418,7 +418,7 @@ public class Rewriter extends AbstractBatchJobExecutor { topic("Push project and filter on cte consumer to cte producer", topDown( new CollectFilterAboveConsumer(), - new CollectProjectAboveConsumer() + new CollectCteConsumerOutput() ) ), topic("Collect used column", custom(RuleType.COLLECT_COLUMNS, QueryColumnCollector::new)) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index f1a797f4e2bc75..52e24827c316a4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -306,8 +306,7 @@ public enum RuleType { COLLECT_FILTER(RuleTypeClass.REWRITE), COLLECT_JOIN_CONSTRAINT(RuleTypeClass.REWRITE), - COLLECT_PROJECT_ABOVE_CTE_CONSUMER(RuleTypeClass.REWRITE), - COLLECT_PROJECT_ABOVE_FILTER_CTE_CONSUMER(RuleTypeClass.REWRITE), + COLLECT_CTE_CONSUMER_OUTPUT(RuleTypeClass.REWRITE), LEADING_JOIN(RuleTypeClass.REWRITE), REWRITE_SENTINEL(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectCteConsumerOutput.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectCteConsumerOutput.java new file mode 100644 index 00000000000000..20b6dc40230b4b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectCteConsumerOutput.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license 
agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Slot; + +import java.util.HashSet; +import java.util.Set; + +/** + * Collect outputs of CTE Consumer. + */ +public class CollectCteConsumerOutput extends OneRewriteRuleFactory { + + @Override + public Rule build() { + return logicalCTEConsumer().thenApply(ctx -> { + Set producerOutputs = ctx.statementContext + .getCteIdToOutputIds().computeIfAbsent(ctx.root.getCteId(), k -> new HashSet<>()); + producerOutputs.addAll(ctx.root.getProducerToConsumerOutputMap().keySet()); + return null; + }).toRule(RuleType.COLLECT_CTE_CONSUMER_OUTPUT); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectProjectAboveConsumer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectProjectAboveConsumer.java deleted file mode 100644 index 0ecace726dbcbc..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CollectProjectAboveConsumer.java +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.rewrite; - -import org.apache.doris.nereids.CascadesContext; -import org.apache.doris.nereids.rules.Rule; -import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.NamedExpression; -import org.apache.doris.nereids.trees.expressions.Slot; -import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; -import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; -import org.apache.doris.nereids.trees.plans.logical.LogicalProject; - -import com.google.common.collect.ImmutableList; - -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -/** - * Collect Projects Above CTE Consumer. 
- */ -public class CollectProjectAboveConsumer implements RewriteRuleFactory { - - @Override - public List buildRules() { - return ImmutableList.of(RuleType.COLLECT_PROJECT_ABOVE_CTE_CONSUMER - .build(logicalProject(logicalCTEConsumer()).thenApply(ctx -> { - LogicalProject project = ctx.root; - List namedExpressions = project.getProjects(); - LogicalCTEConsumer cteConsumer = project.child(); - collectProject(ctx.cascadesContext, namedExpressions, cteConsumer); - return ctx.root; - })), - RuleType.COLLECT_PROJECT_ABOVE_FILTER_CTE_CONSUMER - .build(logicalProject(logicalFilter(logicalCTEConsumer())).thenApply(ctx -> { - LogicalProject> project = ctx.root; - LogicalFilter filter = project.child(); - Set filterSlots = filter.getInputSlots(); - List namedExpressions = new ArrayList<>(project.getProjects()); - for (Slot slot : filterSlots) { - if (!project.getOutput().contains(slot)) { - namedExpressions.add(slot); - } - } - collectProject(ctx.cascadesContext, namedExpressions, filter.child()); - return ctx.root; - })) - ); - } - - private static void collectProject(CascadesContext ctx, - List namedExpressions, LogicalCTEConsumer cteConsumer) { - for (Expression expr : namedExpressions) { - expr.foreach(node -> { - if (!(node instanceof Slot)) { - return; - } - Slot slot = cteConsumer.getProducerSlot((Slot) node); - ctx.putCTEIdToProject(cteConsumer.getCteId(), slot); - ctx.markConsumerUnderProject(cteConsumer); - }); - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java index e36c0f5172ad70..4cb18e8a380177 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java @@ -29,6 +29,7 @@ import org.apache.doris.nereids.trees.plans.algebra.Aggregate; import org.apache.doris.nereids.trees.plans.algebra.SetOperation.Qualifier; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer; import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer; import org.apache.doris.nereids.trees.plans.logical.LogicalExcept; import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect; @@ -200,13 +201,21 @@ public Plan visitLogicalRepeat(LogicalRepeat repeat, PruneContex return pruneAggregate(repeat, context); } - private Plan pruneAggregate(Aggregate agg, PruneContext context) { - // first try to prune group by and aggregate functions - Aggregate prunedOutputAgg = pruneOutput(agg, agg.getOutputs(), agg::pruneOutputs, context); + @Override + public Plan visitLogicalCTEProducer(LogicalCTEProducer cteProducer, PruneContext context) { + return skipPruneThisAndFirstLevelChildren(cteProducer); + } - Aggregate fillUpAggr = fillUpGroupByAndOutput(prunedOutputAgg); + @Override + public Plan visitLogicalCTEConsumer(LogicalCTEConsumer cteConsumer, PruneContext context) { + return super.visitLogicalCTEConsumer(cteConsumer, context); + } - return pruneChildren(fillUpAggr); + private Plan pruneAggregate(Aggregate agg, PruneContext context) { + // first try to prune group by and aggregate functions + Aggregate prunedOutputAgg = pruneOutput(agg, agg.getOutputs(), agg::pruneOutputs, context); + Aggregate fillUpAggregate = fillUpGroupByAndOutput(prunedOutputAgg); + return pruneChildren(fillUpAggregate); } private Plan skipPruneThisAndFirstLevelChildren(Plan plan) { @@ -217,7 
+226,7 @@ private Plan skipPruneThisAndFirstLevelChildren(Plan plan) { return pruneChildren(plan, requireAllOutputOfChildren.build()); } - private static Aggregate fillUpGroupByAndOutput(Aggregate prunedOutputAgg) { + private static Aggregate fillUpGroupByAndOutput(Aggregate prunedOutputAgg) { List groupBy = prunedOutputAgg.getGroupByExpressions(); List output = prunedOutputAgg.getOutputExpressions(); @@ -239,12 +248,11 @@ private static Aggregate fillUpGroupByAndOutput(Aggregate prunedOutp ImmutableList.Builder newGroupByExprList = ImmutableList.builderWithExpectedSize(newOutputList.size()); for (NamedExpression e : newOutputList) { - if (!(aggregateFunctions.contains(e) - || (e instanceof Alias && aggregateFunctions.contains(e.child(0))))) { + if (!(e instanceof Alias && aggregateFunctions.contains(e.child(0)))) { newGroupByExprList.add(e); } } - return ((LogicalAggregate) prunedOutputAgg).withGroupByAndOutput( + return ((LogicalAggregate) prunedOutputAgg).withGroupByAndOutput( newGroupByExprList.build(), newOutputList); } @@ -371,11 +379,6 @@ private Plan doPruneChild(Plan plan, Plan child, Set childRequiredSlots) { return prunedChild; } - @Override - public Plan visitLogicalCTEProducer(LogicalCTEProducer cteProducer, PruneContext context) { - return skipPruneThisAndFirstLevelChildren(cteProducer); - } - /** PruneContext */ public static class PruneContext { public Set requiredSlots; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java index 72a4603fadc949..3a2da623b4c4d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java @@ -26,6 +26,7 @@ import org.apache.doris.nereids.trees.expressions.CTEId; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.RelationId; @@ -41,7 +42,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import org.apache.commons.collections.CollectionUtils; import java.util.HashSet; import java.util.List; @@ -109,10 +109,17 @@ public Plan visitLogicalCTEProducer(LogicalCTEProducer cteProduc } else { child = (LogicalPlan) cteProducer.child(); child = tryToConstructFilter(cascadesContext, cteProducer.getCteId(), child); - Set projects = cascadesContext.getProjectForProducer(cteProducer.getCteId()); - if (CollectionUtils.isNotEmpty(projects) - && cascadesContext.couldPruneColumnOnProducer(cteProducer.getCteId())) { - child = new LogicalProject<>(ImmutableList.copyOf(projects), child); + Set producerOutputs = cascadesContext.getStatementContext() + .getCteIdToOutputIds().get(cteProducer.getCteId()); + if (producerOutputs.size() < child.getOutput().size()) { + ImmutableList.Builder projectsBuilder + = ImmutableList.builderWithExpectedSize(producerOutputs.size()); + for (Slot slot : child.getOutput()) { + if (producerOutputs.contains(slot)) { + projectsBuilder.add(slot); + } + } + child = new LogicalProject<>(projectsBuilder.build(), child); child = pushPlanUnderAnchor(child); } CascadesContext rewrittenCtx = CascadesContext.newSubtreeContext( diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTEConsumer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTEConsumer.java index 71b1c43f791191..5fd088a2bba196 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTEConsumer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalCTEConsumer.java @@ -20,6 +20,7 @@ import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.trees.expressions.CTEId; +import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.StatementScopeIdGenerator; @@ -36,6 +37,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; @@ -43,7 +45,7 @@ * LogicalCTEConsumer */ //TODO: find cte producer and propagate its functional dependencies -public class LogicalCTEConsumer extends LogicalRelation implements BlockFuncDepsPropagation { +public class LogicalCTEConsumer extends LogicalRelation implements BlockFuncDepsPropagation, OutputPrunable { private final String name; private final CTEId cteId; @@ -145,6 +147,24 @@ public List<Slot> computeOutput() { return ImmutableList.copyOf(producerToConsumerOutputMap.values()); } + @Override + public Plan pruneOutputs(List<NamedExpression> prunedOutputs) { + Map<Slot, Slot> consumerToProducerOutputMap = new LinkedHashMap<>(this.consumerToProducerOutputMap.size()); + Map<Slot, Slot> producerToConsumerOutputMap = new LinkedHashMap<>(this.consumerToProducerOutputMap.size()); + for (Entry<Slot, Slot> consumerToProducerSlot : this.consumerToProducerOutputMap.entrySet()) { + if (prunedOutputs.contains(consumerToProducerSlot.getKey())) { + consumerToProducerOutputMap.put(consumerToProducerSlot.getKey(), consumerToProducerSlot.getValue()); + producerToConsumerOutputMap.put(consumerToProducerSlot.getValue(), consumerToProducerSlot.getKey()); + } + } + return withTwoMaps(consumerToProducerOutputMap, producerToConsumerOutputMap); + } + + @Override + public List<NamedExpression> getOutputs() { + return (List) this.getOutput(); + } + public CTEId getCteId() { return cteId; } diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query1.out b/regression-test/data/nereids_hint_tpcds_p0/shape/query1.out index e0104b54a423b2..996ccd7623c2a7 100644 --- a/regression-test/data/nereids_hint_tpcds_p0/shape/query1.out +++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query1.out @@ -24,8 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashAgg[LOCAL] --------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalProject -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------PhysicalProject ----------------hashJoin[INNER_JOIN] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ctr_store_sk] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query24.out b/regression-test/data/nereids_hint_tpcds_p0/shape/query24.out index f37faa9bff4cf8..2e2658db1620a8 100644 ---
a/regression-test/data/nereids_hint_tpcds_p0/shape/query24.out +++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query24.out @@ -47,8 +47,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecGather] ------------------hashAgg[LOCAL] --------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalProject -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------PhysicalDistribute[DistributionSpecReplicated] --------------PhysicalProject ----------------hashAgg[GLOBAL] diff --git a/regression-test/data/nereids_p0/hint/multi_leading.out b/regression-test/data/nereids_p0/hint/multi_leading.out index 51ecab29494cac..71db5aec524b10 100644 --- a/regression-test/data/nereids_p0/hint/multi_leading.out +++ b/regression-test/data/nereids_p0/hint/multi_leading.out @@ -613,8 +613,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = cte.c11)) otherCondition=() ------------------------PhysicalOlapScan[t1] ------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -- !sql5_2 -- PhysicalCteAnchor ( cteId=CTEId#0 ) @@ -639,8 +638,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalProject ----------------------hashJoin[INNER_JOIN] hashCondition=((t1.c1 = cte.c11)) otherCondition=() ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------PhysicalOlapScan[t1] diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out index b2b5a87ac2bc84..2317f9435be541 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query1.out @@ -39,6 +39,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------hashAgg[LOCAL] ----------------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query23.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query23.out index b937f23400b9a3..0475cae9f9522a 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query23.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query23.out @@ -59,8 +59,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF4 cs_item_sk->[item_sk] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 
+------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject --------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() @@ -73,13 +72,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF6 ws_item_sk->[item_sk] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject --------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() @@ -92,6 +89,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 7) and (date_dim.d_year = 2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out index 83f4e91b4a29f1..ebdb27b0c5d023 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out @@ -55,6 +55,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecGather] ----------------------hashAgg[LOCAL] ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query30.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query30.out index 7dcac891ad1dc1..7272f6c9e2641c 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query30.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query30.out @@ -43,6 +43,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] --------------------hashAgg[LOCAL] ----------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query31.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query31.out index 11e812ae39b681..f22860f874de84 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query31.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query31.out @@ -2,44 +2,42 @@ -- !ds_shape_31 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------filter((ss.d_year = 1999) and d_qoy IN (1, 2, 3)) +--------------------------PhysicalOlapScan[date_dim] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------PhysicalOlapScan[customer_address] +--PhysicalCteAnchor ( cteId=CTEId#1 ) +----PhysicalCteProducer ( cteId=CTEId#1 ) ------hashAgg[GLOBAL] --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk] +--------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk] ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject ---------------------------filter((ss.d_year = 1999) and d_qoy IN (1, 2, 3)) +--------------------------filter((ws.d_year = 1999) and d_qoy IN (1, 2, 3)) ----------------------------PhysicalOlapScan[date_dim] ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalOlapScan[customer_address] ---PhysicalCteAnchor ( cteId=CTEId#1 ) -----PhysicalCteProducer ( cteId=CTEId#1 ) -------PhysicalProject ---------hashAgg[GLOBAL] -----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk] 
-------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 -------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------PhysicalProject -----------------------------filter((ws.d_year = 1999) and d_qoy IN (1, 2, 3)) -------------------------------PhysicalOlapScan[date_dim] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------PhysicalOlapScan[customer_address] ----PhysicalResultSink ------PhysicalQuickSort[MERGE_SORT] --------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query39.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query39.out index 2fb33f5848e1cc..9d2e1eb162d5d0 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query39.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query39.out @@ -29,11 +29,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------hashJoin[INNER_JOIN] hashCondition=((inv1.i_item_sk = inv2.i_item_sk) and (inv1.w_warehouse_sk = inv2.w_warehouse_sk)) otherCondition=() ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv1.d_moy = 1)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv1.d_moy = 1)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv2.d_moy = 2)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv2.d_moy = 2)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query47.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query47.out index 1ff72561a11a74..e57e35ba769342 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query47.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query47.out @@ -41,9 +41,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalProject ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2000)) -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2000)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query57.out 
b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query57.out index 628a6aa98a3da7..79c67bde5cb36b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query57.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query57.out @@ -41,9 +41,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalProject ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query59.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query59.out index ca30f76b1d7829..84bd2d432ac7f4 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query59.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query59.out @@ -2,17 +2,16 @@ -- !ds_shape_59 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------hashAgg[GLOBAL] ---------PhysicalDistribute[DistributionSpecHash] -----------hashAgg[LOCAL] -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------PhysicalProject +----------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------PhysicalDistribute[DistributionSpecReplicated] ----------------PhysicalProject -------------------PhysicalOlapScan[store_sales] apply RFs: RF0 -----------------PhysicalDistribute[DistributionSpecReplicated] -------------------PhysicalProject ---------------------PhysicalOlapScan[date_dim] +------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out index fdbfa12a4b73f7..5eb9fba4824825 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query70.out @@ -45,3 +45,4 @@ PhysicalResultSink --------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------------------PhysicalProject ------------------------------------------------------PhysicalOlapScan[store] + diff --git 
a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query81.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query81.out index 20a169a0faa968..61f4343fd1f0a0 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query81.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query81.out @@ -44,6 +44,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] --------------------hashAgg[LOCAL] ----------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query95.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query95.out index b0a0655caff31e..c6f2d22db1554c 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query95.out @@ -23,16 +23,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF5 wr_order_number->[ws_order_number] --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 RF6 +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 RF6 --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[web_returns] apply RFs: RF6 ----------------------PhysicalProject ------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF7 ws_order_number->[ws_order_number,ws_order_number] --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] ------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_ship_date_sk] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/constraints/query23.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/constraints/query23.out index ddff36aebc71b7..8668943e20f864 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/constraints/query23.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/constraints/query23.out @@ -69,11 +69,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) 
----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalProject --------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() ----------------------PhysicalDistribute[DistributionSpecHash] @@ -88,9 +86,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query1.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query1.out index 65dca9e89f3eb1..8528bf7b49b992 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query1.out @@ -37,6 +37,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashAgg[LOCAL] --------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalProject -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query23.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query23.out index 431330e9039d15..22f483bb5a7b93 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query23.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query23.out @@ -65,11 +65,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 ------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject --------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) @@ -84,11 +82,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 ------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) 
+--------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject --------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query24.out index b9a89aae5e9bbf..9699ebf9255d22 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query24.out @@ -52,6 +52,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecGather] ----------------------hashAgg[LOCAL] ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query30.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query30.out index 985c714b0ab82e..61524dd01387bc 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query30.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query30.out @@ -39,8 +39,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] ----------------------hashAgg[LOCAL] ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------PhysicalDistribute[DistributionSpecReplicated] ----------------PhysicalProject ------------------filter((customer_address.ca_state = 'IN')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out index 3cacf0ae184f1e..0852d3abe2ed20 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query31.out @@ -2,30 +2,30 @@ -- !ds_shape_31 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------hashAgg[GLOBAL] ---------PhysicalDistribute[DistributionSpecHash] -----------hashAgg[LOCAL] -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------PhysicalProject ----------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() ------------------PhysicalProject 
--------------------PhysicalOlapScan[store_sales] apply RFs: RF1 ------------------PhysicalDistribute[DistributionSpecReplicated] --------------------PhysicalProject ----------------------PhysicalOlapScan[customer_address] -----------------PhysicalDistribute[DistributionSpecReplicated] -------------------PhysicalProject ---------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) -----------------------PhysicalOlapScan[date_dim] +--------------PhysicalDistribute[DistributionSpecReplicated] +----------------PhysicalProject +------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) +--------------------PhysicalOlapScan[date_dim] --PhysicalCteAnchor ( cteId=CTEId#1 ) ----PhysicalCteProducer ( cteId=CTEId#1 ) -------PhysicalProject ---------hashAgg[GLOBAL] -----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] +----------------PhysicalProject ------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() --------------------PhysicalDistribute[DistributionSpecHash] ----------------------PhysicalProject @@ -33,10 +33,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] ----------------------PhysicalProject ------------------------PhysicalOlapScan[customer_address] -------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------PhysicalProject -----------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) -------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute[DistributionSpecReplicated] +------------------PhysicalProject +--------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) +----------------------PhysicalOlapScan[date_dim] ----PhysicalResultSink ------PhysicalQuickSort[MERGE_SORT] --------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query39.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query39.out index 9ffc6dc5e07fc4..421fa8a749997d 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query39.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query39.out @@ -28,11 +28,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------hashJoin[INNER_JOIN] hashCondition=((inv1.i_item_sk = inv2.i_item_sk) and (inv1.w_warehouse_sk = inv2.w_warehouse_sk)) otherCondition=() ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv1.d_moy = 1)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv1.d_moy = 1)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv2.d_moy = 2)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv2.d_moy = 2)) +----------------PhysicalCteConsumer ( 
cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query47.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query47.out index fb20900a0b67c3..430c3c4067384f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query47.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query47.out @@ -46,7 +46,6 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] -----------------------PhysicalProject -------------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query57.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query57.out index 18d9a45e7d6755..ed1d2952975370 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query57.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query57.out @@ -46,7 +46,6 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] -----------------------PhysicalProject -------------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out index e871b1a1a44480..3347fd02ab46e0 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query59.out @@ -2,17 +2,16 @@ -- !ds_shape_59 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------hashAgg[GLOBAL] ---------PhysicalDistribute[DistributionSpecHash] -----------hashAgg[LOCAL] -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) 
otherCondition=() +--------------PhysicalProject +----------------PhysicalOlapScan[store_sales] +--------------PhysicalDistribute[DistributionSpecReplicated] ----------------PhysicalProject -------------------PhysicalOlapScan[store_sales] -----------------PhysicalDistribute[DistributionSpecReplicated] -------------------PhysicalProject ---------------------PhysicalOlapScan[date_dim] +------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out index abdcd1b0149063..d3e7d441cec28c 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query70.out @@ -45,3 +45,4 @@ PhysicalResultSink ------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------PhysicalProject ----------------------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query81.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query81.out index ac9cf29ee123d4..e9fea1c43c2cff 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query81.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query81.out @@ -40,8 +40,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------hashAgg[LOCAL] --------------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------PhysicalDistribute[DistributionSpecHash] ----------------PhysicalProject ------------------filter((customer_address.ca_state = 'CA')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out index b042e3531742ae..3cc3f5843b2b08 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query95.out @@ -21,8 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF6 ws_order_number->[ws_order_number] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF5 web_site_sk->[ws_web_site_sk] --------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[ws_ship_addr_sk] @@ -31,8 +30,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------hashJoin[INNER_JOIN] 
hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() ------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------PhysicalProject ----------------------------------------PhysicalOlapScan[web_returns] apply RFs: RF2 diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query1.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query1.out index e1300825fc29a8..422dff53364487 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query1.out @@ -37,6 +37,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------hashAgg[LOCAL] --------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------PhysicalProject -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query23.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query23.out index 049a85a8860ef2..662fc3aa2dd7a8 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query23.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query23.out @@ -65,11 +65,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF3 ------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject --------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) @@ -84,11 +82,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 ------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------PhysicalProject -----------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +--------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject --------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query24.out index 9f6191dac84b55..15252933e01d4c 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query24.out @@ -52,6 +52,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecGather] ----------------------hashAgg[LOCAL] ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query30.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query30.out index bfa6bc4fe650b8..ccd62e114b44e7 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query30.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query30.out @@ -39,8 +39,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] ----------------------hashAgg[LOCAL] ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------PhysicalDistribute[DistributionSpecReplicated] ----------------PhysicalProject ------------------filter((customer_address.ca_state = 'IN')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out index 5a0036ae4d5b8d..65a1a9afe0c750 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query31.out @@ -2,30 +2,30 @@ -- !ds_shape_31 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------hashAgg[GLOBAL] ---------PhysicalDistribute[DistributionSpecHash] -----------hashAgg[LOCAL] -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------PhysicalProject ----------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ss_addr_sk] ------------------PhysicalProject --------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ------------------PhysicalDistribute[DistributionSpecReplicated] --------------------PhysicalProject ----------------------PhysicalOlapScan[customer_address] -----------------PhysicalDistribute[DistributionSpecReplicated] -------------------PhysicalProject ---------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) -----------------------PhysicalOlapScan[date_dim] +--------------PhysicalDistribute[DistributionSpecReplicated] 
+----------------PhysicalProject +------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) +--------------------PhysicalOlapScan[date_dim] --PhysicalCteAnchor ( cteId=CTEId#1 ) ----PhysicalCteProducer ( cteId=CTEId#1 ) -------PhysicalProject ---------hashAgg[GLOBAL] -----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] +------hashAgg[GLOBAL] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[ws_sold_date_sk] +----------------PhysicalProject ------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF2 ca_address_sk->[ws_bill_addr_sk] --------------------PhysicalDistribute[DistributionSpecHash] ----------------------PhysicalProject @@ -33,10 +33,10 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] ----------------------PhysicalProject ------------------------PhysicalOlapScan[customer_address] -------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------PhysicalProject -----------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) -------------------------PhysicalOlapScan[date_dim] +----------------PhysicalDistribute[DistributionSpecReplicated] +------------------PhysicalProject +--------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) +----------------------PhysicalOlapScan[date_dim] ----PhysicalResultSink ------PhysicalQuickSort[MERGE_SORT] --------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query39.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query39.out index 40f877acac568b..d1d9fb39429644 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query39.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query39.out @@ -28,11 +28,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------hashJoin[INNER_JOIN] hashCondition=((inv1.i_item_sk = inv2.i_item_sk) and (inv1.w_warehouse_sk = inv2.w_warehouse_sk)) otherCondition=() ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv1.d_moy = 1)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv1.d_moy = 1)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv2.d_moy = 2)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv2.d_moy = 2)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query47.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query47.out index 59d526865c49a0..788b686c26f4e1 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query47.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query47.out @@ -46,7 +46,6 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) 
----------------------PhysicalProject ------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] -----------------------PhysicalProject -------------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query57.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query57.out index 52cd80d56cc1de..e2c13ea729c532 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query57.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query57.out @@ -46,7 +46,6 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecHash] -----------------------PhysicalProject -------------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out index 6027c75690447d..fd888cc3382785 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query59.out @@ -2,17 +2,16 @@ -- !ds_shape_59 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------hashAgg[GLOBAL] ---------PhysicalDistribute[DistributionSpecHash] -----------hashAgg[LOCAL] -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------PhysicalProject +----------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------PhysicalDistribute[DistributionSpecReplicated] ----------------PhysicalProject -------------------PhysicalOlapScan[store_sales] apply RFs: RF0 -----------------PhysicalDistribute[DistributionSpecReplicated] -------------------PhysicalProject 
---------------------PhysicalOlapScan[date_dim] +------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query81.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query81.out index 6463028a8fec4c..fb68f6ce1a3ddb 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query81.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query81.out @@ -40,8 +40,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------hashAgg[LOCAL] --------------------------PhysicalDistribute[DistributionSpecExecutionAny] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------PhysicalDistribute[DistributionSpecHash] ----------------PhysicalProject ------------------filter((customer_address.ca_state = 'CA')) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out index 9e96715c5e4990..4fd762ec994fdd 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query95.out @@ -21,8 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF12 ws_order_number->[ws_order_number];RF13 ws_order_number->[ws_order_number] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF12 RF13 +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF12 RF13 ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF10 web_site_sk->[ws_web_site_sk];RF11 web_site_sk->[ws_web_site_sk] --------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF8 ca_address_sk->[ws_ship_addr_sk];RF9 ca_address_sk->[ws_ship_addr_sk] @@ -31,8 +30,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------------------PhysicalProject ----------------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF2 wr_order_number->[ws_order_number];RF3 wr_order_number->[ws_order_number] ------------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------------PhysicalProject -----------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF3 +--------------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF3 ------------------------------------PhysicalDistribute[DistributionSpecHash] --------------------------------------PhysicalProject ----------------------------------------PhysicalOlapScan[web_returns] apply RFs: RF4 RF5 diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query1.out 
b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query1.out index eca7d46a8f3ef1..8996d789efa954 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query1.out @@ -39,6 +39,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------hashAgg[LOCAL] ----------------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query23.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query23.out index 8132fd343d5fa8..4d4463732f09c3 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query23.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query23.out @@ -59,8 +59,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF4 cs_item_sk->[item_sk] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject --------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() @@ -73,13 +72,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF6 ws_item_sk->[item_sk] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject --------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() @@ -92,6 +89,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) diff --git 
a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query24.out index 67183c68f45617..ddf2dfd6c057b2 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query24.out @@ -55,6 +55,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecGather] ----------------------hashAgg[LOCAL] ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query30.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query30.out index 9f72a79a16ece1..40293a67cb991d 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query30.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query30.out @@ -43,6 +43,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] --------------------hashAgg[LOCAL] ----------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query31.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query31.out index 835238e3d707ce..21bd99fb9f31d4 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query31.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query31.out @@ -2,44 +2,42 @@ -- !ds_shape_31 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) +--------------------------PhysicalOlapScan[date_dim] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------PhysicalOlapScan[customer_address] +--PhysicalCteAnchor ( cteId=CTEId#1 ) +----PhysicalCteProducer ( cteId=CTEId#1 ) ------hashAgg[GLOBAL] --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() +--------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject 
---------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject ---------------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) +--------------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) ----------------------------PhysicalOlapScan[date_dim] ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalOlapScan[customer_address] ---PhysicalCteAnchor ( cteId=CTEId#1 ) -----PhysicalCteProducer ( cteId=CTEId#1 ) -------PhysicalProject ---------hashAgg[GLOBAL] -----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 -------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------PhysicalProject -----------------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) -------------------------------PhysicalOlapScan[date_dim] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------PhysicalOlapScan[customer_address] ----PhysicalResultSink ------PhysicalQuickSort[MERGE_SORT] --------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query39.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query39.out index 7a0a69965bdccd..11ec8af267cb77 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query39.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query39.out @@ -28,11 +28,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------hashJoin[INNER_JOIN] hashCondition=((inv1.i_item_sk = inv2.i_item_sk) and (inv1.w_warehouse_sk = inv2.w_warehouse_sk)) otherCondition=() ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv1.d_moy = 1)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv1.d_moy = 1)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv2.d_moy = 2)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv2.d_moy = 2)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query47.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query47.out 
index 174fd05d61dc04..03cb37e0f810f3 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query47.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query47.out @@ -41,9 +41,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalProject ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query57.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query57.out index d09ed9ca40fe42..555d7716af6512 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query57.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query57.out @@ -42,9 +42,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalProject ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query59.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query59.out index 644b59d4db3312..b9e7a9b40de441 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query59.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query59.out @@ -2,17 +2,16 @@ -- !ds_shape_59 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------hashAgg[GLOBAL] ---------PhysicalDistribute[DistributionSpecHash] -----------hashAgg[LOCAL] -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = 
store_sales.ss_sold_date_sk)) otherCondition=() +--------------PhysicalProject +----------------PhysicalOlapScan[store_sales] +--------------PhysicalDistribute[DistributionSpecReplicated] ----------------PhysicalProject -------------------PhysicalOlapScan[store_sales] -----------------PhysicalDistribute[DistributionSpecReplicated] -------------------PhysicalProject ---------------------PhysicalOlapScan[date_dim] +------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out index f3e524aabcfe08..6fc8a52f8398e0 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query70.out @@ -45,3 +45,4 @@ PhysicalResultSink --------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------------------PhysicalProject ------------------------------------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query81.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query81.out index 9b9a03af8fe25e..22ab8efaf190e3 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query81.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query81.out @@ -44,6 +44,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] --------------------hashAgg[LOCAL] ----------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query95.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query95.out index a835868fd8c78e..2a0ae9b4138686 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query95.out @@ -23,16 +23,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[web_returns] apply RFs: RF6 ----------------------PhysicalProject ------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF7 ws_order_number->[ws_order_number,ws_order_number] --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------------------------PhysicalDistribute[DistributionSpecHash] 
----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] ------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_ship_date_sk] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out index eca7d46a8f3ef1..8996d789efa954 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query1.out @@ -39,6 +39,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------hashAgg[LOCAL] ----------------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out index 520c3910152f74..edd849ea421d13 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query23.out @@ -59,8 +59,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((catalog_sales.cs_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF4 cs_item_sk->[item_sk] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF4 ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject --------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((catalog_sales.cs_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() @@ -73,13 +72,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) ------------------PhysicalProject --------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((web_sales.ws_item_sk = frequent_ss_items.item_sk)) otherCondition=() build RFs:RF6 ws_item_sk->[item_sk] ----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF6 ----------------------PhysicalDistribute[DistributionSpecHash] ------------------------PhysicalProject --------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((web_sales.ws_bill_customer_sk = best_ss_customer.c_customer_sk)) otherCondition=() @@ -92,6 +89,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------------------------filter((date_dim.d_moy = 5) and (date_dim.d_year = 
2000)) --------------------------------------PhysicalOlapScan[date_dim] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------PhysicalProject ---------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) +------------------------------PhysicalCteConsumer ( cteId=CTEId#2 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out index cf64374e507aa6..c0d202025b646f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out @@ -55,6 +55,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalDistribute[DistributionSpecGather] ----------------------hashAgg[LOCAL] ------------------------PhysicalDistribute[DistributionSpecExecutionAny] ---------------------------PhysicalProject -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query30.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query30.out index 0160329ec263ee..2880145be2057e 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query30.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query30.out @@ -43,6 +43,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] --------------------hashAgg[LOCAL] ----------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out index f12c5e5cb28f78..f759ca84798c82 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query31.out @@ -2,44 +2,42 @@ -- !ds_shape_31 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------PhysicalProject +----------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) +--------------------------PhysicalOlapScan[date_dim] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------PhysicalOlapScan[customer_address] +--PhysicalCteAnchor ( cteId=CTEId#1 ) +----PhysicalCteProducer ( cteId=CTEId#1 ) ------hashAgg[GLOBAL] --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject 
---------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ss_addr_sk] +--------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk] ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] ----------------------PhysicalProject -------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 ----------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------PhysicalProject ---------------------------filter((ss.d_year = 2000) and d_qoy IN (1, 2, 3)) +--------------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) ----------------------------PhysicalOlapScan[date_dim] ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalOlapScan[customer_address] ---PhysicalCteAnchor ( cteId=CTEId#1 ) -----PhysicalCteProducer ( cteId=CTEId#1 ) -------PhysicalProject ---------hashAgg[GLOBAL] -----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ws_bill_addr_sk] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] -------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 -------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------PhysicalProject -----------------------------filter((ws.d_year = 2000) and d_qoy IN (1, 2, 3)) -------------------------------PhysicalOlapScan[date_dim] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------PhysicalOlapScan[customer_address] ----PhysicalResultSink ------PhysicalQuickSort[MERGE_SORT] --------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query39.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query39.out index 4ccedd314026d1..88b2869175c9a1 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query39.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query39.out @@ -28,11 +28,9 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------hashJoin[INNER_JOIN] hashCondition=((inv1.i_item_sk = inv2.i_item_sk) and (inv1.w_warehouse_sk = inv2.w_warehouse_sk)) otherCondition=() ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv1.d_moy = 1)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv1.d_moy = 1)) 
+----------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalProject -----------------filter((inv2.d_moy = 2)) -------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------filter((inv2.d_moy = 2)) +----------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out index 214cdaaee62239..e8f28d6ea4e8cf 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query47.out @@ -41,9 +41,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalProject ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 2001)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out index ea7531482a7ee3..f479209035e353 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query57.out @@ -42,9 +42,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------------PhysicalProject ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] ---------------------PhysicalProject -----------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) -------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000) and (v2.avg_monthly_sales > 0.0000) and (v2.d_year = 1999)) +----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------------PhysicalDistribute[DistributionSpecHash] ------------------PhysicalProject --------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query59.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query59.out index 08d5a9d3f1f018..2db7d57863df6b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query59.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query59.out @@ -2,17 +2,16 @@ -- !ds_shape_59 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject 
-------hashAgg[GLOBAL] ---------PhysicalDistribute[DistributionSpecHash] -----------hashAgg[LOCAL] -------------PhysicalProject ---------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------PhysicalProject +----------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------PhysicalDistribute[DistributionSpecReplicated] ----------------PhysicalProject -------------------PhysicalOlapScan[store_sales] apply RFs: RF0 -----------------PhysicalDistribute[DistributionSpecReplicated] -------------------PhysicalProject ---------------------PhysicalOlapScan[date_dim] +------------------PhysicalOlapScan[date_dim] --PhysicalResultSink ----PhysicalTopN[MERGE_SORT] ------PhysicalDistribute[DistributionSpecGather] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query81.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query81.out index aa637bc6469b2d..99cf6c48bb75e7 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query81.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query81.out @@ -44,6 +44,5 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecHash] --------------------hashAgg[LOCAL] ----------------------PhysicalDistribute[DistributionSpecExecutionAny] -------------------------PhysicalProject ---------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out index 4763e6aa34cf49..9a971d30aec2de 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out @@ -23,16 +23,14 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF10 wr_order_number->[ws_order_number];RF11 wr_order_number->[ws_order_number] --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF10 RF11 RF12 RF13 +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF10 RF11 RF12 RF13 --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[web_returns] apply RFs: RF12 RF13 ----------------------PhysicalProject ------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF14 ws_order_number->[ws_order_number,ws_order_number];RF15 ws_order_number->[ws_order_number,ws_order_number] --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) 
--------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF6 web_site_sk->[ws_web_site_sk];RF7 web_site_sk->[ws_web_site_sk] ------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[ws_ship_date_sk];RF5 d_date_sk->[ws_ship_date_sk] diff --git a/regression-test/suites/nereids_p0/cte/test_cte_column_pruning.groovy b/regression-test/suites/nereids_p0/cte/test_cte_column_pruning.groovy new file mode 100644 index 00000000000000..7a465c212518f2 --- /dev/null +++ b/regression-test/suites/nereids_p0/cte/test_cte_column_pruning.groovy @@ -0,0 +1,143 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +suite("test_cte_column_pruning") { + sql "SET enable_nereids_planner=true" + sql "SET enable_pipeline_engine=true" + sql "SET enable_fallback_to_original_planner=false" + + sql """drop table if exists t1""" + sql """drop table if exists t2""" + sql """drop table if exists t3""" + sql """drop table if exists t4""" + + sql """ + create table if not exists t1 ( + c2 int , + c1 int , + c3 int , + c4 int , + pk int + ) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """ + create table if not exists t2 ( + c1 int , + c2 int , + c3 int , + c4 int , + pk int + ) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """ + create table if not exists t3 ( + c2 int , + c1 int , + c3 int , + c4 int , + pk int + ) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """ + create table if not exists t4 ( + c1 int , + c2 int , + c3 int , + c4 int , + pk int + ) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """ + insert into t1(pk,c1,c2,c3,c4) values 
(0,7,2,3328056,7),(1,3,5,3,3045349),(2,2130015,0,7,-7116176),(3,4411710,1203314,1,2336164),(4,4,-8001461,0,8),(5,9,3,6,2),(6,-8088092,null,-7256698,-2025142),(7,8,2,5,1),(8,4,4953685,3,null),(9,-6662413,-3845449,4,2),(10,5315281,0,5,null),(11,9,3,7,7),(12,4341905,null,null,8),(13,3,6,5,1),(14,5,9,6541164,3),(15,1,-582319,1,9),(16,5533636,4,39841,0),(17,1,1,null,7),(18,742881,-1420303,6,1),(19,281430,6753011,3,2),(20,7,1,4,-31350),(21,-5663089,9,2278262,9),(22,6,0,2706409,3),(23,-3841713,9,3,9),(24,1,6,3,4059303),(25,0,1,-5700982,3),(26,5,0,6,1),(27,7,2,2,4),(28,6,-2140815,-8190613,6),(29,-8214516,4,3,6),(30,4393731,null,7,2),(31,-2524331,8,2,9),(32,5,1,7,3),(33,2,968001,-1718546,0),(34,9,2,null,-7682164),(35,5,-3302521,8,2),(36,2,1325975,null,2826927),(37,-6607898,null,4,8),(38,7,3,5284408,-265983),(39,1,null,2,-559197),(40,9,7,2,6),(41,-7193680,null,3,8),(42,-4800310,8,9,5),(43,0,8,0,-2429477),(44,-1007106,-7583038,9,2627388),(45,7,-6572230,4,-1789489),(46,8,4,null,7837867),(47,7,8,7,null),(48,8,-2618403,2723851,3),(49,1,3,1,0),(50,null,3241893,0,8),(51,1934849,-1353430,1,9),(52,5148268,6,null,1),(53,null,3922713,4,47559),(54,2038005,-7625242,null,-5606136),(55,4,449100,2108275,5147506),(56,5,5,4316929,null),(57,5049864,null,4,9),(58,null,7,2,9),(59,5,2,5,7),(60,9,9,5,-2774033),(61,4,0,6,1),(62,5,-7700238,6,3),(63,658183,-7933445,1,4),(64,8,8,-7019658,-7873828),(65,1,1,null,0),(66,1,2,9,7320481),(67,3,2099077,9,3),(68,-7120763,276954,0,4),(69,9,5,5170840,null),(70,null,6,220899,-5774478),(71,null,null,3,6),(72,7,2,8101877,null),(73,1,null,5,-5141920),(74,8,-7143195,0,6),(75,6,5,3388863,4),(76,6,6,-8015259,1),(77,5207959,-4325820,791546,7),(78,2,4411975,2,null),(79,9,2379417,8,3),(80,3,null,-6968517,-336360),(81,null,0,5,1),(82,3,0,6,-4536269),(83,2,7,0,7),(84,1,7,1,5),(85,3,3,7509217,2920951),(86,6,null,8,3),(87,9,8,8,5941004),(88,8023576,1036293,9,2),(89,5,3,1,5),(90,5,5,6,2170127),(91,null,1,7,null),(92,-5659717,4,null,6),(93,848413,9,-2742042,4980140),(94,1,9,467168,9),(95,6,6,4783371,-5096980),(96,3,2,4,3),(97,3,2,2,1),(98,8,0,-6734149,2),(99,4985816,3,null,8); + """ + + sql """ + insert into t2(pk,c1,c2,c3,c4) values (0,5,4,189864,-7663457),(1,7,null,6,1),(2,null,8,-3362640,9),(3,3,2,5,-2197130),(4,2,3,7160615,1),(5,null,-57834,420441,3),(6,0,null,2,2),(7,1,-3681539,3,4),(8,548866,3,0,5),(9,8,-2824887,0,3246956),(10,5,3,7,2),(11,8,8,6,8),(12,0,2,7,9),(13,8,6,null,null),(14,-4103729,4,5,8),(15,-3659292,2,7,5),(16,8,7,1,null),(17,2526018,4,8069607,5),(18,6,6,5,2802235),(19,9,0,6379201,null),(20,3,null,4,3),(21,0,8,-5506402,2),(22,6,4,3,1),(23,4,5225086,3,1),(24,-211796,2,0,null),(25,5,2,-4100572,7),(26,2345127,2,null,1),(27,8,2,4893754,2),(28,null,-5580446,4,0),(29,3,1,2,6); + """ + + sql """ + insert into t3(pk,c1,c2,c3,c4) values (0,3,2,6,-3164679),(1,-6216443,3437690,-288827,6),(2,4,-5352286,-1005469,4118240),(3,9,6795167,5,1616205),(4,8,-4659990,-4816829,6),(5,0,9,4,8),(6,-4454766,2,2510766,3),(7,7860071,-3434966,8,3),(8,null,0,2,1),(9,8031908,2,-6673194,-5981416),(10,5,6716310,8,2529959),(11,null,-3622116,1,-7891010),(12,null,3527222,7993802,null),(13,null,1,2,1),(14,2,8,7,7),(15,0,9,5,null),(16,7452083,null,-4620796,0),(17,9,9,null,6),(18,3,1,-1578776,5),(19,9,2532045,-3577349,null); + """ + + sql """ + insert into t4(pk,c1,c2,c3,c4) values 
(0,-4263513,null,null,6),(1,1,3,4,null),(2,2460936,6,5,6299003),(3,null,7,7107446,-2366754),(4,6247611,4785035,3,-8014875),(5,0,2,5249218,3),(6,null,253825,4,3),(7,null,2,9,-350785),(8,6,null,null,4),(9,1,3,1,3422691),(10,0,-6596165,1808018,3),(11,2,752342,null,1),(12,-5220927,2676278,9,7),(13,6025864,2,1,4),(14,7,4,4,9),(15,5,9,9,849881),(16,-4253076,null,-4404479,-6365351),(17,null,6,4240023,3),(18,7,1276495,7,6),(19,null,-4459040,178194,-6974337),(20,6,2498738,9,6),(21,8,-1047876,8,-3519551),(22,4477868,6,3,-7237985),(23,9,1,null,7),(24,null,2,-6996324,4),(25,2,2,-7965145,2),(26,5339549,6,null,4),(27,0,4,4,4),(28,null,6563965,-5816143,2),(29,4,7245227,3239886,1),(30,9,9,-8134757,0),(31,-1787881,7769609,8306001,null),(32,-1817246,1,3,-8163782),(33,7,4018844,0,4),(34,null,5,3,4),(35,8,-1698017,0,3024748),(36,2,7,5330073,3654557),(37,null,null,1,7),(38,6,9,0,2),(39,-3988946,-1465296,3,3),(40,4939439,null,null,3),(41,6,-7235968,1,0),(42,5141520,-7389145,8,1),(43,5,89342,1,0),(44,1,641063,9,4718353),(45,5,4,4,6),(46,2,6,4,4),(47,3,2,2,-7137584),(48,6735548,0,1,7),(49,6,4,7,-4864341); + """ + + sql """ + sync + """ + + sql """ + WITH tbl1 AS ( + SELECT + tbl2.c1 AS c1, + tbl3.c2 AS c2, + tbl5.c4 AS c3, + tbl3.c1 AS c4 + FROM + t1 AS tbl1 + JOIN t2 AS tbl2 ON tbl1.c2 = tbl1.c4 + RIGHT JOIN t1 AS tbl3 ON tbl3.c3 = tbl1.c3 + INNER JOIN t3 AS tbl4 ON tbl3.c2 = tbl4.c3 + INNER JOIN t4 AS tbl5 ON tbl5.c4 = tbl4.c2 + AND tbl3.c3 = tbl3.c4 + WHERE + ( + tbl2.c4 = (0 + 9) + AND ((4 + 1) IS NULL) + ) + ORDER BY + 1, + 2, + 3, + 4 DESC + LIMIT + 6666 OFFSET 500 + ) + SELECT + tbl3.c4 AS c1, + tbl2.c4 AS c2, + tbl3.c3 AS c3, + tbl2.c2 AS c4 + FROM + tbl1 AS tbl2 + JOIN tbl1 AS tbl3 ON tbl3.c2 = tbl2.c2 + WHERE + ( + tbl2.c3 != tbl2.c3 + AND ((2 + 0) IS NOT NULL) + ) + ORDER BY + 2, + 4, + 1, + 3 ASC + LIMIT + 6666 OFFSET 2; + """ +} From d6a8e24a82b314d4831168fc6211cf3ebd9fb2b0 Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Fri, 26 Apr 2024 12:52:17 +0800 Subject: [PATCH 045/163] [Fix](cloud) Fix concurrency bugs on creating auto partition #34135 --- .../transaction/CloudGlobalTransactionMgr.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java index 35b3cd285298d3..aa1218ab9b3f3f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java @@ -385,12 +385,17 @@ private Set getBaseTabletsFromTables(List
tableList, List tabletIds = tabletCommitInfos.stream().map(TabletCommitInfo::getTabletId).collect(Collectors.toSet()); baseTabletIds.retainAll(tabletIds); From a28499c53fa80c065ef8d9b689126f79f986dad0 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:06:48 +0800 Subject: [PATCH 046/163] Implement HLL with 128 buckets to support statistics cache. (#34124) --- .../java/org/apache/doris/common/io/Hll.java | 15 +- .../org/apache/doris/common/io/HllTest.java | 4 +- .../apache/doris/statistics/util/Hll128.java | 214 ++++++++++++++++++ .../doris/statistics/util/Hll128Test.java | 204 +++++++++++++++++ 4 files changed, 432 insertions(+), 5 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/statistics/util/Hll128.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/util/Hll128Test.java diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/io/Hll.java b/fe/fe-common/src/main/java/org/apache/doris/common/io/Hll.java index b00912598d1220..f70df61749855d 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/io/Hll.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/io/Hll.java @@ -46,7 +46,7 @@ public class Hll { public static final int HLL_COLUMN_PRECISION = 14; public static final int HLL_ZERO_COUNT_BITS = (64 - HLL_COLUMN_PRECISION); - public static final int HLL_EXPLICLIT_INT64_NUM = 160; + public static final int HLL_EXPLICIT_INT64_NUM = 160; public static final int HLL_SPARSE_THRESHOLD = 4096; public static final int HLL_REGISTERS_COUNT = 16 * 1024; @@ -122,7 +122,7 @@ public void update(long hashValue) { type = HLL_DATA_EXPLICIT; break; case HLL_DATA_EXPLICIT: - if (hashSet.size() < HLL_EXPLICLIT_INT64_NUM) { + if (hashSet.size() < HLL_EXPLICIT_INT64_NUM) { hashSet.add(hashValue); break; } @@ -157,7 +157,7 @@ public void merge(Hll other) { switch (other.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default case HLL_DATA_EXPLICIT: this.hashSet.addAll(other.hashSet); - if (this.hashSet.size() > HLL_EXPLICLIT_INT64_NUM) { + if (this.hashSet.size() > HLL_EXPLICIT_INT64_NUM) { convertExplicitToRegister(); this.type = HLL_DATA_FULL; } @@ -393,4 +393,13 @@ public int getType() { return type; } + // For convert to statistics used Hll128 + public byte[] getRegisters() { + return registers; + } + + // For convert to statistics used Hll128 + public Set getHashSet() { + return hashSet; + } } diff --git a/fe/fe-common/src/test/java/org/apache/doris/common/io/HllTest.java b/fe/fe-common/src/test/java/org/apache/doris/common/io/HllTest.java index fabc7c1f8da70f..94333f255a657d 100644 --- a/fe/fe-common/src/test/java/org/apache/doris/common/io/HllTest.java +++ b/fe/fe-common/src/test/java/org/apache/doris/common/io/HllTest.java @@ -55,11 +55,11 @@ public void hllBasicTest() throws IOException { // test explicit Hll explicitHll = new Hll(); - for (int i = 0; i < Hll.HLL_EXPLICLIT_INT64_NUM; i++) { + for (int i = 0; i < Hll.HLL_EXPLICIT_INT64_NUM; i++) { explicitHll.updateWithHash(i); } Assert.assertTrue(explicitHll.getType() == Hll.HLL_DATA_EXPLICIT); - Assert.assertTrue(explicitHll.estimateCardinality() == Hll.HLL_EXPLICLIT_INT64_NUM); + Assert.assertTrue(explicitHll.estimateCardinality() == Hll.HLL_EXPLICIT_INT64_NUM); ByteArrayOutputStream explicitOutputStream = new ByteArrayOutputStream(); DataOutput explicitOutput = new DataOutputStream(explicitOutputStream); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/Hll128.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/Hll128.java new file mode 100644 index 00000000000000..094cff4b7d9def --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/Hll128.java @@ -0,0 +1,214 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics.util; + +import org.apache.doris.common.io.Hll; + +import java.math.BigInteger; +import java.util.HashSet; +import java.util.Set; + +/** + * This is an HLL implementation with 128 Buckets. + * Mainly used for statistics partition ndv cache. + * We can convert the org.apache.doris.common.io.Hll object with 16K buckets to Hll128 to reduce memory consumption. + */ +public class Hll128 { + + public static final byte HLL_DATA_EMPTY = 0; + public static final byte HLL_DATA_EXPLICIT = 1; + public static final byte HLL_DATA_FULL = 3; + + public static final int HLL_COLUMN_PRECISION = 7; + public static final int HLL_ZERO_COUNT_BITS = (64 - HLL_COLUMN_PRECISION); + public static final int HLL_EXPLICLIT_INT64_NUM = 160; + public static final int HLL_REGISTERS_COUNT = 128; + + private int type; + private Set hashSet; + private byte[] registers; + + public Hll128() { + type = Hll.HLL_DATA_EMPTY; + this.hashSet = new HashSet<>(); + } + + private void convertExplicitToRegister() { + assert this.type == HLL_DATA_EXPLICIT; + registers = new byte[HLL_REGISTERS_COUNT]; + for (Long value : hashSet) { + updateRegisters(value); + } + hashSet.clear(); + } + + private void updateRegisters(long hashValue) { + int idx; + // hash value less than zero means we get a unsigned long + // so need to transfer to BigInter to mod + if (hashValue < 0) { + BigInteger unint64HashValue = new BigInteger(Long.toUnsignedString(hashValue)); + unint64HashValue = unint64HashValue.mod(new BigInteger(Long.toUnsignedString(HLL_REGISTERS_COUNT))); + idx = unint64HashValue.intValue(); + } else { + idx = (int) (hashValue % HLL_REGISTERS_COUNT); + } + + hashValue >>>= HLL_COLUMN_PRECISION; + hashValue |= (1L << HLL_ZERO_COUNT_BITS); + byte firstOneBit = (byte) (Hll.getLongTailZeroNum(hashValue) + 1); + registers[idx] = registers[idx] > firstOneBit ? registers[idx] : firstOneBit; + } + + private void mergeRegisters(byte[] other) { + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + this.registers[i] = this.registers[i] > other[i] ? 
this.registers[i] : other[i]; + } + } + + public void update(long hashValue) { + switch (this.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EMPTY: + hashSet.add(hashValue); + type = HLL_DATA_EXPLICIT; + break; + case HLL_DATA_EXPLICIT: + if (hashSet.size() < HLL_EXPLICLIT_INT64_NUM) { + hashSet.add(hashValue); + break; + } + convertExplicitToRegister(); + type = HLL_DATA_FULL; + case HLL_DATA_FULL: // CHECKSTYLE IGNORE THIS LINE: fall through + updateRegisters(hashValue); + break; + } + } + + public void merge(Hll128 other) { + if (other.type == HLL_DATA_EMPTY) { + return; + } + switch (this.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EMPTY: + this.type = other.type; + switch (other.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EXPLICIT: + this.hashSet.addAll(other.hashSet); + break; + case HLL_DATA_FULL: + this.registers = new byte[HLL_REGISTERS_COUNT]; + System.arraycopy(other.registers, 0, this.registers, 0, HLL_REGISTERS_COUNT); + break; + } + break; + case HLL_DATA_EXPLICIT: + switch (other.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EXPLICIT: + this.hashSet.addAll(other.hashSet); + if (this.hashSet.size() > HLL_EXPLICLIT_INT64_NUM) { + convertExplicitToRegister(); + this.type = HLL_DATA_FULL; + } + break; + case HLL_DATA_FULL: + convertExplicitToRegister(); + mergeRegisters(other.registers); + this.type = HLL_DATA_FULL; + break; + } + break; + case HLL_DATA_FULL: + switch (other.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EXPLICIT: + for (long value : other.hashSet) { + update(value); + } + break; + case HLL_DATA_FULL: + mergeRegisters(other.registers); + break; + } + break; + } + } + + // use strictfp to force java follow IEEE 754 to deal float point strictly + public strictfp long estimateCardinality() { + if (type == HLL_DATA_EMPTY) { + return 0; + } + if (type == HLL_DATA_EXPLICIT) { + return hashSet.size(); + } + + int numStreams = HLL_REGISTERS_COUNT; + float alpha = 0.7213f / (1 + 1.079f / numStreams); + float harmonicMean = 0; + int numZeroRegisters = 0; + + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + harmonicMean += Math.pow(2.0f, -registers[i]); + + if (registers[i] == 0) { + numZeroRegisters++; + } + } + + harmonicMean = 1.0f / harmonicMean; + double estimate = alpha * numStreams * numStreams * harmonicMean; + + if (estimate <= numStreams * 2.5 && numZeroRegisters != 0) { + estimate = numStreams * Math.log(((float) numStreams) / ((float) numZeroRegisters)); + } + + return (long) (estimate + 0.5); + } + + public int getType() { + return type; + } + + public static Hll128 fromHll(Hll hll) { + Hll128 hll128 = new Hll128(); + if (hll == null || hll.getType() == Hll.HLL_DATA_EMPTY) { + return hll128; + } + if (hll.getType() == Hll.HLL_DATA_EXPLICIT) { + hll128.type = HLL_DATA_EXPLICIT; + hll128.hashSet.addAll(hll.getHashSet()); + return hll128; + } + + byte[] registers = hll.getRegisters(); + byte[] registers128 = new byte[HLL_REGISTERS_COUNT]; + int groupSize = Hll.HLL_REGISTERS_COUNT / HLL_REGISTERS_COUNT; + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + for (int j = 0; j < groupSize; j++) { + registers128[i] = registers128[i] < registers[i * groupSize + j] + ? 
registers[i * groupSize + j] + : registers128[i]; + } + } + hll128.registers = registers128; + hll128.type = HLL_DATA_FULL; + return hll128; + } + +} + diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/Hll128Test.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/Hll128Test.java new file mode 100644 index 00000000000000..d692b90ce3171b --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/Hll128Test.java @@ -0,0 +1,204 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics.util; + +import org.apache.doris.common.io.Hll; + +import org.apache.commons.codec.binary.StringUtils; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class Hll128Test { + + @Test + public void basicTest() { + // test empty + Hll128 emptyHll = new Hll128(); + Assert.assertEquals(Hll128.HLL_DATA_EMPTY, emptyHll.getType()); + Assert.assertEquals(0, emptyHll.estimateCardinality()); + + // test explicit + Hll128 explicitHll = new Hll128(); + for (int i = 0; i < Hll.HLL_EXPLICIT_INT64_NUM; i++) { + explicitHll.update(i); + } + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, explicitHll.getType()); + Assert.assertEquals(Hll.HLL_EXPLICIT_INT64_NUM, explicitHll.estimateCardinality()); + + // test full + Hll128 fullHll = new Hll128(); + for (int i = 1; i <= Short.MAX_VALUE; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + fullHll.update(Hll.hash64(v, v.length, Hll.SEED)); + } + Assert.assertEquals(Hll.HLL_DATA_FULL, fullHll.getType()); + Assert.assertEquals(33141, fullHll.estimateCardinality()); + Assert.assertTrue(fullHll.estimateCardinality() > Short.MAX_VALUE * (1 - 0.1) + && fullHll.estimateCardinality() < Short.MAX_VALUE * (1 + 0.1)); + + } + + @Test + public void testFromHll() throws IOException { + // test empty + Hll emptyHll = new Hll(); + Hll128 hll128 = Hll128.fromHll(emptyHll); + Assert.assertEquals(Hll128.HLL_DATA_EMPTY, hll128.getType()); + Assert.assertEquals(0, hll128.estimateCardinality()); + + // test explicit + Hll explicitHll = new Hll(); + for (int i = 0; i < Hll.HLL_EXPLICIT_INT64_NUM; i++) { + explicitHll.updateWithHash(i); + } + hll128 = Hll128.fromHll(explicitHll); + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, hll128.getType()); + Assert.assertEquals(Hll.HLL_EXPLICIT_INT64_NUM, hll128.estimateCardinality()); + + // test full + Hll fullHll = new Hll(); + for (int i = 0; i < 10000; i++) { + fullHll.updateWithHash(i); + } + hll128 = Hll128.fromHll(fullHll); + Assert.assertEquals(Hll128.HLL_DATA_FULL, hll128.getType()); + Assert.assertTrue(hll128.estimateCardinality() > 9000 && hll128.estimateCardinality() < 11000); + } + + @Test + public void testMerge() throws IOException { + // test empty merge empty + 
Hll128 empty1 = new Hll128(); + Hll128 empty2 = new Hll128(); + empty1.merge(empty2); + Assert.assertEquals(Hll128.HLL_DATA_EMPTY, empty1.getType()); + Assert.assertEquals(0, empty1.estimateCardinality()); + + // test empty merge explicit + Hll128 empty = new Hll128(); + Hll128 explicit = new Hll128(); + for (int i = 1; i < Hll.HLL_EXPLICIT_INT64_NUM; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + explicit.update(Hll.hash64(v, v.length, Hll.SEED)); + } + empty.merge(explicit); + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, empty.getType()); + Assert.assertEquals(Hll.HLL_EXPLICIT_INT64_NUM - 1, empty.estimateCardinality()); + + // test empty merge full + empty = new Hll128(); + Hll128 full = new Hll128(); + for (int i = 1; i < 10000; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + full.update(Hll.hash64(v, v.length, Hll.SEED)); + } + empty.merge(full); + Assert.assertEquals(Hll128.HLL_DATA_FULL, empty.getType()); + Assert.assertTrue(empty.estimateCardinality() > 9000 && empty.estimateCardinality() < 11000); + + // test explicit merge empty + empty = new Hll128(); + explicit = new Hll128(); + for (int i = 1; i < Hll.HLL_EXPLICIT_INT64_NUM; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + explicit.update(Hll.hash64(v, v.length, Hll.SEED)); + } + explicit.merge(empty); + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, explicit.getType()); + Assert.assertEquals(Hll.HLL_EXPLICIT_INT64_NUM - 1, explicit.estimateCardinality()); + + // test explicit merge explicit + Hll128 explicit1 = new Hll128(); + Hll128 explicit2 = new Hll128(); + for (int i = 0; i < 10; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + explicit1.update(Hll.hash64(v, v.length, Hll.SEED)); + } + for (int i = 0; i < 30; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + explicit2.update(Hll.hash64(v, v.length, Hll.SEED)); + } + explicit1.merge(explicit2); + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, explicit1.getType()); + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, explicit2.getType()); + Assert.assertEquals(30, explicit1.estimateCardinality()); + + explicit2 = new Hll128(); + for (int i = 10001; i < 10000 + Hll.HLL_EXPLICIT_INT64_NUM; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + explicit2.update(Hll.hash64(v, v.length, Hll.SEED)); + } + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, explicit1.getType()); + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, explicit2.getType()); + explicit1.merge(explicit2); + Assert.assertEquals(Hll128.HLL_DATA_FULL, explicit1.getType()); + Assert.assertTrue(explicit1.estimateCardinality() > 170 && explicit1.estimateCardinality() < 210); + + // Test explicit merge full + explicit = new Hll128(); + for (int i = 0; i < 10; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + explicit.update(Hll.hash64(v, v.length, Hll.SEED)); + } + full = new Hll128(); + for (int i = 1; i < 10000; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + full.update(Hll.hash64(v, v.length, Hll.SEED)); + } + Assert.assertEquals(Hll128.HLL_DATA_FULL, full.getType()); + explicit.merge(full); + Assert.assertEquals(Hll128.HLL_DATA_FULL, explicit.getType()); + Assert.assertTrue(explicit.estimateCardinality() > 9000 && explicit.estimateCardinality() < 11000); + + // Test full merge explicit + explicit = new Hll128(); + for (int i = 0; i < 10; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + explicit.update(Hll.hash64(v, v.length, Hll.SEED)); + } + full = new Hll128(); + 
for (int i = 1; i < 10000; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + full.update(Hll.hash64(v, v.length, Hll.SEED)); + } + Assert.assertEquals(Hll128.HLL_DATA_EXPLICIT, explicit.getType()); + full.merge(explicit); + Assert.assertEquals(Hll128.HLL_DATA_FULL, full.getType()); + Assert.assertTrue(full.estimateCardinality() > 9000 && full.estimateCardinality() < 11000); + + // Test full merge full + Hll128 full1 = new Hll128(); + Hll128 full2 = new Hll128(); + for (int i = 1; i < 10000; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + full1.update(Hll.hash64(v, v.length, Hll.SEED)); + } + for (int i = 5000; i < 15000; i++) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(i)); + full2.update(Hll.hash64(v, v.length, Hll.SEED)); + } + Assert.assertEquals(Hll128.HLL_DATA_FULL, full1.getType()); + Assert.assertEquals(Hll128.HLL_DATA_FULL, full2.getType()); + full1.merge(full2); + Assert.assertEquals(Hll128.HLL_DATA_FULL, full1.getType()); + Assert.assertTrue(full1.estimateCardinality() > 13500 && full1.estimateCardinality() < 16500); + } + +} From fcc7e079dc9103de8e99c77bd3cd86d88c9c252e Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Fri, 26 Apr 2024 13:43:08 +0800 Subject: [PATCH 047/163] [Test](hive-writer) Adjust test_hive_write_partitions regression test to resolve special characters issue with git on windows. (#34026) --- .../scripts/create_preinstalled_table.hql | 13 +-- .../varchar_col=varchar_value1/000000_0 | Bin .../varchar_col=varchar_value1/000000_0 | Bin .../varchar_col=varchar_value1/000000_0 | Bin .../hive/write/test_hive_write_partitions.out | 105 +++++++++--------- .../write/test_hive_write_partitions.groovy | 2 +- 6 files changed, 58 insertions(+), 62 deletions(-) rename docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/{binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-21/timestamp_col=2024-03-21 12%3A00%3A00/char_col=char_value1 => date_col=2024-03-21/char_col=char_value1}/varchar_col=varchar_value1/000000_0 (100%) rename docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/{binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-22/timestamp_col=2024-03-22 12%3A00%3A00/char_col=char_value1 => date_col=2024-03-22/char_col=char_value1}/varchar_col=varchar_value1/000000_0 (100%) rename docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=123456.789012/string_col=string_value/{binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-20/timestamp_col=2024-03-20 12%3A00%3A00/char_col=char_value1 => date_col=2024-03-20/char_col=char_value1}/varchar_col=varchar_value1/000000_0 (100%) diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql index dbeeab972f6b0d..0ab4a204b84ce0 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql @@ -2406,10 +2406,8 @@ CREATE TABLE `all_partition_types2_parquet_snappy_src`( PARTITIONED BY ( `decimal_col` decimal(18,6), `string_col` string, - `binary_col` binary, `date_col` date, - `timestamp_col` timestamp, - `char_col` char(50), + 
`char_col` char(11), `varchar_col` varchar(50)) stored as parquet LOCATION @@ -2423,10 +2421,8 @@ CREATE TABLE `all_partition_types2_parquet_snappy`( PARTITIONED BY ( `decimal_col` decimal(18,6), `string_col` string, - `binary_col` binary, `date_col` date, - `timestamp_col` timestamp, - `char_col` char(50), + `char_col` char(11), `varchar_col` varchar(50)) stored as parquet TBLPROPERTIES('parquet.compression'='SNAPPY'); @@ -2437,10 +2433,9 @@ CREATE TABLE `all_partition_types2_orc_zlib`( PARTITIONED BY ( `decimal_col` decimal(18,6), `string_col` string, - `binary_col` binary, `date_col` date, - `timestamp_col` timestamp, - `char_col` char(50), + `char_col` char(11), `varchar_col` varchar(50)) stored as orc TBLPROPERTIES("orc.compress"="ZLIB"); + diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-21/timestamp_col=2024-03-21 12%3A00%3A00/char_col=char_value1 /varchar_col=varchar_value1/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/date_col=2024-03-21/char_col=char_value1/varchar_col=varchar_value1/000000_0 similarity index 100% rename from docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-21/timestamp_col=2024-03-21 12%3A00%3A00/char_col=char_value1 /varchar_col=varchar_value1/000000_0 rename to docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/date_col=2024-03-21/char_col=char_value1/varchar_col=varchar_value1/000000_0 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-22/timestamp_col=2024-03-22 12%3A00%3A00/char_col=char_value1 /varchar_col=varchar_value1/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/date_col=2024-03-22/char_col=char_value1/varchar_col=varchar_value1/000000_0 similarity index 100% rename from docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-22/timestamp_col=2024-03-22 12%3A00%3A00/char_col=char_value1 /varchar_col=varchar_value1/000000_0 rename to docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=-123456.789012/string_col=string_value/date_col=2024-03-22/char_col=char_value1/varchar_col=varchar_value1/000000_0 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=123456.789012/string_col=string_value/binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-20/timestamp_col=2024-03-20 12%3A00%3A00/char_col=char_value1 /varchar_col=varchar_value1/000000_0 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=123456.789012/string_col=string_value/date_col=2024-03-20/char_col=char_value1/varchar_col=varchar_value1/000000_0 similarity index 100% rename from docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=123456.789012/string_col=string_value/binary_col=62 69 6e 61 72 79 5f 76 61 6c 75 65/date_col=2024-03-20/timestamp_col=2024-03-20 12%3A00%3A00/char_col=char_value1 /varchar_col=varchar_value1/000000_0 rename to docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/all_partition_types2_parquet_snappy_src/decimal_col=123456.789012/string_col=string_value/date_col=2024-03-20/char_col=char_value1/varchar_col=varchar_value1/000000_0 diff --git a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out index c0e695544e2b34..1a81e07a2f3141 100644 --- a/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out +++ b/regression-test/data/external_table_p0/hive/write/test_hive_write_partitions.out @@ -41,27 +41,27 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 7 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 -- !q01 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 -- !q02 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 -- !q03 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q04 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q05 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 
6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -7 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +7 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q01 -- 1 \N -128 \N -2147483648 \N -123.45 \N @@ -130,27 +130,27 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 7 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 -- !q01 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 -- !q02 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 -- !q03 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q04 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q05 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -7 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +7 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q01 -- 1 \N -128 \N -2147483648 \N -123.45 \N @@ -219,27 +219,27 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 7 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 -- !q01 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 
char_value1 varchar_value1 -- !q02 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 -- !q03 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q04 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q05 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -7 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +7 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q01 -- 1 \N -128 \N -2147483648 \N -123.45 \N @@ -308,27 +308,27 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 7 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 -- !q01 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 -- !q02 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 -- !q03 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 
-123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q04 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 -3 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 +3 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q05 -- -1 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-22 2024-03-22T12:00 char_value1 varchar_value1 -2 -123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-21 2024-03-21T12:00 char_value1 varchar_value1 -7 123456.789012 string_value 62 69 6e 61 72 79 5f 76 61 6c 75 65 2024-03-20 2024-03-20T12:00 char_value1 varchar_value1 +1 -123456.789012 string_value 2024-03-22 char_value1 varchar_value1 +2 -123456.789012 string_value 2024-03-21 char_value1 varchar_value1 +7 123456.789012 string_value 2024-03-20 char_value1 varchar_value1 -- !q01 -- 1 \N -128 \N -2147483648 \N -123.45 \N @@ -354,3 +354,4 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 3 \N 127 \N 2147483647 \N 123.45 \N 3 \N 127 \N 2147483647 \N 123.45 \N 7 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 + diff --git a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy index 695b608e1521a5..0435a68b09a42a 100644 --- a/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy +++ b/regression-test/suites/external_table_p0/hive/write/test_hive_write_partitions.groovy @@ -134,7 +134,7 @@ suite("test_hive_write_partitions", "p0,external,hive,external_docker,external_d sql """ INSERT OVERWRITE TABLE all_partition_types2_${format_compression}_${catalog_name}_q03 - SELECT CAST(7 as INT) as id, decimal_col, string_col, binary_col, date_col, timestamp_col, char_col, varchar_col FROM all_partition_types2_parquet_snappy_src where id = 3; + SELECT CAST(7 as INT) as id, decimal_col, string_col, date_col, char_col, varchar_col FROM all_partition_types2_parquet_snappy_src where id = 3; """ order_qt_q05 """ select * from all_partition_types2_${format_compression}_${catalog_name}_q03; """ From 9bffc246959af69af494b90a6fdf581bea4e5df5 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Fri, 26 Apr 2024 13:47:43 +0800 Subject: [PATCH 048/163] [Improvement](pipeline) Use hash shuffle for 1-phase Agg/Analytic ope rator #34122 --- be/src/pipeline/exec/aggregation_sink_operator.cpp | 11 +++++++---- be/src/pipeline/exec/aggregation_sink_operator.h | 2 +- .../exec/partitioned_aggregation_sink_operator.cpp | 6 ++++-- .../exec/partitioned_aggregation_sink_operator.h | 2 +- .../pipeline_x/pipeline_x_fragment_context.cpp | 10 ++++++++-- .../pipeline/pipeline_x/pipeline_x_fragment_context.h | 2 ++ 6 files changed, 23 insertions(+), 10 deletions(-) diff --git 
a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index fd88b0d15218b0..6c9d27e2a2b063 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -616,7 +616,7 @@ void AggSinkLocalState::_init_hash_method(const vectorized::VExprContextSPtrs& p } AggSinkOperatorX::AggSinkOperatorX(ObjectPool* pool, int operator_id, const TPlanNode& tnode, - const DescriptorTbl& descs) + const DescriptorTbl& descs, bool require_bucket_distribution) : DataSinkOperatorX(operator_id, tnode.node_id), _intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id), _intermediate_tuple_desc(nullptr), @@ -629,9 +629,12 @@ AggSinkOperatorX::AggSinkOperatorX(ObjectPool* pool, int operator_id, const TPla _limit(tnode.limit), _have_conjuncts((tnode.__isset.vconjunct && !tnode.vconjunct.nodes.empty()) || (tnode.__isset.conjuncts && !tnode.conjuncts.empty())), - _partition_exprs(tnode.__isset.distribute_expr_lists ? tnode.distribute_expr_lists[0] - : std::vector {}), - _is_colocate(tnode.agg_node.__isset.is_colocate && tnode.agg_node.is_colocate), + _partition_exprs(require_bucket_distribution ? (tnode.__isset.distribute_expr_lists + ? tnode.distribute_expr_lists[0] + : std::vector {}) + : tnode.agg_node.grouping_exprs), + _is_colocate(tnode.agg_node.__isset.is_colocate && tnode.agg_node.is_colocate && + require_bucket_distribution), _agg_fn_output_row_descriptor(descs, tnode.row_tuples, tnode.nullable_tuples) {} Status AggSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { diff --git a/be/src/pipeline/exec/aggregation_sink_operator.h b/be/src/pipeline/exec/aggregation_sink_operator.h index b3ffa19d6db791..0c34acfd7dfe84 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.h +++ b/be/src/pipeline/exec/aggregation_sink_operator.h @@ -143,7 +143,7 @@ class AggSinkLocalState : public PipelineXSinkLocalState { class AggSinkOperatorX final : public DataSinkOperatorX { public: AggSinkOperatorX(ObjectPool* pool, int operator_id, const TPlanNode& tnode, - const DescriptorTbl& descs); + const DescriptorTbl& descs, bool require_bucket_distribution); ~AggSinkOperatorX() override = default; Status init(const TDataSink& tsink) override { return Status::InternalError("{} should not init with TPlanNode", diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp index 78079a0ddf8c98..7eb09555aa8ab4 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp @@ -122,9 +122,11 @@ void PartitionedAggSinkLocalState::update_profile(RuntimeProfile* child_profile) PartitionedAggSinkOperatorX::PartitionedAggSinkOperatorX(ObjectPool* pool, int operator_id, const TPlanNode& tnode, - const DescriptorTbl& descs) + const DescriptorTbl& descs, + bool require_bucket_distribution) : DataSinkOperatorX(operator_id, tnode.node_id) { - _agg_sink_operator = std::make_unique(pool, operator_id, tnode, descs); + _agg_sink_operator = std::make_unique(pool, operator_id, tnode, descs, + require_bucket_distribution); } Status PartitionedAggSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h index 1755cd866f270f..1233f66b56294c 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h +++ 
b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.h @@ -294,7 +294,7 @@ class PartitionedAggSinkLocalState class PartitionedAggSinkOperatorX : public DataSinkOperatorX { public: PartitionedAggSinkOperatorX(ObjectPool* pool, int operator_id, const TPlanNode& tnode, - const DescriptorTbl& descs); + const DescriptorTbl& descs, bool require_bucket_distribution); ~PartitionedAggSinkOperatorX() override = default; Status init(const TDataSink& tsink) override { return Status::InternalError("{} should not init with TPlanNode", diff --git a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp index bf2c255a1274f0..fc0234c62904b6 100644 --- a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp +++ b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp @@ -1034,14 +1034,16 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN DataSinkOperatorXPtr sink; if (_runtime_state->enable_agg_spill() && !tnode.agg_node.grouping_exprs.empty()) { sink.reset(new PartitionedAggSinkOperatorX(pool, next_sink_operator_id(), tnode, - descs)); + descs, _require_bucket_distribution)); } else { - sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); + sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(), tnode, descs, + _require_bucket_distribution)); } sink->set_dests_id({op->operator_id()}); RETURN_IF_ERROR(cur_pipe->set_sink(sink)); RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); } + _require_bucket_distribution = true; break; } case TPlanNodeType::HASH_JOIN_NODE: { @@ -1106,6 +1108,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN _pipeline_parent_map.push(op->node_id(), cur_pipe); _pipeline_parent_map.push(op->node_id(), build_side_pipe); } + _require_bucket_distribution = true; break; } case TPlanNodeType::CROSS_JOIN_NODE: { @@ -1211,6 +1214,7 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN sink->set_dests_id({op->operator_id()}); RETURN_IF_ERROR(cur_pipe->set_sink(sink)); RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); + _require_bucket_distribution = true; break; } case TPlanNodeType::INTERSECT_NODE: { @@ -1268,6 +1272,8 @@ Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanN print_plan_node_type(tnode.node_type)); } + _require_bucket_distribution = true; + return Status::OK(); } // NOLINTEND(readability-function-cognitive-complexity) diff --git a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h index 31febc0d8aaf4d..c87f8f4f784051 100644 --- a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h +++ b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h @@ -239,6 +239,8 @@ class PipelineXFragmentContext : public PipelineFragmentContext { // Total instance num running on all BEs int _total_instances = -1; + + bool _require_bucket_distribution = false; }; } // namespace pipeline From 95a1be4adf11df9f69518c467e35e989ff6ee746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8B=8F=E5=B0=8F=E5=88=9A?= Date: Fri, 26 Apr 2024 13:51:30 +0800 Subject: [PATCH 049/163] [opt](parquet)Skip page with offset index (#33082) Make skip_page() in ColumnChunkReader more efficient. No more reading page headers if there are pagelocations in chunk. 
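For context, a minimal sketch of the idea behind this change, not the Doris implementation itself: once a column chunk's OffsetIndex is available, every page's file offset, compressed size and first row index are already known, so a reader can position itself on a page by arithmetic over those entries instead of parsing each page header just to learn how far to skip. PageLoc and seek_offset_for_row below are hypothetical stand-ins for the thrift-generated tparquet::PageLocation and the real PageReader logic.

    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for one tparquet::PageLocation entry of an OffsetIndex.
    struct PageLoc {
        int64_t offset;               // file offset of the page (header included)
        int32_t compressed_page_size; // bytes of header + compressed page data
        int64_t first_row_index;      // row id of the first value in this page
    };

    // Return the file offset of the page containing target_row, using only the
    // offset index; the headers of the pages that are skipped are never read.
    // Returns -1 when target_row precedes the first recorded page.
    int64_t seek_offset_for_row(const std::vector<PageLoc>& pages, int64_t target_row) {
        int64_t result = -1;
        for (const PageLoc& loc : pages) {
            if (loc.first_row_index > target_row) {
                break; // the previous entry already covers target_row
            }
            result = loc.offset;
        }
        return result;
    }

The PageReaderWithOffsetIndex added below follows the same principle: skip_page() seeks to the current page location's offset plus its compressed_page_size, and a page header is only parsed when the page's header or data is actually needed, which is what the new SkipPageHeaderNum and ParsePageHeaderNum counters are meant to expose.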
--- .../parquet/vparquet_column_chunk_reader.cpp | 82 ++--- .../parquet/vparquet_column_chunk_reader.h | 11 +- .../format/parquet/vparquet_column_reader.cpp | 7 +- .../format/parquet/vparquet_column_reader.h | 22 +- .../format/parquet/vparquet_group_reader.cpp | 12 +- .../format/parquet/vparquet_group_reader.h | 1 - .../format/parquet/vparquet_page_reader.cpp | 15 +- .../format/parquet/vparquet_page_reader.h | 123 +++++++- .../exec/format/parquet/vparquet_reader.cpp | 11 +- .../vec/exec/format/parquet/vparquet_reader.h | 3 +- .../vec/exec/parquet/parquet_thrift_test.cpp | 3 +- .../hive/test_hive_parquet_skip_page.out | 289 ++++++++++++++++++ .../hive/test_hive_parquet_skip_page.groovy | 131 ++++++++ 13 files changed, 641 insertions(+), 69 deletions(-) create mode 100644 regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp index 6feb9bc1025b33..af30e63d1e35d9 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.cpp @@ -47,12 +47,14 @@ namespace doris::vectorized { ColumnChunkReader::ColumnChunkReader(io::BufferedStreamReader* reader, tparquet::ColumnChunk* column_chunk, FieldSchema* field_schema, + const tparquet::OffsetIndex* offset_index, cctz::time_zone* ctz, io::IOContext* io_ctx) : _field_schema(field_schema), _max_rep_level(field_schema->repetition_level), _max_def_level(field_schema->definition_level), _stream_reader(reader), _metadata(column_chunk->meta_data), + _offset_index(offset_index), // _ctz(ctz), _io_ctx(io_ctx) {} @@ -61,7 +63,9 @@ Status ColumnChunkReader::init() { ? 
_metadata.dictionary_page_offset : _metadata.data_page_offset; size_t chunk_size = _metadata.total_compressed_size; - _page_reader = std::make_unique(_stream_reader, _io_ctx, start_offset, chunk_size); + // create page reader + _page_reader = create_page_reader(_stream_reader, _io_ctx, start_offset, chunk_size, + _metadata.num_values, _offset_index); // get the block compression codec RETURN_IF_ERROR(get_block_compression_codec(_metadata.codec, &_block_compress_codec)); if (_metadata.__isset.dictionary_page_offset) { @@ -88,24 +92,27 @@ Status ColumnChunkReader::next_page() { if (UNLIKELY(_remaining_num_values != 0)) { return Status::Corruption("Should skip current page"); } + RETURN_IF_ERROR(_page_reader->next_page_header()); - if (_page_reader->get_page_header()->type == tparquet::PageType::DICTIONARY_PAGE) { - // the first page maybe directory page even if _metadata.__isset.dictionary_page_offset == false, - // so we should parse the directory page in next_page() - RETURN_IF_ERROR(_decode_dict_page()); - // parse the real first data page - return next_page(); - } else if (_page_reader->get_page_header()->type == tparquet::PageType::DATA_PAGE_V2) { - _remaining_num_values = _page_reader->get_page_header()->data_page_header_v2.num_values; - _chunk_parsed_values += _remaining_num_values; - _state = HEADER_PARSED; - return Status::OK(); - } else { - _remaining_num_values = _page_reader->get_page_header()->data_page_header.num_values; - _chunk_parsed_values += _remaining_num_values; - _state = HEADER_PARSED; - return Status::OK(); + + if (!_dict_checked) { + _dict_checked = true; + const tparquet::PageHeader* header; + RETURN_IF_ERROR(_page_reader->get_page_header(header)); + if (header->type == tparquet::PageType::DICTIONARY_PAGE) { + // the first page maybe directory page even if _metadata.__isset.dictionary_page_offset == false, + // so we should parse the directory page in next_page() + RETURN_IF_ERROR(_decode_dict_page()); + // parse the real first data page + return next_page(); + } } + + RETURN_IF_ERROR(_page_reader->get_num_values(_remaining_num_values)); + _chunk_parsed_values += _remaining_num_values; + _state = HEADER_PARSED; + + return Status::OK(); } void ColumnChunkReader::_get_uncompressed_levels(const tparquet::DataPageHeaderV2& page_v2, @@ -119,17 +126,19 @@ void ColumnChunkReader::_get_uncompressed_levels(const tparquet::DataPageHeaderV } Status ColumnChunkReader::load_page_data() { + // TODO: remove checking HEADER_PARSED or change name if (UNLIKELY(_state != HEADER_PARSED)) { return Status::Corruption("Should parse page header"); } - const auto& header = *_page_reader->get_page_header(); - int32_t uncompressed_size = header.uncompressed_page_size; + const tparquet::PageHeader* header; + RETURN_IF_ERROR(_page_reader->get_page_header(header)); + int32_t uncompressed_size = header->uncompressed_page_size; if (_block_compress_codec != nullptr) { Slice compressed_data; RETURN_IF_ERROR(_page_reader->get_page_data(compressed_data)); - if (header.__isset.data_page_header_v2) { - const tparquet::DataPageHeaderV2& header_v2 = header.data_page_header_v2; + if (header->__isset.data_page_header_v2) { + const tparquet::DataPageHeaderV2& header_v2 = header->data_page_header_v2; // uncompressed_size = rl + dl + uncompressed_data_size // compressed_size = rl + dl + compressed_data_size uncompressed_size -= header_v2.repetition_levels_byte_length + @@ -137,8 +146,8 @@ Status ColumnChunkReader::load_page_data() { _get_uncompressed_levels(header_v2, compressed_data); } bool is_v2_compressed 
= - header.__isset.data_page_header_v2 && header.data_page_header_v2.is_compressed; - if (header.__isset.data_page_header || is_v2_compressed) { + header->__isset.data_page_header_v2 && header->data_page_header_v2.is_compressed; + if (header->__isset.data_page_header || is_v2_compressed) { // check decompressed buffer size _reserve_decompress_buf(uncompressed_size); _page_data = Slice(_decompress_buf.get(), uncompressed_size); @@ -151,36 +160,36 @@ Status ColumnChunkReader::load_page_data() { } } else { RETURN_IF_ERROR(_page_reader->get_page_data(_page_data)); - if (header.__isset.data_page_header_v2) { - _get_uncompressed_levels(header.data_page_header_v2, _page_data); + if (header->__isset.data_page_header_v2) { + _get_uncompressed_levels(header->data_page_header_v2, _page_data); } } // Initialize repetition level and definition level. Skip when level = 0, which means required field. if (_max_rep_level > 0) { SCOPED_RAW_TIMER(&_statistics.decode_level_time); - if (header.__isset.data_page_header_v2) { + if (header->__isset.data_page_header_v2) { RETURN_IF_ERROR(_rep_level_decoder.init_v2(_v2_rep_levels, _max_rep_level, _remaining_num_values)); } else { RETURN_IF_ERROR(_rep_level_decoder.init( - &_page_data, header.data_page_header.repetition_level_encoding, _max_rep_level, + &_page_data, header->data_page_header.repetition_level_encoding, _max_rep_level, _remaining_num_values)); } } if (_max_def_level > 0) { SCOPED_RAW_TIMER(&_statistics.decode_level_time); - if (header.__isset.data_page_header_v2) { + if (header->__isset.data_page_header_v2) { RETURN_IF_ERROR(_def_level_decoder.init_v2(_v2_def_levels, _max_def_level, _remaining_num_values)); } else { RETURN_IF_ERROR(_def_level_decoder.init( - &_page_data, header.data_page_header.definition_level_encoding, _max_def_level, + &_page_data, header->data_page_header.definition_level_encoding, _max_def_level, _remaining_num_values)); } } - auto encoding = header.__isset.data_page_header_v2 ? header.data_page_header_v2.encoding - : header.data_page_header.encoding; + auto encoding = header->__isset.data_page_header_v2 ? header->data_page_header_v2.encoding + : header->data_page_header.encoding; // change the deprecated encoding to RLE_DICTIONARY if (encoding == tparquet::Encoding::PLAIN_DICTIONARY) { encoding = tparquet::Encoding::RLE_DICTIONARY; @@ -207,14 +216,15 @@ Status ColumnChunkReader::load_page_data() { } Status ColumnChunkReader::_decode_dict_page() { - const tparquet::PageHeader& header = *_page_reader->get_page_header(); - DCHECK_EQ(tparquet::PageType::DICTIONARY_PAGE, header.type); + const tparquet::PageHeader* header; + RETURN_IF_ERROR(_page_reader->get_page_header(header)); + DCHECK_EQ(tparquet::PageType::DICTIONARY_PAGE, header->type); SCOPED_RAW_TIMER(&_statistics.decode_dict_time); // Using the PLAIN_DICTIONARY enum value is deprecated in the Parquet 2.0 specification. // Prefer using RLE_DICTIONARY in a data page and PLAIN in a dictionary page for Parquet 2.0+ files. 
// refer: https://github.com/apache/parquet-format/blob/master/Encodings.md - tparquet::Encoding::type dict_encoding = header.dictionary_page_header.encoding; + tparquet::Encoding::type dict_encoding = header->dictionary_page_header.encoding; if (dict_encoding != tparquet::Encoding::PLAIN_DICTIONARY && dict_encoding != tparquet::Encoding::PLAIN) { return Status::InternalError("Unsupported dictionary encoding {}", @@ -222,7 +232,7 @@ Status ColumnChunkReader::_decode_dict_page() { } // Prepare dictionary data - int32_t uncompressed_size = header.uncompressed_page_size; + int32_t uncompressed_size = header->uncompressed_page_size; std::unique_ptr dict_data(new uint8_t[uncompressed_size]); if (_block_compress_codec != nullptr) { Slice compressed_data; @@ -246,7 +256,7 @@ Status ColumnChunkReader::_decode_dict_page() { // page_decoder->init(_field_schema, _ctz); // Set the dictionary data RETURN_IF_ERROR(page_decoder->set_dict(dict_data, uncompressed_size, - header.dictionary_page_header.num_values)); + header->dictionary_page_header.num_values)); _decoders[static_cast(tparquet::Encoding::RLE_DICTIONARY)] = std::move(page_decoder); _has_dict = true; diff --git a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h index 0ca6859ac83876..79ee3cd646306c 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_column_chunk_reader.h @@ -71,7 +71,7 @@ using ColumnString = ColumnStr; * // Or, we can call the chunk_reader.skip_page() to skip current page. * chunk_reader.load_page_data(); * // Decode values into column or slice. - * // Or, we can call chunk_reader.slip_values(num_values) to skip some values. + * // Or, we can call chunk_reader.skip_values(num_values) to skip some values. * chunk_reader.decode_values(slice, num_values); * } */ @@ -84,10 +84,13 @@ class ColumnChunkReader { int64_t decode_value_time = 0; int64_t decode_dict_time = 0; int64_t decode_level_time = 0; + int64_t skip_page_header_num = 0; + int64_t parse_page_header_num = 0; }; ColumnChunkReader(io::BufferedStreamReader* reader, tparquet::ColumnChunk* column_chunk, - FieldSchema* field_schema, cctz::time_zone* ctz, io::IOContext* io_ctx); + FieldSchema* field_schema, const tparquet::OffsetIndex* offset_index, + cctz::time_zone* ctz, io::IOContext* io_ctx); ~ColumnChunkReader() = default; // Initialize chunk reader, will generate the decoder and codec. 
@@ -170,6 +173,8 @@ class ColumnChunkReader { Statistics& statistics() { _statistics.decode_header_time = _page_reader->statistics().decode_header_time; + _statistics.skip_page_header_num = _page_reader->statistics().skip_page_header_num; + _statistics.parse_page_header_num = _page_reader->statistics().parse_page_header_num; return _statistics; } @@ -204,6 +209,7 @@ class ColumnChunkReader { io::BufferedStreamReader* _stream_reader = nullptr; tparquet::ColumnMetaData _metadata; + const tparquet::OffsetIndex* _offset_index; // cctz::time_zone* _ctz; io::IOContext* _io_ctx = nullptr; @@ -219,6 +225,7 @@ class ColumnChunkReader { size_t _decompress_buf_size = 0; Slice _v2_rep_levels; Slice _v2_def_levels; + bool _dict_checked = false; bool _has_dict = false; Decoder* _page_decoder = nullptr; // Map: encoding -> Decoder diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 2a3782ab44944c..85d03daebc5609 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -108,7 +108,7 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, const std::vector& row_ranges, cctz::time_zone* ctz, io::IOContext* io_ctx, std::unique_ptr& reader, - size_t max_buf_size) { + size_t max_buf_size, const tparquet::OffsetIndex* offset_index) { if (field->type.type == TYPE_ARRAY) { std::unique_ptr element_reader; RETURN_IF_ERROR(create(file, &field->children[0], row_group, row_ranges, ctz, io_ctx, @@ -144,7 +144,8 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, reader.reset(struct_reader.release()); } else { const tparquet::ColumnChunk& chunk = row_group.columns[field->physical_column_index]; - auto scalar_reader = ScalarColumnReader::create_unique(row_ranges, chunk, ctz, io_ctx); + auto scalar_reader = + ScalarColumnReader::create_unique(row_ranges, chunk, offset_index, ctz, io_ctx); RETURN_IF_ERROR(scalar_reader->init(file, field, max_buf_size)); reader.reset(scalar_reader.release()); } @@ -190,7 +191,7 @@ Status ScalarColumnReader::init(io::FileReaderSPtr file, FieldSchema* field, siz _stream_reader = std::make_unique(file, chunk_start, chunk_len, prefetch_buffer_size); _chunk_reader = std::make_unique(_stream_reader.get(), &_chunk_meta, field, - _ctz, _io_ctx); + _offset_index, _ctz, _io_ctx); RETURN_IF_ERROR(_chunk_reader->init()); return Status::OK(); } diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h index d12eac2f383d58..f0eadb8bcd61c5 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h @@ -65,7 +65,9 @@ class ParquetColumnReader { decode_value_time(0), decode_dict_time(0), decode_level_time(0), - decode_null_map_time(0) {} + decode_null_map_time(0), + skip_page_header_num(0), + parse_page_header_num(0) {} Statistics(io::BufferedStreamReader::Statistics& fs, ColumnChunkReader::Statistics& cs, int64_t null_map_time) @@ -79,7 +81,9 @@ class ParquetColumnReader { decode_value_time(cs.decode_value_time), decode_dict_time(cs.decode_dict_time), decode_level_time(cs.decode_level_time), - decode_null_map_time(null_map_time) {} + decode_null_map_time(null_map_time), + skip_page_header_num(cs.skip_page_header_num), + parse_page_header_num(cs.parse_page_header_num) {} int64_t read_time; int64_t read_calls; @@ -92,6 +96,8 @@ class 
ParquetColumnReader { int64_t decode_dict_time; int64_t decode_level_time; int64_t decode_null_map_time; + int64_t skip_page_header_num; + int64_t parse_page_header_num; void merge(Statistics& statistics) { read_time += statistics.read_time; @@ -105,6 +111,8 @@ class ParquetColumnReader { decode_dict_time += statistics.decode_dict_time; decode_level_time += statistics.decode_level_time; decode_null_map_time += statistics.decode_null_map_time; + skip_page_header_num += statistics.skip_page_header_num; + parse_page_header_num += statistics.parse_page_header_num; } }; @@ -134,7 +142,7 @@ class ParquetColumnReader { const tparquet::RowGroup& row_group, const std::vector& row_ranges, cctz::time_zone* ctz, io::IOContext* io_ctx, std::unique_ptr& reader, - size_t max_buf_size); + size_t max_buf_size, const tparquet::OffsetIndex* offset_index = nullptr); void set_nested_column() { _nested_column = true; } virtual const std::vector& get_rep_level() const = 0; virtual const std::vector& get_def_level() const = 0; @@ -160,9 +168,12 @@ class ScalarColumnReader : public ParquetColumnReader { ENABLE_FACTORY_CREATOR(ScalarColumnReader) public: ScalarColumnReader(const std::vector& row_ranges, - const tparquet::ColumnChunk& chunk_meta, cctz::time_zone* ctz, + const tparquet::ColumnChunk& chunk_meta, + const tparquet::OffsetIndex* offset_index, cctz::time_zone* ctz, io::IOContext* io_ctx) - : ParquetColumnReader(row_ranges, ctz, io_ctx), _chunk_meta(chunk_meta) {} + : ParquetColumnReader(row_ranges, ctz, io_ctx), + _chunk_meta(chunk_meta), + _offset_index(offset_index) {} ~ScalarColumnReader() override { close(); } Status init(io::FileReaderSPtr file, FieldSchema* field, size_t max_buf_size); Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type, @@ -182,6 +193,7 @@ class ScalarColumnReader : public ParquetColumnReader { private: tparquet::ColumnChunk _chunk_meta; + const tparquet::OffsetIndex* _offset_index; std::unique_ptr _stream_reader; std::unique_ptr _chunk_reader; std::vector _rep_levels; diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 3f8000c3173e1c..335207070dd367 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -39,7 +39,6 @@ #include "runtime/thread_context.h" #include "runtime/types.h" #include "schema_desc.h" -#include "util/simd/bits.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" @@ -124,12 +123,17 @@ Status RowGroupReader::init( const size_t MAX_GROUP_BUF_SIZE = config::parquet_rowgroup_max_buffer_mb << 20; const size_t MAX_COLUMN_BUF_SIZE = config::parquet_column_max_buffer_mb << 20; size_t max_buf_size = std::min(MAX_COLUMN_BUF_SIZE, MAX_GROUP_BUF_SIZE / _read_columns.size()); - for (auto& read_col : _read_columns) { - auto field = const_cast(schema.get_column(read_col)); + for (const auto& read_col : _read_columns) { + auto* field = const_cast(schema.get_column(read_col)); + auto physical_index = field->physical_column_index; std::unique_ptr reader; + // TODO : support rested column types + const tparquet::OffsetIndex* offset_index = + col_offsets.find(physical_index) != col_offsets.end() ? 
&col_offsets[physical_index] + : nullptr; RETURN_IF_ERROR(ParquetColumnReader::create(_file_reader, field, _row_group_meta, _read_ranges, _ctz, _io_ctx, reader, - max_buf_size)); + max_buf_size, offset_index)); if (reader == nullptr) { VLOG_DEBUG << "Init row group(" << _row_group_id << ") reader failed"; return Status::Corruption("Init row group reader failed"); diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h b/be/src/vec/exec/format/parquet/vparquet_group_reader.h index 128a7450554327..d38f5a74adf3d7 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h @@ -29,7 +29,6 @@ #include "io/fs/file_reader_writer_fwd.h" #include "vec/columns/column.h" -#include "vec/common/allocator.h" #include "vec/exec/format/parquet/parquet_common.h" #include "vec/exprs/vexpr_fwd.h" #include "vparquet_column_reader.h" diff --git a/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp index 3b4e18c27da7aa..a321e77c69289c 100644 --- a/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_page_reader.cpp @@ -40,11 +40,23 @@ namespace doris::vectorized { static constexpr size_t INIT_PAGE_HEADER_SIZE = 128; +std::unique_ptr create_page_reader(io::BufferedStreamReader* reader, + io::IOContext* io_ctx, uint64_t offset, + uint64_t length, int64_t num_values, + const tparquet::OffsetIndex* offset_index) { + if (offset_index) { + return std::make_unique(reader, io_ctx, offset, length, + num_values, offset_index); + } else { + return std::make_unique(reader, io_ctx, offset, length); + } +} + PageReader::PageReader(io::BufferedStreamReader* reader, io::IOContext* io_ctx, uint64_t offset, uint64_t length) : _reader(reader), _io_ctx(io_ctx), _start_offset(offset), _end_offset(offset + length) {} -Status PageReader::next_page_header() { +Status PageReader::_parse_page_header() { if (UNLIKELY(_offset < _start_offset || _offset >= _end_offset)) { return Status::IOError("Out-of-bounds Access"); } @@ -82,6 +94,7 @@ Status PageReader::next_page_header() { header_size <<= 2; } + _statistics.parse_page_header_num++; _offset += real_header_size; _next_header_offset = _offset + _cur_page_header.compressed_page_size; _state = HEADER_PARSED; diff --git a/be/src/vec/exec/format/parquet/vparquet_page_reader.h b/be/src/vec/exec/format/parquet/vparquet_page_reader.h index bdd0a8d0f5ff24..5765df4fc1f389 100644 --- a/be/src/vec/exec/format/parquet/vparquet_page_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_page_reader.h @@ -20,6 +20,8 @@ #include #include +#include + #include "common/status.h" namespace doris { @@ -39,11 +41,13 @@ class PageReader { public: struct Statistics { int64_t decode_header_time = 0; + int64_t skip_page_header_num = 0; + int64_t parse_page_header_num = 0; }; PageReader(io::BufferedStreamReader* reader, io::IOContext* io_ctx, uint64_t offset, uint64_t length); - ~PageReader() = default; + virtual ~PageReader() = default; // Deprecated // Parquet file may not be standardized, @@ -52,13 +56,31 @@ class PageReader { // [[deprecated]] bool has_next_page() const { return _offset < _end_offset; } - Status next_page_header(); + virtual Status next_page_header() { return _parse_page_header(); } - Status skip_page(); + virtual Status get_page_header(const tparquet::PageHeader*& page_header) { + if (UNLIKELY(_state != HEADER_PARSED)) { + return Status::InternalError("Page header not parsed"); + } + page_header = 
&_cur_page_header; + return Status::OK(); + } - const tparquet::PageHeader* get_page_header() const { return &_cur_page_header; } + virtual Status get_num_values(uint32_t& num_values) { + if (_state != HEADER_PARSED) { + return Status::InternalError("Page header not parsed"); + } + if (_cur_page_header.type == tparquet::PageType::DATA_PAGE_V2) { + num_values = _cur_page_header.data_page_header_v2.num_values; + } else { + num_values = _cur_page_header.data_page_header.num_values; + } + return Status::OK(); + } - Status get_page_data(Slice& slice); + virtual Status skip_page(); + + virtual Status get_page_data(Slice& slice); Statistics& statistics() { return _statistics; } @@ -68,20 +90,99 @@ class PageReader { _state = INITIALIZED; } -private: +protected: enum PageReaderState { INITIALIZED, HEADER_PARSED }; - - io::BufferedStreamReader* _reader = nullptr; - io::IOContext* _io_ctx = nullptr; + PageReaderState _state = INITIALIZED; tparquet::PageHeader _cur_page_header; Statistics _statistics; - PageReaderState _state = INITIALIZED; + Status _parse_page_header(); + +private: + io::BufferedStreamReader* _reader = nullptr; + io::IOContext* _io_ctx = nullptr; uint64_t _offset = 0; uint64_t _next_header_offset = 0; - uint64_t _start_offset = 0; uint64_t _end_offset = 0; }; +class PageReaderWithOffsetIndex : public PageReader { +public: + PageReaderWithOffsetIndex(io::BufferedStreamReader* reader, io::IOContext* io_ctx, + uint64_t offset, uint64_t length, int64_t num_values, + const tparquet::OffsetIndex* offset_index) + : PageReader(reader, io_ctx, offset, length), + _num_values(num_values), + _offset_index(offset_index) {} + + Status next_page_header() override { + // lazy to parse page header in get_page_header + return Status::OK(); + } + + Status get_page_header(const tparquet::PageHeader*& page_header) override { + if (_state != HEADER_PARSED) { + RETURN_IF_ERROR(_parse_page_header()); + } + page_header = &_cur_page_header; + return Status::OK(); + } + + Status get_num_values(uint32_t& num_values) override { + if (UNLIKELY(_page_index >= _offset_index->page_locations.size())) { + return Status::IOError("End of page"); + } + + if (_page_index < _offset_index->page_locations.size() - 1) { + num_values = _offset_index->page_locations[_page_index + 1].first_row_index - + _offset_index->page_locations[_page_index].first_row_index; + } else { + num_values = _num_values - _offset_index->page_locations[_page_index].first_row_index; + } + return Status::OK(); + } + + Status skip_page() override { + if (UNLIKELY(_page_index >= _offset_index->page_locations.size())) { + return Status::IOError("End of page"); + } + + if (_state != HEADER_PARSED) { + _statistics.skip_page_header_num++; + } + + seek_to_page(_offset_index->page_locations[_page_index].offset + + _offset_index->page_locations[_page_index].compressed_page_size); + _page_index++; + return Status::OK(); + } + + Status get_page_data(Slice& slice) override { + if (_page_index >= _offset_index->page_locations.size()) { + return Status::IOError("End of page"); + } + if (_state != HEADER_PARSED) { + RETURN_IF_ERROR(_parse_page_header()); + } + + // dirctionary page is not in page location + if (LIKELY(_cur_page_header.type != tparquet::PageType::DICTIONARY_PAGE)) { + _page_index++; + } + + return PageReader::get_page_data(slice); + } + +private: + size_t _page_index = 0; + int64_t _num_values = 0; + const tparquet::OffsetIndex* _offset_index; +}; + +std::unique_ptr create_page_reader(io::BufferedStreamReader* reader, + io::IOContext* io_ctx, 
uint64_t offset, + uint64_t length, int64_t num_values = 0, + const tparquet::OffsetIndex* offset_index = nullptr); + } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 632bc9bcda335d..ded745f5a5c95a 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -23,18 +23,14 @@ #include #include -#include #include #include "common/status.h" #include "exec/schema_scanner.h" -#include "gen_cpp/descriptors.pb.h" -#include "gtest/gtest_pred_impl.h" #include "io/file_factory.h" #include "io/fs/buffered_reader.h" #include "io/fs/file_reader.h" #include "io/fs/file_reader_writer_fwd.h" -#include "olap/olap_common.h" #include "parquet_pred_cmp.h" #include "parquet_thrift_util.h" #include "runtime/define_primitive_type.h" @@ -170,6 +166,10 @@ void ParquetReader::_init_profile() { ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeLevelTime", parquet_profile, 1); _parquet_profile.decode_null_map_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DecodeNullMapTime", parquet_profile, 1); + _parquet_profile.skip_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "SkipPageHeaderNum", TUnit::UNIT, parquet_profile, 1); + _parquet_profile.parse_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL( + _profile, "ParsePageHeaderNum", TUnit::UNIT, parquet_profile, 1); } } @@ -921,6 +921,9 @@ void ParquetReader::_collect_profile() { COUNTER_UPDATE(_parquet_profile.page_index_filter_time, _statistics.page_index_filter_time); COUNTER_UPDATE(_parquet_profile.row_group_filter_time, _statistics.row_group_filter_time); + COUNTER_UPDATE(_parquet_profile.skip_page_header_num, _column_statistics.skip_page_header_num); + COUNTER_UPDATE(_parquet_profile.parse_page_header_num, + _column_statistics.parse_page_header_num); COUNTER_UPDATE(_parquet_profile.file_read_time, _column_statistics.read_time); COUNTER_UPDATE(_parquet_profile.file_read_calls, _column_statistics.read_calls); COUNTER_UPDATE(_parquet_profile.file_meta_read_calls, _column_statistics.meta_read_calls); diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index d5e2ffe8ee0f54..416004f90a0ac5 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -183,6 +182,8 @@ class ParquetReader : public GenericReader { RuntimeProfile::Counter* decode_dict_time = nullptr; RuntimeProfile::Counter* decode_level_time = nullptr; RuntimeProfile::Counter* decode_null_map_time = nullptr; + RuntimeProfile::Counter* skip_page_header_num = nullptr; + RuntimeProfile::Counter* parse_page_header_num = nullptr; }; Status _open_file(); diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp index 26da99607b6cba..741bb242bfdae1 100644 --- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp +++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp @@ -205,7 +205,8 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column io::BufferedFileStreamReader stream_reader(file_reader, start_offset, chunk_size, 1024); - ColumnChunkReader chunk_reader(&stream_reader, column_chunk, field_schema, &ctz, nullptr); + ColumnChunkReader chunk_reader(&stream_reader, column_chunk, field_schema, nullptr, &ctz, + nullptr); // initialize chunk reader static_cast(chunk_reader.init()); // 
seek to next page header diff --git a/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out b/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out new file mode 100644 index 00000000000000..6c869dbc789466 --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out @@ -0,0 +1,289 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !q01 -- +1 2132 4633 4 28.00 28955.64 0.09 0.06 N O 1996-04-21 1996-03-30 1996-05-16 NONE AIR lites. fluffily even de +1 15635 638 6 32.00 49620.16 0.07 0.02 N O 1996-01-30 1996-02-07 1996-02-03 DELIVER IN PERSON MAIL arefully slyly ex +1 24027 1534 5 24.00 22824.48 0.10 0.04 N O 1996-03-30 1996-03-14 1996-04-01 NONE FOB pending foxes. slyly re +1 63700 3701 3 8.00 13309.60 0.10 0.02 N O 1996-01-29 1996-03-05 1996-01-31 TAKE BACK RETURN REG AIR riously. regular, express dep +1 67310 7311 2 36.00 45983.16 0.09 0.06 N O 1996-04-12 1996-02-28 1996-04-20 TAKE BACK RETURN MAIL ly final dependencies: slyly bold +1 155190 7706 1 17.00 21168.23 0.04 0.02 N O 1996-03-13 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK egular courts above the +2 106170 1191 1 38.00 44694.46 0.00 0.05 N O 1997-01-28 1997-01-14 1997-02-02 TAKE BACK RETURN RAIL ven requests. deposits breach a +3 4297 1798 1 45.00 54058.05 0.06 0.00 R F 1994-02-02 1994-01-04 1994-02-23 NONE AIR ongside of the furiously brave acco +3 19036 6540 2 49.00 46796.47 0.10 0.00 R F 1993-11-09 1993-12-20 1993-11-24 TAKE BACK RETURN RAIL unusual accounts. eve +3 29380 1883 4 2.00 2618.76 0.01 0.06 A F 1993-12-04 1994-01-07 1994-01-01 NONE TRUCK y. fluffily pending d + +-- !q02 -- +5999008 16312 6313 4 2.00 2456.62 0.08 0.05 R F 1994-04-28 1994-06-01 1994-05-12 COLLECT COD FOB longside of the slo +5999008 32738 2739 3 39.00 65158.47 0.09 0.08 R F 1994-07-11 1994-06-15 1994-08-04 TAKE BACK RETURN TRUCK equests nag along +5999008 64711 2230 1 29.00 48595.59 0.00 0.02 R F 1994-05-16 1994-07-04 1994-05-18 NONE FOB final requests across +5999008 192755 5275 2 32.00 59128.00 0.07 0.08 R F 1994-05-15 1994-05-22 1994-06-07 COLLECT COD RAIL ts sleep slyly about the slyly ironic acco +5999009 12147 7150 1 21.00 22241.94 0.00 0.05 N O 1997-11-01 1997-12-11 1997-11-05 NONE AIR deposits after the blithely ex +5999010 106595 6596 1 31.00 49649.29 0.02 0.05 N O 1997-11-29 1997-10-24 1997-12-11 DELIVER IN PERSON MAIL ilent instructions? slyly r +5999010 141441 1442 2 42.00 62262.48 0.05 0.08 N O 1997-09-21 1997-10-13 1997-09-25 TAKE BACK RETURN SHIP c, even ideas. ruth +5999010 193075 8114 3 40.00 46722.80 0.04 0.07 N O 1997-11-19 1997-09-25 1997-11-25 DELIVER IN PERSON AIR accounts sleep blithely even, +5999010 198678 1198 4 12.00 21320.04 0.00 0.04 N O 1997-09-19 1997-10-15 1997-10-05 DELIVER IN PERSON REG AIR ironic foxes. slyly special id +5999011 98609 6137 1 44.00 70734.40 0.03 0.01 N O 1998-04-05 1998-05-16 1998-05-05 DELIVER IN PERSON REG AIR ructions along the blit + +-- !q03 -- +2000001 16877 6878 1 36.00 64579.32 0.01 0.05 A F 1995-01-23 1995-01-31 1995-02-20 TAKE BACK RETURN FOB regular deposits. even +2000001 50928 3434 2 36.00 67641.12 0.02 0.03 A F 1995-02-03 1995-02-07 1995-02-13 NONE AIR ickly slyl +2000001 117877 7878 4 20.00 37897.40 0.02 0.07 R F 1995-03-29 1995-03-16 1995-04-01 COLLECT COD RAIL . 
realms boost unusual theodoli +2000001 135534 8048 3 38.00 59642.14 0.00 0.05 R F 1994-12-31 1995-03-06 1995-01-26 COLLECT COD MAIL l theodolites affix quickly alongside of +2000001 149269 1784 5 15.00 19773.90 0.05 0.05 R F 1995-03-08 1995-02-10 1995-03-23 DELIVER IN PERSON AIR e bold, silent foxes solve dog +2000002 41816 1817 6 8.00 14062.48 0.05 0.06 N O 1996-02-14 1995-12-25 1996-03-12 DELIVER IN PERSON FOB y quickly pending foxes. quickly ironic acc +2000002 62662 2663 2 48.00 77983.68 0.01 0.05 N O 1995-11-20 1996-01-11 1995-12-05 TAKE BACK RETURN SHIP requests sleep blithely. slyly +2000002 77402 7403 4 47.00 64831.80 0.07 0.03 N O 1996-02-24 1996-02-04 1996-03-09 NONE SHIP ong the carefully silent instructions. even +2000002 80719 8244 5 42.00 71387.82 0.10 0.03 N O 1995-11-24 1996-01-01 1995-11-25 COLLECT COD SHIP ing to the carefully final deposits. care +2000002 156357 6358 3 28.00 39573.80 0.09 0.05 N O 1995-12-26 1996-01-15 1996-01-25 COLLECT COD MAIL ely regular instr + +-- !q04 -- +1 Customer#000000001 IVhzIApeRb ot,c,E 15 25-989-741-2988 711.56 BUILDING to the even, regular platelets. regular, ironic epitaphs nag e +2 Customer#000000002 XSTf4,NCwDVaWNe6tEgvwfmRchLXak 13 23-768-687-3665 121.65 AUTOMOBILE l accounts. blithely ironic theodolites integrate boldly: caref +3 Customer#000000003 MG9kdTD2WBHm 1 11-719-748-3364 7498.12 AUTOMOBILE deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov +4 Customer#000000004 XxVSJsLAGtn 4 14-128-190-5944 2866.83 MACHINERY requests. final, regular ideas sleep final accou +5 Customer#000000005 KvpyuHCplrB84WgAiGV6sYpZq7Tj 3 13-750-942-6364 794.47 HOUSEHOLD n accounts will have to unwind. foxes cajole accor +6 Customer#000000006 sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn 20 30-114-968-4951 7638.57 AUTOMOBILE tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious +7 Customer#000000007 TcGe5gaZNgVePxU5kRrvXBfkasDTea 18 28-190-982-9759 9561.95 AUTOMOBILE ainst the ironic, express theodolites. express, even pinto beans among the exp +8 Customer#000000008 I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5 17 27-147-574-9335 6819.74 BUILDING among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide +9 Customer#000000009 xKiAFTjUsCuxfeleNqefumTrjS 8 18-338-906-3675 8324.07 FURNITURE r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl +10 Customer#000000010 6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2 5 15-741-346-9870 2753.54 HOUSEHOLD es regular deposits haggle. fur + +-- !q05 -- +140001 Customer#000140001 CkN1egC06Sc51bbDyQ8VnFn Bz6N1p 15 25-647-696-2830 2747.48 AUTOMOBILE heodolites. slyly bold theodolites d +140002 Customer#000140002 8UWLS,im0k94ivCVx 23 33-146-814-9234 7914.10 HOUSEHOLD eep behind the quickly bold foxes. furiously ironic ideas shall have to sleep. regular packages +140003 Customer#000140003 2duVgk HhGGlOeP4S,brWKjKG62bGkupful 23 33-326-909-7916 9389.77 FURNITURE pending, even packages are. slyly regular accounts wake ironically final packages. bold +140004 Customer#000140004 S,V7RhLnmqPK0TDghbcdxotzTyKeUC 13 23-594-312-2596 5931.66 HOUSEHOLD t blithely blithely regular packages. never silent dependencies cajo +140005 Customer#000140005 yQemRDs9i8MmTJla7xha xqeZjMznW 20 30-169-231-7354 9489.56 BUILDING accounts. 
even ideas sleep carefu +140006 Customer#000140006 5,eRqyFjpobN2Wtvt2oXuLcJcNE8oTyRh 15 25-681-278-7283 67.66 MACHINERY at the accounts are bold escapades. furiously final foxes use carefully unusual orb +140007 Customer#000140007 AX75sSePE5PlDjD5qS6W1dx08Levf09 24 34-818-770-8059 2093.37 MACHINERY ily according to the furiously final packages? quickly spe +140008 Customer#000140008 2zpry AYh9otf4c5vESISPvKLWPKe9i 14 24-552-949-6395 3264.69 AUTOMOBILE nstructions are against the requests. fin +140009 Customer#000140009 dNwNUcCv,,0YE6WFYfOgM,6A2 4 14-940-856-8557 -359.36 HOUSEHOLD beans. blithely silent dependencies haggle slyly. carefully quick accounts across the depos +140010 Customer#000140010 vZxOW,NtvppKR9mpTl6RDl9sWJJbosYDoLineEm 7 17-151-800-8260 8216.11 BUILDING nding foxes across the quickly regular forges nod accounts. slyly express ex + +-- !q06 -- +100001 Customer#000100001 gQ1s5C45A3PxWmZ1oFFSxt8u EcZ, 24 34-705-443-4055 1726.66 HOUSEHOLD ts. ironic instructions sleep. final deposits +100002 Customer#000100002 qOmTcZ7kHzJLSoaLenr9,Gu 17 27-453-414-8560 -39.14 BUILDING wake carefully. blithely regular epitaphs are among the quickly regular deposits. +100003 Customer#000100003 5AYbJxvjo7ErQB,cGIpKZRAE9,w2l9 5 15-783-309-8970 72.71 BUILDING ckly blithely special accounts. theodolites are carefully. pending requests ha +100004 Customer#000100004 cpIOYQpMlm 18 28-316-370-8752 9990.05 BUILDING y above the slyly regular pains. unusual requests against the always special packages bre +100005 Customer#000100005 Wud8n74NcIpwiKSjPS zZ 16 26-935-603-9031 7789.25 BUILDING ing dugouts boost slyly above the pending, final accounts? regular deposits wake slyly alongside of the blithely i +100006 Customer#000100006 AkjXh4y,QNaF7,0xzbP,sG 7 17-964-673-7626 974.05 MACHINERY grate across the slyly even packages; final, special idea +100007 Customer#000100007 d94JW9Hc2ZtGriOBNKyIjOeP,VZZqIX7S 17 27-244-129-5307 777.86 HOUSEHOLD foxes are against the ironic theodolites. evenly pending ideas according to the qu +100008 Customer#000100008 Hv2A,YqfNnGRIKaY 18 28-828-394-8424 3374.90 BUILDING ccounts. even deposits wake quickly pinto beans. bold instructions integrate? never bold theodolites are s +100009 Customer#000100009 OioQ3EjJZRvxCNh6Q8E3QZH 6 16-928-807-2622 3932.63 MACHINERY aggle blithely quickly final accounts. carefully final deposits above the fluffily unus +100010 Customer#000100010 Tbiz2WMJX 0 10-147-978-7806 5693.02 BUILDING y regular ideas. quickly unusual gifts n + +-- !q07 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 5-LOW Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly +6 55624 F 58749.59 1992-02-21 4-NOT SPECIFIED Clerk#000000058 0 ggle. special, final requests are against the furiously specia +7 39136 O 252004.18 1996-01-10 2-HIGH Clerk#000000470 0 ly special requests +32 130057 O 208660.75 1995-07-16 2-HIGH Clerk#000000616 0 ise blithely bold, regular requests. quickly unusual dep +33 66958 F 163243.98 1993-10-27 3-MEDIUM Clerk#000000409 0 uriously. 
furiously final request +34 61001 O 58949.67 1998-07-21 3-MEDIUM Clerk#000000223 0 ly final packages. fluffily final deposits wake blithely ideas. spe + +-- !q08 -- +5990016 100807 F 102428.29 1994-01-31 1-URGENT Clerk#000000554 0 . fluffily unusual requests cajole furiously. fluffily pending accounts ca +5990017 12382 F 176602.99 1992-07-01 5-LOW Clerk#000000205 0 ual pinto beans. final instructions haggle quickly alongside of the furio +5990018 51145 F 78440.49 1992-05-28 1-URGENT Clerk#000000996 0 quests play daringly. regula +5990019 85478 O 250306.69 1998-06-29 5-LOW Clerk#000000900 0 ainst the sly pinto beans. unu +5990020 62137 O 229287.04 1996-08-15 1-URGENT Clerk#000000801 0 fluffily special pinto beans. regular, regular pinto beans slee +5990021 24235 O 265459.10 1996-12-16 3-MEDIUM Clerk#000000113 0 gside of the ironic, unusual escapades. evenly silent tithes are +5990022 35143 O 141070.92 1996-07-01 4-NOT SPECIFIED Clerk#000000546 0 ests haggle across the blithely bo +5990023 65318 F 171515.91 1993-07-04 1-URGENT Clerk#000000178 0 r the express accounts haggle blithely ironic accounts-- regu +5990048 88213 O 70608.62 1997-10-23 2-HIGH Clerk#000000303 0 slyly enticing foxes doze regularly even requests. +5990049 115694 F 183390.98 1992-05-21 1-URGENT Clerk#000000450 0 ckly final theodolites ca + +-- !q09 -- +2000001 44200 F 257495.03 1994-12-18 5-LOW Clerk#000000314 0 ometimes theodolites. quickly even accounts among the blithely bold +2000002 55241 O 263734.77 1995-11-13 1-URGENT Clerk#000000749 0 uses along the brave excuses sleep for the packages. packages affix? slyl +2000003 84553 F 78066.42 1992-10-10 5-LOW Clerk#000000314 0 e slyly regular asymptotes. fluf +2000004 125197 F 246917.53 1993-01-06 1-URGENT Clerk#000000675 0 ironic ideas. platelets are regularly after the +2000005 117907 O 229611.23 1996-10-16 2-HIGH Clerk#000000458 0 he furiously regular excuses haggle slyly along the slyly pending a +2000006 1538 O 32011.55 1995-12-09 1-URGENT Clerk#000000279 0 ual, regular deposits sleep carefully carefully final dependencies. dep +2000007 42958 F 48446.75 1993-03-28 5-LOW Clerk#000000956 0 uickly final ideas. final, final requests are courts. slyly unu +2000032 34156 F 56186.58 1994-09-05 4-NOT SPECIFIED Clerk#000000612 0 fully regular instructions doze +2000033 141263 O 130829.92 1997-06-05 3-MEDIUM Clerk#000000118 0 inst the final dependencies. even, final pat +2000034 149275 O 64568.70 1997-09-23 2-HIGH Clerk#000000335 0 regular asymptotes. carefu + +-- !q10 -- +1 goldenrod lavender spring chocolate lace Manufacturer#1 Brand#13 PROMO BURNISHED COPPER 7 JUMBO PKG 901.00 ly. slyly ironi +2 blush thistle blue yellow saddle Manufacturer#1 Brand#13 LARGE BRUSHED BRASS 1 LG CASE 902.00 lar accounts amo +3 spring green yellow purple cornsilk Manufacturer#4 Brand#42 STANDARD POLISHED BRASS 21 WRAP CASE 903.00 egular deposits hag +4 cornflower chocolate smoke green pink Manufacturer#3 Brand#34 SMALL PLATED BRASS 14 MED DRUM 904.00 p furiously r +5 forest brown coral puff cream Manufacturer#3 Brand#32 STANDARD POLISHED TIN 15 SM PKG 905.00 wake carefully +6 bisque cornflower lawn forest magenta Manufacturer#2 Brand#24 PROMO PLATED STEEL 4 MED BAG 906.00 sual a +7 moccasin green thistle khaki floral Manufacturer#1 Brand#11 SMALL PLATED COPPER 45 SM BAG 907.00 lyly. 
ex +8 misty lace thistle snow royal Manufacturer#4 Brand#44 PROMO BURNISHED TIN 41 LG DRUM 908.00 eposi +9 thistle dim navajo dark gainsboro Manufacturer#4 Brand#43 SMALL BURNISHED STEEL 12 WRAP CASE 909.00 ironic foxe +10 linen pink saddle puff powder Manufacturer#5 Brand#54 LARGE BURNISHED STEEL 44 LG CAN 910.01 ithely final deposit + +-- !q08 -- +190001 powder coral chiffon burnished bisque Manufacturer#2 Brand#22 MEDIUM ANODIZED NICKEL 26 WRAP BOX 1091.00 ly busy deposi +190002 peru coral rosy azure green Manufacturer#4 Brand#41 LARGE POLISHED TIN 21 SM PKG 1092.00 express, daring sh +190003 white salmon lemon cornsilk ghost Manufacturer#4 Brand#41 PROMO ANODIZED TIN 41 LG BAG 1093.00 ckages according to th +190004 ivory almond honeydew metallic dodger Manufacturer#4 Brand#44 PROMO PLATED NICKEL 23 MED DRUM 1094.00 blithely regular t +190005 slate indian forest chartreuse rosy Manufacturer#1 Brand#11 SMALL BRUSHED BRASS 3 SM CASE 1095.00 ly blithe, regula +190006 navajo lavender smoke puff olive Manufacturer#5 Brand#55 SMALL BRUSHED BRASS 35 LG CASE 1096.00 ilent ideas boo +190007 khaki lime goldenrod pink grey Manufacturer#1 Brand#11 STANDARD PLATED BRASS 30 SM PKG 1097.00 fully final gift +190008 cream dark peru thistle gainsboro Manufacturer#3 Brand#31 ECONOMY ANODIZED STEEL 46 WRAP CASE 1098.00 pinto beans. fur +190009 orchid goldenrod metallic frosted powder Manufacturer#3 Brand#33 STANDARD ANODIZED COPPER 25 LG BAG 1099.00 es cajole f +190010 misty mint white seashell papaya Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 38 JUMBO BOX 1100.01 pecia + +-- !q12 -- +100001 seashell cyan plum purple honeydew Manufacturer#3 Brand#35 STANDARD BRUSHED TIN 37 JUMBO CASE 1001.00 ronic dependencies d +100002 steel moccasin forest cornflower brown Manufacturer#3 Brand#34 STANDARD ANODIZED NICKEL 11 WRAP CAN 1002.00 quickly pending +100003 beige powder violet orchid yellow Manufacturer#2 Brand#21 MEDIUM PLATED BRASS 41 SM BOX 1003.00 carefully even pac +100004 snow blanched khaki indian azure Manufacturer#4 Brand#42 SMALL POLISHED TIN 29 SM CASE 1004.00 sly. blithely +100005 grey midnight orange peach pale Manufacturer#2 Brand#21 SMALL POLISHED STEEL 7 MED BAG 1005.00 ajole? blithe +100006 violet sandy olive yellow orange Manufacturer#4 Brand#45 STANDARD BURNISHED COPPER 23 WRAP CASE 1006.00 he slyly regular pack +100007 snow magenta pale lemon metallic Manufacturer#1 Brand#12 PROMO BURNISHED COPPER 4 MED PKG 1007.00 ronic accounts in +100008 spring powder sienna purple lime Manufacturer#4 Brand#45 ECONOMY BRUSHED BRASS 19 SM PKG 1008.00 ts. furious +100009 goldenrod sandy beige hot orange Manufacturer#3 Brand#32 SMALL BURNISHED STEEL 41 WRAP BOX 1009.00 dinos about the quick +100010 lime lavender slate cream brown Manufacturer#4 Brand#43 PROMO ANODIZED COPPER 19 JUMBO PACK 1010.01 gle slyly above the b + +-- !q01 -- +1 2132 4633 4 28.00 28955.64 0.09 0.06 N O 1996-04-21 1996-03-30 1996-05-16 NONE AIR lites. fluffily even de +1 15635 638 6 32.00 49620.16 0.07 0.02 N O 1996-01-30 1996-02-07 1996-02-03 DELIVER IN PERSON MAIL arefully slyly ex +1 24027 1534 5 24.00 22824.48 0.10 0.04 N O 1996-03-30 1996-03-14 1996-04-01 NONE FOB pending foxes. slyly re +1 63700 3701 3 8.00 13309.60 0.10 0.02 N O 1996-01-29 1996-03-05 1996-01-31 TAKE BACK RETURN REG AIR riously. 
regular, express dep +1 67310 7311 2 36.00 45983.16 0.09 0.06 N O 1996-04-12 1996-02-28 1996-04-20 TAKE BACK RETURN MAIL ly final dependencies: slyly bold +1 155190 7706 1 17.00 21168.23 0.04 0.02 N O 1996-03-13 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK egular courts above the +2 106170 1191 1 38.00 44694.46 0.00 0.05 N O 1997-01-28 1997-01-14 1997-02-02 TAKE BACK RETURN RAIL ven requests. deposits breach a +3 4297 1798 1 45.00 54058.05 0.06 0.00 R F 1994-02-02 1994-01-04 1994-02-23 NONE AIR ongside of the furiously brave acco +3 19036 6540 2 49.00 46796.47 0.10 0.00 R F 1993-11-09 1993-12-20 1993-11-24 TAKE BACK RETURN RAIL unusual accounts. eve +3 29380 1883 4 2.00 2618.76 0.01 0.06 A F 1993-12-04 1994-01-07 1994-01-01 NONE TRUCK y. fluffily pending d + +-- !q02 -- +5999008 16312 6313 4 2.00 2456.62 0.08 0.05 R F 1994-04-28 1994-06-01 1994-05-12 COLLECT COD FOB longside of the slo +5999008 32738 2739 3 39.00 65158.47 0.09 0.08 R F 1994-07-11 1994-06-15 1994-08-04 TAKE BACK RETURN TRUCK equests nag along +5999008 64711 2230 1 29.00 48595.59 0.00 0.02 R F 1994-05-16 1994-07-04 1994-05-18 NONE FOB final requests across +5999008 192755 5275 2 32.00 59128.00 0.07 0.08 R F 1994-05-15 1994-05-22 1994-06-07 COLLECT COD RAIL ts sleep slyly about the slyly ironic acco +5999009 12147 7150 1 21.00 22241.94 0.00 0.05 N O 1997-11-01 1997-12-11 1997-11-05 NONE AIR deposits after the blithely ex +5999010 106595 6596 1 31.00 49649.29 0.02 0.05 N O 1997-11-29 1997-10-24 1997-12-11 DELIVER IN PERSON MAIL ilent instructions? slyly r +5999010 141441 1442 2 42.00 62262.48 0.05 0.08 N O 1997-09-21 1997-10-13 1997-09-25 TAKE BACK RETURN SHIP c, even ideas. ruth +5999010 193075 8114 3 40.00 46722.80 0.04 0.07 N O 1997-11-19 1997-09-25 1997-11-25 DELIVER IN PERSON AIR accounts sleep blithely even, +5999010 198678 1198 4 12.00 21320.04 0.00 0.04 N O 1997-09-19 1997-10-15 1997-10-05 DELIVER IN PERSON REG AIR ironic foxes. slyly special id +5999011 98609 6137 1 44.00 70734.40 0.03 0.01 N O 1998-04-05 1998-05-16 1998-05-05 DELIVER IN PERSON REG AIR ructions along the blit + +-- !q03 -- +2000001 16877 6878 1 36.00 64579.32 0.01 0.05 A F 1995-01-23 1995-01-31 1995-02-20 TAKE BACK RETURN FOB regular deposits. even +2000001 50928 3434 2 36.00 67641.12 0.02 0.03 A F 1995-02-03 1995-02-07 1995-02-13 NONE AIR ickly slyl +2000001 117877 7878 4 20.00 37897.40 0.02 0.07 R F 1995-03-29 1995-03-16 1995-04-01 COLLECT COD RAIL . realms boost unusual theodoli +2000001 135534 8048 3 38.00 59642.14 0.00 0.05 R F 1994-12-31 1995-03-06 1995-01-26 COLLECT COD MAIL l theodolites affix quickly alongside of +2000001 149269 1784 5 15.00 19773.90 0.05 0.05 R F 1995-03-08 1995-02-10 1995-03-23 DELIVER IN PERSON AIR e bold, silent foxes solve dog +2000002 41816 1817 6 8.00 14062.48 0.05 0.06 N O 1996-02-14 1995-12-25 1996-03-12 DELIVER IN PERSON FOB y quickly pending foxes. quickly ironic acc +2000002 62662 2663 2 48.00 77983.68 0.01 0.05 N O 1995-11-20 1996-01-11 1995-12-05 TAKE BACK RETURN SHIP requests sleep blithely. slyly +2000002 77402 7403 4 47.00 64831.80 0.07 0.03 N O 1996-02-24 1996-02-04 1996-03-09 NONE SHIP ong the carefully silent instructions. even +2000002 80719 8244 5 42.00 71387.82 0.10 0.03 N O 1995-11-24 1996-01-01 1995-11-25 COLLECT COD SHIP ing to the carefully final deposits. care +2000002 156357 6358 3 28.00 39573.80 0.09 0.05 N O 1995-12-26 1996-01-15 1996-01-25 COLLECT COD MAIL ely regular instr + +-- !q04 -- +1 Customer#000000001 IVhzIApeRb ot,c,E 15 25-989-741-2988 711.56 BUILDING to the even, regular platelets. 
regular, ironic epitaphs nag e +2 Customer#000000002 XSTf4,NCwDVaWNe6tEgvwfmRchLXak 13 23-768-687-3665 121.65 AUTOMOBILE l accounts. blithely ironic theodolites integrate boldly: caref +3 Customer#000000003 MG9kdTD2WBHm 1 11-719-748-3364 7498.12 AUTOMOBILE deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov +4 Customer#000000004 XxVSJsLAGtn 4 14-128-190-5944 2866.83 MACHINERY requests. final, regular ideas sleep final accou +5 Customer#000000005 KvpyuHCplrB84WgAiGV6sYpZq7Tj 3 13-750-942-6364 794.47 HOUSEHOLD n accounts will have to unwind. foxes cajole accor +6 Customer#000000006 sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn 20 30-114-968-4951 7638.57 AUTOMOBILE tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious +7 Customer#000000007 TcGe5gaZNgVePxU5kRrvXBfkasDTea 18 28-190-982-9759 9561.95 AUTOMOBILE ainst the ironic, express theodolites. express, even pinto beans among the exp +8 Customer#000000008 I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5 17 27-147-574-9335 6819.74 BUILDING among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide +9 Customer#000000009 xKiAFTjUsCuxfeleNqefumTrjS 8 18-338-906-3675 8324.07 FURNITURE r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl +10 Customer#000000010 6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2 5 15-741-346-9870 2753.54 HOUSEHOLD es regular deposits haggle. fur + +-- !q05 -- +140001 Customer#000140001 CkN1egC06Sc51bbDyQ8VnFn Bz6N1p 15 25-647-696-2830 2747.48 AUTOMOBILE heodolites. slyly bold theodolites d +140002 Customer#000140002 8UWLS,im0k94ivCVx 23 33-146-814-9234 7914.10 HOUSEHOLD eep behind the quickly bold foxes. furiously ironic ideas shall have to sleep. regular packages +140003 Customer#000140003 2duVgk HhGGlOeP4S,brWKjKG62bGkupful 23 33-326-909-7916 9389.77 FURNITURE pending, even packages are. slyly regular accounts wake ironically final packages. bold +140004 Customer#000140004 S,V7RhLnmqPK0TDghbcdxotzTyKeUC 13 23-594-312-2596 5931.66 HOUSEHOLD t blithely blithely regular packages. never silent dependencies cajo +140005 Customer#000140005 yQemRDs9i8MmTJla7xha xqeZjMznW 20 30-169-231-7354 9489.56 BUILDING accounts. even ideas sleep carefu +140006 Customer#000140006 5,eRqyFjpobN2Wtvt2oXuLcJcNE8oTyRh 15 25-681-278-7283 67.66 MACHINERY at the accounts are bold escapades. furiously final foxes use carefully unusual orb +140007 Customer#000140007 AX75sSePE5PlDjD5qS6W1dx08Levf09 24 34-818-770-8059 2093.37 MACHINERY ily according to the furiously final packages? quickly spe +140008 Customer#000140008 2zpry AYh9otf4c5vESISPvKLWPKe9i 14 24-552-949-6395 3264.69 AUTOMOBILE nstructions are against the requests. fin +140009 Customer#000140009 dNwNUcCv,,0YE6WFYfOgM,6A2 4 14-940-856-8557 -359.36 HOUSEHOLD beans. blithely silent dependencies haggle slyly. carefully quick accounts across the depos +140010 Customer#000140010 vZxOW,NtvppKR9mpTl6RDl9sWJJbosYDoLineEm 7 17-151-800-8260 8216.11 BUILDING nding foxes across the quickly regular forges nod accounts. slyly express ex + +-- !q06 -- +100001 Customer#000100001 gQ1s5C45A3PxWmZ1oFFSxt8u EcZ, 24 34-705-443-4055 1726.66 HOUSEHOLD ts. ironic instructions sleep. final deposits +100002 Customer#000100002 qOmTcZ7kHzJLSoaLenr9,Gu 17 27-453-414-8560 -39.14 BUILDING wake carefully. blithely regular epitaphs are among the quickly regular deposits. 
+100003 Customer#000100003 5AYbJxvjo7ErQB,cGIpKZRAE9,w2l9 5 15-783-309-8970 72.71 BUILDING ckly blithely special accounts. theodolites are carefully. pending requests ha +100004 Customer#000100004 cpIOYQpMlm 18 28-316-370-8752 9990.05 BUILDING y above the slyly regular pains. unusual requests against the always special packages bre +100005 Customer#000100005 Wud8n74NcIpwiKSjPS zZ 16 26-935-603-9031 7789.25 BUILDING ing dugouts boost slyly above the pending, final accounts? regular deposits wake slyly alongside of the blithely i +100006 Customer#000100006 AkjXh4y,QNaF7,0xzbP,sG 7 17-964-673-7626 974.05 MACHINERY grate across the slyly even packages; final, special idea +100007 Customer#000100007 d94JW9Hc2ZtGriOBNKyIjOeP,VZZqIX7S 17 27-244-129-5307 777.86 HOUSEHOLD foxes are against the ironic theodolites. evenly pending ideas according to the qu +100008 Customer#000100008 Hv2A,YqfNnGRIKaY 18 28-828-394-8424 3374.90 BUILDING ccounts. even deposits wake quickly pinto beans. bold instructions integrate? never bold theodolites are s +100009 Customer#000100009 OioQ3EjJZRvxCNh6Q8E3QZH 6 16-928-807-2622 3932.63 MACHINERY aggle blithely quickly final accounts. carefully final deposits above the fluffily unus +100010 Customer#000100010 Tbiz2WMJX 0 10-147-978-7806 5693.02 BUILDING y regular ideas. quickly unusual gifts n + +-- !q07 -- +1 36901 O 173665.47 1996-01-02 5-LOW Clerk#000000951 0 nstructions sleep furiously among +2 78002 O 46929.18 1996-12-01 1-URGENT Clerk#000000880 0 foxes. pending accounts at the pending, silent asymptot +3 123314 F 193846.25 1993-10-14 5-LOW Clerk#000000955 0 sly final accounts boost. carefully regular ideas cajole carefully. depos +4 136777 O 32151.78 1995-10-11 5-LOW Clerk#000000124 0 sits. slyly regular warthogs cajole. regular, regular theodolites acro +5 44485 F 144659.20 1994-07-30 5-LOW Clerk#000000925 0 quickly. bold deposits sleep slyly. packages use slyly +6 55624 F 58749.59 1992-02-21 4-NOT SPECIFIED Clerk#000000058 0 ggle. special, final requests are against the furiously specia +7 39136 O 252004.18 1996-01-10 2-HIGH Clerk#000000470 0 ly special requests +32 130057 O 208660.75 1995-07-16 2-HIGH Clerk#000000616 0 ise blithely bold, regular requests. quickly unusual dep +33 66958 F 163243.98 1993-10-27 3-MEDIUM Clerk#000000409 0 uriously. furiously final request +34 61001 O 58949.67 1998-07-21 3-MEDIUM Clerk#000000223 0 ly final packages. fluffily final deposits wake blithely ideas. spe + +-- !q08 -- +5990016 100807 F 102428.29 1994-01-31 1-URGENT Clerk#000000554 0 . fluffily unusual requests cajole furiously. fluffily pending accounts ca +5990017 12382 F 176602.99 1992-07-01 5-LOW Clerk#000000205 0 ual pinto beans. final instructions haggle quickly alongside of the furio +5990018 51145 F 78440.49 1992-05-28 1-URGENT Clerk#000000996 0 quests play daringly. regula +5990019 85478 O 250306.69 1998-06-29 5-LOW Clerk#000000900 0 ainst the sly pinto beans. unu +5990020 62137 O 229287.04 1996-08-15 1-URGENT Clerk#000000801 0 fluffily special pinto beans. regular, regular pinto beans slee +5990021 24235 O 265459.10 1996-12-16 3-MEDIUM Clerk#000000113 0 gside of the ironic, unusual escapades. evenly silent tithes are +5990022 35143 O 141070.92 1996-07-01 4-NOT SPECIFIED Clerk#000000546 0 ests haggle across the blithely bo +5990023 65318 F 171515.91 1993-07-04 1-URGENT Clerk#000000178 0 r the express accounts haggle blithely ironic accounts-- regu +5990048 88213 O 70608.62 1997-10-23 2-HIGH Clerk#000000303 0 slyly enticing foxes doze regularly even requests. 
+5990049 115694 F 183390.98 1992-05-21 1-URGENT Clerk#000000450 0 ckly final theodolites ca + +-- !q09 -- +2000001 44200 F 257495.03 1994-12-18 5-LOW Clerk#000000314 0 ometimes theodolites. quickly even accounts among the blithely bold +2000002 55241 O 263734.77 1995-11-13 1-URGENT Clerk#000000749 0 uses along the brave excuses sleep for the packages. packages affix? slyl +2000003 84553 F 78066.42 1992-10-10 5-LOW Clerk#000000314 0 e slyly regular asymptotes. fluf +2000004 125197 F 246917.53 1993-01-06 1-URGENT Clerk#000000675 0 ironic ideas. platelets are regularly after the +2000005 117907 O 229611.23 1996-10-16 2-HIGH Clerk#000000458 0 he furiously regular excuses haggle slyly along the slyly pending a +2000006 1538 O 32011.55 1995-12-09 1-URGENT Clerk#000000279 0 ual, regular deposits sleep carefully carefully final dependencies. dep +2000007 42958 F 48446.75 1993-03-28 5-LOW Clerk#000000956 0 uickly final ideas. final, final requests are courts. slyly unu +2000032 34156 F 56186.58 1994-09-05 4-NOT SPECIFIED Clerk#000000612 0 fully regular instructions doze +2000033 141263 O 130829.92 1997-06-05 3-MEDIUM Clerk#000000118 0 inst the final dependencies. even, final pat +2000034 149275 O 64568.70 1997-09-23 2-HIGH Clerk#000000335 0 regular asymptotes. carefu + +-- !q10 -- +1 goldenrod lavender spring chocolate lace Manufacturer#1 Brand#13 PROMO BURNISHED COPPER 7 JUMBO PKG 901.00 ly. slyly ironi +2 blush thistle blue yellow saddle Manufacturer#1 Brand#13 LARGE BRUSHED BRASS 1 LG CASE 902.00 lar accounts amo +3 spring green yellow purple cornsilk Manufacturer#4 Brand#42 STANDARD POLISHED BRASS 21 WRAP CASE 903.00 egular deposits hag +4 cornflower chocolate smoke green pink Manufacturer#3 Brand#34 SMALL PLATED BRASS 14 MED DRUM 904.00 p furiously r +5 forest brown coral puff cream Manufacturer#3 Brand#32 STANDARD POLISHED TIN 15 SM PKG 905.00 wake carefully +6 bisque cornflower lawn forest magenta Manufacturer#2 Brand#24 PROMO PLATED STEEL 4 MED BAG 906.00 sual a +7 moccasin green thistle khaki floral Manufacturer#1 Brand#11 SMALL PLATED COPPER 45 SM BAG 907.00 lyly. ex +8 misty lace thistle snow royal Manufacturer#4 Brand#44 PROMO BURNISHED TIN 41 LG DRUM 908.00 eposi +9 thistle dim navajo dark gainsboro Manufacturer#4 Brand#43 SMALL BURNISHED STEEL 12 WRAP CASE 909.00 ironic foxe +10 linen pink saddle puff powder Manufacturer#5 Brand#54 LARGE BURNISHED STEEL 44 LG CAN 910.01 ithely final deposit + +-- !q08 -- +190001 powder coral chiffon burnished bisque Manufacturer#2 Brand#22 MEDIUM ANODIZED NICKEL 26 WRAP BOX 1091.00 ly busy deposi +190002 peru coral rosy azure green Manufacturer#4 Brand#41 LARGE POLISHED TIN 21 SM PKG 1092.00 express, daring sh +190003 white salmon lemon cornsilk ghost Manufacturer#4 Brand#41 PROMO ANODIZED TIN 41 LG BAG 1093.00 ckages according to th +190004 ivory almond honeydew metallic dodger Manufacturer#4 Brand#44 PROMO PLATED NICKEL 23 MED DRUM 1094.00 blithely regular t +190005 slate indian forest chartreuse rosy Manufacturer#1 Brand#11 SMALL BRUSHED BRASS 3 SM CASE 1095.00 ly blithe, regula +190006 navajo lavender smoke puff olive Manufacturer#5 Brand#55 SMALL BRUSHED BRASS 35 LG CASE 1096.00 ilent ideas boo +190007 khaki lime goldenrod pink grey Manufacturer#1 Brand#11 STANDARD PLATED BRASS 30 SM PKG 1097.00 fully final gift +190008 cream dark peru thistle gainsboro Manufacturer#3 Brand#31 ECONOMY ANODIZED STEEL 46 WRAP CASE 1098.00 pinto beans. 
fur +190009 orchid goldenrod metallic frosted powder Manufacturer#3 Brand#33 STANDARD ANODIZED COPPER 25 LG BAG 1099.00 es cajole f +190010 misty mint white seashell papaya Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 38 JUMBO BOX 1100.01 pecia + +-- !q12 -- +100001 seashell cyan plum purple honeydew Manufacturer#3 Brand#35 STANDARD BRUSHED TIN 37 JUMBO CASE 1001.00 ronic dependencies d +100002 steel moccasin forest cornflower brown Manufacturer#3 Brand#34 STANDARD ANODIZED NICKEL 11 WRAP CAN 1002.00 quickly pending +100003 beige powder violet orchid yellow Manufacturer#2 Brand#21 MEDIUM PLATED BRASS 41 SM BOX 1003.00 carefully even pac +100004 snow blanched khaki indian azure Manufacturer#4 Brand#42 SMALL POLISHED TIN 29 SM CASE 1004.00 sly. blithely +100005 grey midnight orange peach pale Manufacturer#2 Brand#21 SMALL POLISHED STEEL 7 MED BAG 1005.00 ajole? blithe +100006 violet sandy olive yellow orange Manufacturer#4 Brand#45 STANDARD BURNISHED COPPER 23 WRAP CASE 1006.00 he slyly regular pack +100007 snow magenta pale lemon metallic Manufacturer#1 Brand#12 PROMO BURNISHED COPPER 4 MED PKG 1007.00 ronic accounts in +100008 spring powder sienna purple lime Manufacturer#4 Brand#45 ECONOMY BRUSHED BRASS 19 SM PKG 1008.00 ts. furious +100009 goldenrod sandy beige hot orange Manufacturer#3 Brand#32 SMALL BURNISHED STEEL 41 WRAP BOX 1009.00 dinos about the quick +100010 lime lavender slate cream brown Manufacturer#4 Brand#43 PROMO ANODIZED COPPER 19 JUMBO PACK 1010.01 gle slyly above the b + diff --git a/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy b/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy new file mode 100644 index 00000000000000..67e594b21f180c --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_parquet_skip_page", "p0,external,hive,external_docker,external_docker_hive") { + def q01 = { + qt_q01 """ + select * from lineitem where l_orderkey < 1000 order by l_orderkey,l_partkey limit 10; + """ + } + + def q02 = { + qt_q02 """ + select * from lineitem where l_orderkey > 5999000 order by l_orderkey,l_partkey limit 10; + """ + } + + def q03 = { + qt_q03 """ + select * from lineitem where l_orderkey > 2000000 and l_orderkey < 2001000 order by l_orderkey,l_partkey limit 10; + """ + } + + def q04 = { + qt_q04 """ + select * from customer where c_custkey < 10000 order by c_custkey limit 10; + """ + } + + def q05 = { + qt_q05 """ + select * from customer where c_custkey > 140000 order by c_custkey limit 10; + """ + } + + def q06 = { + qt_q06 """ + select * from customer where c_custkey > 100000 and c_custkey < 110000 order by c_custkey limit 10; + """ + } + + def q07 = { + qt_q07 """ + select * from orders where o_orderkey < 10000 order by o_orderkey limit 10; + """ + } + + def q08 = { + qt_q08 """ + select * from orders where o_orderkey > 5990000 order by o_orderkey limit 10; + """ + } + + def q09 = { + qt_q09 """ + select * from orders where o_orderkey > 2000000 and o_orderkey < 2010000 order by o_orderkey limit 10; + """ + } + + def q10 = { + qt_q10 """ + select * from part where p_partkey < 10000 order by p_partkey limit 10; + """ + } + + def q11 = { + qt_q08 """ + select * from part where p_partkey > 190000 order by p_partkey limit 10; + """ + } + + def q12 = { + qt_q12 """ + select * from part where p_partkey > 100000 and p_partkey < 110000 order by p_partkey limit 10; + """ + } + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + try { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = "${hivePrefix}_test_parquet" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + + sql """switch ${catalog_name}""" + sql """use `tpch1_parquet`""" + + sql """set enable_profile=true;""" + + q01() + q02() + q03() + q04() + q05() + q06() + q07() + q08() + q09() + q10() + q11() + q12() + + sql """drop catalog if exists ${catalog_name}""" + } finally { + } + } +} From 580181fd1f4070dbfc29a48fbb6712d1cbc243ae Mon Sep 17 00:00:00 2001 From: Lei Zhang <27994433+SWJTU-ZhangLei@users.noreply.github.com> Date: Fri, 26 Apr 2024 14:12:11 +0800 Subject: [PATCH 050/163] [fix](merge-cloud) Fix brpc mbvar prometheus format issue by patching brpc (#34098) * need to update thirdparty * https://github.com/apache/brpc/pull/2235 --- .../brpc-1.8.0-mbvar-format-issue.patch | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 thirdparty/patches/brpc-1.8.0-mbvar-format-issue.patch diff --git a/thirdparty/patches/brpc-1.8.0-mbvar-format-issue.patch b/thirdparty/patches/brpc-1.8.0-mbvar-format-issue.patch new file mode 100644 index 00000000000000..8e8bb1349e9e77 --- /dev/null +++ b/thirdparty/patches/brpc-1.8.0-mbvar-format-issue.patch @@ -0,0 +1,107 @@ +From 6de560b84fb4cc37461bc6698ea2effd64678465 Mon Sep 17 00:00:00 2001 +From: dylan <451809218@qq.com> +Date: Tue, 7 Nov 2023 14:05:37 +0800 +Subject: [PATCH] Fixup mbvar 
convert prometheus metrics format issue (#2082) + (#2235) + +--- + .../builtin/prometheus_metrics_service.cpp | 13 +++++- + src/brpc/builtin/prometheus_metrics_service.h | 1 + + ...pc_prometheus_metrics_service_unittest.cpp | 42 +++++++++++++++++++ + 3 files changed, 54 insertions(+), 2 deletions(-) + create mode 100644 test/brpc_prometheus_metrics_service_unittest.cpp + +diff --git a/src/brpc/builtin/prometheus_metrics_service.cpp b/src/brpc/builtin/prometheus_metrics_service.cpp +index 7bf8bbf3..88f675bb 100644 +--- a/src/brpc/builtin/prometheus_metrics_service.cpp ++++ b/src/brpc/builtin/prometheus_metrics_service.cpp +@@ -82,6 +82,12 @@ private: + std::map _m; + }; + ++butil::StringPiece GetMetricsName(const std::string& name) { ++ auto pos = name.find_first_of('{'); ++ int size = (pos == std::string::npos) ? name.size() : pos; ++ return butil::StringPiece(name.data(), size); ++} ++ + bool PrometheusMetricsDumper::dump(const std::string& name, + const butil::StringPiece& desc) { + if (!desc.empty() && desc[0] == '"') { +@@ -93,8 +99,11 @@ bool PrometheusMetricsDumper::dump(const std::string& name, + // Leave it to DumpLatencyRecorderSuffix to output Summary. + return true; + } +- *_os << "# HELP " << name << '\n' +- << "# TYPE " << name << " gauge" << '\n' ++ ++ auto metrics_name = GetMetricsName(name); ++ ++ *_os << "# HELP " << metrics_name << '\n' ++ << "# TYPE " << metrics_name << " gauge" << '\n' + << name << " " << desc << '\n'; + return true; + } +diff --git a/src/brpc/builtin/prometheus_metrics_service.h b/src/brpc/builtin/prometheus_metrics_service.h +index c844e1e7..541b395c 100644 +--- a/src/brpc/builtin/prometheus_metrics_service.h ++++ b/src/brpc/builtin/prometheus_metrics_service.h +@@ -31,6 +31,7 @@ public: + ::google::protobuf::Closure* done) override; + }; + ++butil::StringPiece GetMetricsName(const std::string& name); + int DumpPrometheusMetricsToIOBuf(butil::IOBuf* output); + + } // namepace brpc +diff --git a/test/brpc_prometheus_metrics_service_unittest.cpp b/test/brpc_prometheus_metrics_service_unittest.cpp +new file mode 100644 +index 00000000..b5b0bc10 +--- /dev/null ++++ b/test/brpc_prometheus_metrics_service_unittest.cpp +@@ -0,0 +1,42 @@ ++// Licensed to the Apache Software Foundation (ASF) under one ++// or more contributor license agreements. See the NOTICE file ++// distributed with this work for additional information ++// regarding copyright ownership. The ASF licenses this file ++// to you under the Apache License, Version 2.0 (the ++// "License"); you may not use this file except in compliance ++// with the License. You may obtain a copy of the License at ++// ++// http://www.apache.org/licenses/LICENSE-2.0 ++// ++// Unless required by applicable law or agreed to in writing, ++// software distributed under the License is distributed on an ++// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY ++// KIND, either express or implied. See the License for the ++// specific language governing permissions and limitations ++// under the License. 
++ ++// Date: 2023/05/06 15:10:00 ++ ++#include ++ ++#include "butil/strings/string_piece.h" ++#include "butil/iobuf.h" ++#include "brpc/builtin/prometheus_metrics_service.h" ++ ++namespace { ++ ++class PrometheusMetricsDumperTest : public testing::Test { ++protected: ++ void SetUp() {} ++ void TearDown() {} ++}; ++ ++TEST_F(PrometheusMetricsDumperTest, GetMetricsName) { ++ EXPECT_EQ("", brpc::GetMetricsName("")); ++ ++ EXPECT_EQ("commit_count", brpc::GetMetricsName("commit_count")); ++ ++ EXPECT_EQ("commit_count", brpc::GetMetricsName("commit_count{region=\"1000\"}")); ++} ++ ++} +-- +2.39.3 + From ce55fc4c444cf35d25c02a66e62bfd22d91ffab0 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Fri, 26 Apr 2024 14:31:49 +0800 Subject: [PATCH 051/163] [fix](Nereids) support not in predicate for delete command (#34153) --- .../doris/nereids/trees/plans/commands/DeleteFromCommand.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java index 7fc4657a17fec3..6d339a21d943cb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java @@ -326,6 +326,8 @@ private void checkPredicate(Expression predicate) { checkIsNull((IsNull) child); } else if (child instanceof ComparisonPredicate) { checkComparisonPredicate((ComparisonPredicate) child); + } else if (child instanceof InPredicate) { + checkInPredicate((InPredicate) child); } else { throw new AnalysisException("Where clause only supports compound predicate," + " binary predicate, is_null predicate or in predicate. 
But we meet " From 7a26e07f3bba0a6f011cccdf6bdf72c87a928b38 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Fri, 26 Apr 2024 14:33:03 +0800 Subject: [PATCH 052/163] [fix](pipeline_x) Crc32HashPartitioner should use ShuffleChannelIds (#34147) --- be/src/pipeline/exec/exchange_sink_operator.cpp | 8 ++++---- .../pipeline/exec/partitioned_hash_join_probe_operator.h | 4 ++-- .../pipeline/exec/partitioned_hash_join_sink_operator.h | 4 ++-- .../local_exchange/local_exchange_sink_operator.h | 4 ++-- be/src/vec/runtime/partitioner.cpp | 1 - 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index 79a6ee0e74881f..84381c6a8af746 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -221,8 +221,8 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) { } if (_part_type == TPartitionType::HASH_PARTITIONED) { _partition_count = channels.size(); - _partitioner.reset( - new vectorized::Crc32HashPartitioner(channels.size())); + _partitioner.reset(new vectorized::Crc32HashPartitioner( + channels.size())); RETURN_IF_ERROR(_partitioner->init(p._texprs)); RETURN_IF_ERROR(_partitioner->prepare(state, p._row_desc)); _profile->add_info_string("Partitioner", @@ -269,8 +269,8 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) { } else if (_part_type == TPartitionType::TABLE_SINK_HASH_PARTITIONED) { _partition_count = channels.size() * config::table_sink_partition_write_max_partition_nums_per_writer; - _partitioner.reset( - new vectorized::Crc32HashPartitioner(_partition_count)); + _partitioner.reset(new vectorized::Crc32HashPartitioner( + _partition_count)); _partition_function.reset(new HashPartitionFunction(_partitioner.get())); scale_writer_partitioning_exchanger.reset(new vectorized::ScaleWriterPartitioningExchanger< diff --git a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h index 5bdc5278ffcf5e..3702c2e1a6bd7a 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h +++ b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.h @@ -24,16 +24,16 @@ #include "pipeline/exec/hashjoin_build_sink.h" #include "pipeline/exec/hashjoin_probe_operator.h" #include "pipeline/exec/join_build_sink_operator.h" -#include "pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h" // LocalExchangeChannelIds #include "pipeline/pipeline_x/operator.h" #include "vec/runtime/partitioner.h" +#include "vec/sink/vdata_stream_sender.h" // ShuffleChannelIds namespace doris { class RuntimeState; namespace pipeline { -using PartitionerType = vectorized::Crc32HashPartitioner; +using PartitionerType = vectorized::Crc32HashPartitioner; class PartitionedHashJoinProbeOperatorX; diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h index 3f29e3093b6df0..68c6b970163f24 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h +++ b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h @@ -24,9 +24,9 @@ #include "pipeline/exec/hashjoin_build_sink.h" #include "pipeline/exec/hashjoin_probe_operator.h" #include "pipeline/exec/join_build_sink_operator.h" -#include "pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h" // LocalExchangeChannelIds #include "pipeline/pipeline_x/operator.h" #include "vec/runtime/partitioner.h" +#include 
"vec/sink/vdata_stream_sender.h" // ShuffleChannelIds namespace doris { class ExecNode; @@ -34,7 +34,7 @@ class RuntimeState; namespace pipeline { -using PartitionerType = vectorized::Crc32HashPartitioner; +using PartitionerType = vectorized::Crc32HashPartitioner; class PartitionedHashJoinSinkOperatorX; diff --git a/be/src/pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h b/be/src/pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h index b3ecf29736fda6..db6662a221ad8e 100644 --- a/be/src/pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h +++ b/be/src/pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h @@ -114,8 +114,8 @@ class LocalExchangeSinkOperatorX final : public DataSinkOperatorX(_num_partitions)); + _partitioner.reset(new vectorized::Crc32HashPartitioner( + _num_partitions)); RETURN_IF_ERROR(_partitioner->init(_texprs)); } else if (_type == ExchangeType::BUCKET_HASH_SHUFFLE) { _partitioner.reset(new vectorized::Crc32HashPartitioner( diff --git a/be/src/vec/runtime/partitioner.cpp b/be/src/vec/runtime/partitioner.cpp index db40610723cdb6..fadf6d73b95f76 100644 --- a/be/src/vec/runtime/partitioner.cpp +++ b/be/src/vec/runtime/partitioner.cpp @@ -103,6 +103,5 @@ template class Partitioner; template class XXHashPartitioner; template class Partitioner; template class Crc32HashPartitioner; -template class Crc32HashPartitioner; } // namespace doris::vectorized From 464620431d69e1959287469119e50752c16261eb Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 26 Apr 2024 16:33:40 +0800 Subject: [PATCH 053/163] Improve analyze timeout. (#33836) --- .../java/org/apache/doris/statistics/AnalysisManager.java | 4 ++-- .../org/apache/doris/statistics/ExternalAnalysisTask.java | 3 +++ .../java/org/apache/doris/statistics/JdbcAnalysisTask.java | 3 +++ .../java/org/apache/doris/statistics/OlapAnalysisTask.java | 3 +++ .../hive/test_hive_statistic_timeout.groovy | 7 ++++--- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 03314fe7748a13..b697c6f8327c64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -867,7 +867,7 @@ public void execute(ThreadPoolExecutor executor) { executor.submit(() -> { try { if (cancelled) { - errorMessages.add("Query timeout or user cancelled." + errorMessages.add("Query Timeout or user Cancelled." 
+ "Could set analyze_timeout to a bigger value."); return; } @@ -890,7 +890,7 @@ public void execute(ThreadPoolExecutor executor) { } if (!colNames.isEmpty()) { if (cancelled) { - throw new RuntimeException("Cancelled"); + throw new RuntimeException("User Cancelled or Timeout."); } throw new RuntimeException("Failed to analyze following columns:[" + String.join(",", colNames) + "] Reasons: " + String.join(",", errorMessages)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java index 7d3c9af254800d..d59133017ab128 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java @@ -56,6 +56,9 @@ public ExternalAnalysisTask(AnalysisInfo info) { } public void doExecute() throws Exception { + if (killed) { + return; + } if (isTableLevelTask) { getTableStats(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java index a318bd4594e5f6..4562d4336e8aee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java @@ -60,6 +60,9 @@ public JdbcAnalysisTask(AnalysisInfo info) { } public void doExecute() throws Exception { + if (killed) { + return; + } if (isTableLevelTask) { getTableStats(); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index ce7982d4f1ad58..4b651d322ba76a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -64,6 +64,9 @@ public OlapAnalysisTask(AnalysisInfo info) { } public void doExecute() throws Exception { + if (killed) { + return; + } // For empty table, write empty result directly, no need to run SQL to collect stats. 
if (info.rowCount == 0 && tableSample != null) { StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId, diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy index a3329f87712802..a52a230478851b 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy +++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy @@ -34,9 +34,10 @@ suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_ sql """use ${catalog_name}.tpch_1000_parquet""" sql """set global analyze_timeout=1""" try { - sql """analyze table part (p_partkey, p_container, p_type, p_retailprice) with sync with full;""" - } catch (Exception e) { - assertTrue(e.getMessage().contains("Cancelled")); + test { + sql """analyze table part (p_partkey, p_container, p_type, p_retailprice) with sync with full;""" + exception "Timeout" + } } finally { sql """set global analyze_timeout=43200""" } From 4e125b0d567adccd9dde1fc7bd0d805a93c85e89 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Fri, 26 Apr 2024 17:18:42 +0800 Subject: [PATCH 054/163] [refactor](cleanup) Clean up pipeline engine (PART I) (#33945) --- .../exec/aggregation_sink_operator.cpp | 4 - .../pipeline/exec/aggregation_sink_operator.h | 23 +- .../exec/aggregation_source_operator.cpp | 3 - .../exec/aggregation_source_operator.h | 20 - .../pipeline/exec/analytic_sink_operator.cpp | 2 - be/src/pipeline/exec/analytic_sink_operator.h | 2 - .../exec/analytic_source_operator.cpp | 2 - .../pipeline/exec/analytic_source_operator.h | 18 - .../exec/assert_num_rows_operator.cpp | 4 - .../pipeline/exec/assert_num_rows_operator.h | 22 +- be/src/pipeline/exec/const_value_operator.h | 52 - be/src/pipeline/exec/datagen_operator.cpp | 14 - be/src/pipeline/exec/datagen_operator.h | 19 - ...ct_streaming_aggregation_sink_operator.cpp | 100 - ...inct_streaming_aggregation_sink_operator.h | 79 - ..._streaming_aggregation_source_operator.cpp | 91 - ...ct_streaming_aggregation_source_operator.h | 67 - be/src/pipeline/exec/empty_set_operator.cpp | 2 - be/src/pipeline/exec/empty_set_operator.h | 24 +- .../pipeline/exec/empty_source_operator.cpp | 27 - be/src/pipeline/exec/empty_source_operator.h | 89 - be/src/pipeline/exec/es_scan_operator.h | 2 - .../pipeline/exec/exchange_sink_operator.cpp | 53 - be/src/pipeline/exec/exchange_sink_operator.h | 30 - .../exec/exchange_source_operator.cpp | 10 - .../pipeline/exec/exchange_source_operator.h | 17 - be/src/pipeline/exec/file_scan_operator.h | 2 - .../exec/group_commit_block_sink_operator.cpp | 4 - .../exec/group_commit_block_sink_operator.h | 25 +- be/src/pipeline/exec/hashjoin_build_sink.cpp | 2 - be/src/pipeline/exec/hashjoin_build_sink.h | 9 +- .../pipeline/exec/hashjoin_probe_operator.cpp | 8 +- .../pipeline/exec/hashjoin_probe_operator.h | 17 - .../exec/hive_table_sink_operator.cpp | 4 - .../pipeline/exec/hive_table_sink_operator.h | 24 +- be/src/pipeline/exec/jdbc_scan_operator.h | 1 - .../pipeline/exec/join_build_sink_operator.h | 8 +- be/src/pipeline/exec/join_probe_operator.h | 8 +- be/src/pipeline/exec/meta_scan_operator.h | 2 - .../exec/multi_cast_data_stream_sink.cpp | 4 - .../exec/multi_cast_data_stream_sink.h | 19 - .../exec/multi_cast_data_stream_source.cpp | 98 - .../exec/multi_cast_data_stream_source.h | 1 - be/src/pipeline/exec/mysql_scan_operator.cpp | 37 - be/src/pipeline/exec/mysql_scan_operator.h | 43 - 
.../exec/nested_loop_join_build_operator.cpp | 3 +- .../exec/nested_loop_join_build_operator.h | 24 +- .../exec/nested_loop_join_probe_operator.cpp | 13 - .../exec/nested_loop_join_probe_operator.h | 21 - be/src/pipeline/exec/olap_scan_operator.h | 2 - .../exec/olap_table_sink_operator.cpp | 8 - .../pipeline/exec/olap_table_sink_operator.h | 24 +- .../exec/olap_table_sink_v2_operator.cpp | 4 - .../exec/olap_table_sink_v2_operator.h | 7 +- be/src/pipeline/exec/operator.h | 11 - .../exec/partition_sort_sink_operator.cpp | 4 - .../exec/partition_sort_sink_operator.h | 28 +- .../exec/partition_sort_source_operator.cpp | 5 - .../exec/partition_sort_source_operator.h | 20 - .../partitioned_aggregation_source_operator.h | 1 - .../partitioned_hash_join_sink_operator.h | 1 - be/src/pipeline/exec/repeat_operator.cpp | 13 - be/src/pipeline/exec/repeat_operator.h | 18 - .../exec/result_file_sink_operator.cpp | 16 - .../pipeline/exec/result_file_sink_operator.h | 23 +- be/src/pipeline/exec/result_sink_operator.cpp | 19 - be/src/pipeline/exec/result_sink_operator.h | 16 - be/src/pipeline/exec/scan_operator.cpp | 33 - be/src/pipeline/exec/scan_operator.h | 21 - be/src/pipeline/exec/schema_scan_operator.cpp | 13 - be/src/pipeline/exec/schema_scan_operator.h | 19 - be/src/pipeline/exec/select_operator.cpp | 28 - be/src/pipeline/exec/select_operator.h | 21 +- .../pipeline/exec/set_probe_sink_operator.cpp | 37 - .../pipeline/exec/set_probe_sink_operator.h | 33 - be/src/pipeline/exec/set_sink_operator.cpp | 25 - be/src/pipeline/exec/set_sink_operator.h | 28 - be/src/pipeline/exec/set_source_operator.cpp | 25 - be/src/pipeline/exec/set_source_operator.h | 25 - be/src/pipeline/exec/sort_sink_operator.cpp | 2 - be/src/pipeline/exec/sort_sink_operator.h | 25 +- be/src/pipeline/exec/sort_source_operator.cpp | 2 - be/src/pipeline/exec/sort_source_operator.h | 17 - .../streaming_aggregation_sink_operator.cpp | 93 - .../streaming_aggregation_sink_operator.h | 76 - .../streaming_aggregation_source_operator.cpp | 76 - .../streaming_aggregation_source_operator.h | 64 - .../pipeline/exec/table_function_operator.cpp | 12 - .../pipeline/exec/table_function_operator.h | 17 - be/src/pipeline/exec/table_sink_operator.h | 49 - be/src/pipeline/exec/union_sink_operator.cpp | 65 - be/src/pipeline/exec/union_sink_operator.h | 34 - .../pipeline/exec/union_source_operator.cpp | 73 - be/src/pipeline/exec/union_source_operator.h | 35 - be/src/pipeline/pipeline.h | 2 +- be/src/pipeline/pipeline_fragment_context.cpp | 1688 ++++++++++++----- be/src/pipeline/pipeline_fragment_context.h | 220 ++- be/src/pipeline/pipeline_task.cpp | 34 +- be/src/pipeline/pipeline_task.h | 2 - be/src/pipeline/pipeline_x/operator.cpp | 2 - .../pipeline_x_fragment_context.cpp | 1522 --------------- .../pipeline_x/pipeline_x_fragment_context.h | 247 --- .../pipeline/pipeline_x/pipeline_x_task.cpp | 2 +- be/src/pipeline/pipeline_x/pipeline_x_task.h | 2 - be/src/pipeline/task_scheduler.cpp | 218 +-- be/src/pipeline/task_scheduler.h | 33 +- be/src/runtime/exec_env.h | 10 - be/src/runtime/exec_env_init.cpp | 11 +- be/src/runtime/fragment_mgr.cpp | 8 +- be/src/runtime/fragment_mgr.h | 3 +- be/src/runtime/query_context.cpp | 11 +- be/src/runtime/runtime_state.cpp | 2 +- be/src/runtime/runtime_state.h | 6 +- .../runtime/workload_group/workload_group.cpp | 5 +- 114 files changed, 1408 insertions(+), 4941 deletions(-) delete mode 100644 be/src/pipeline/exec/const_value_operator.h delete mode 100644 be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.cpp 
delete mode 100644 be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.h delete mode 100644 be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.cpp delete mode 100644 be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.h delete mode 100644 be/src/pipeline/exec/empty_source_operator.cpp delete mode 100644 be/src/pipeline/exec/empty_source_operator.h delete mode 100644 be/src/pipeline/exec/mysql_scan_operator.cpp delete mode 100644 be/src/pipeline/exec/mysql_scan_operator.h delete mode 100644 be/src/pipeline/exec/select_operator.cpp delete mode 100644 be/src/pipeline/exec/streaming_aggregation_sink_operator.cpp delete mode 100644 be/src/pipeline/exec/streaming_aggregation_sink_operator.h delete mode 100644 be/src/pipeline/exec/streaming_aggregation_source_operator.cpp delete mode 100644 be/src/pipeline/exec/streaming_aggregation_source_operator.h delete mode 100644 be/src/pipeline/exec/table_sink_operator.h delete mode 100644 be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp delete mode 100644 be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index 6c9d27e2a2b063..d947dd55c36194 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -19,16 +19,12 @@ #include -#include "pipeline/exec/distinct_streaming_aggregation_sink_operator.h" #include "pipeline/exec/operator.h" -#include "pipeline/exec/streaming_aggregation_sink_operator.h" #include "runtime/primitive_type.h" #include "vec/common/hash_table/hash.h" namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(AggSinkOperator, StreamingOperator) - /// The minimum reduction factor (input rows divided by output rows) to grow hash tables /// in a streaming preaggregation, given that the hash tables are currently the given /// size or above. 
The sizes roughly correspond to hash table sizes where the bucket diff --git a/be/src/pipeline/exec/aggregation_sink_operator.h b/be/src/pipeline/exec/aggregation_sink_operator.h index 0c34acfd7dfe84..ee31db3691759d 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.h +++ b/be/src/pipeline/exec/aggregation_sink_operator.h @@ -23,26 +23,8 @@ #include "pipeline/pipeline_x/operator.h" #include "runtime/block_spill_manager.h" #include "runtime/exec_env.h" -#include "vec/exec/vaggregation_node.h" -namespace doris { -class ExecNode; - -namespace pipeline { - -class AggSinkOperatorBuilder final : public OperatorBuilder { -public: - AggSinkOperatorBuilder(int32_t, ExecNode*); - - OperatorPtr build_operator() override; - bool is_sink() const override { return true; } -}; - -class AggSinkOperator final : public StreamingOperator { -public: - AggSinkOperator(OperatorBuilderBase* operator_builder, ExecNode* node); - bool can_write() override { return true; } -}; +namespace doris::pipeline { class AggSinkOperatorX; @@ -217,5 +199,4 @@ class AggSinkOperatorX final : public DataSinkOperatorX { RowDescriptor _agg_fn_output_row_descriptor; }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp b/be/src/pipeline/exec/aggregation_source_operator.cpp index cff6f9fec42fe9..f53d96d71aa2ae 100644 --- a/be/src/pipeline/exec/aggregation_source_operator.cpp +++ b/be/src/pipeline/exec/aggregation_source_operator.cpp @@ -22,13 +22,10 @@ #include "common/exception.h" #include "pipeline/exec/operator.h" -#include "pipeline/exec/streaming_aggregation_source_operator.h" #include "vec//utils/util.hpp" namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(AggSourceOperator, SourceOperator) - AggLocalState::AggLocalState(RuntimeState* state, OperatorXBase* parent) : Base(state, parent), _get_results_timer(nullptr), diff --git a/be/src/pipeline/exec/aggregation_source_operator.h b/be/src/pipeline/exec/aggregation_source_operator.h index 1d1f564d41aa62..d43ec7db0b550d 100644 --- a/be/src/pipeline/exec/aggregation_source_operator.h +++ b/be/src/pipeline/exec/aggregation_source_operator.h @@ -21,32 +21,12 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vaggregation_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -class AggSourceOperatorBuilder final : public OperatorBuilder { -public: - AggSourceOperatorBuilder(int32_t, ExecNode*); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class AggSourceOperator final : public SourceOperator { -public: - AggSourceOperator(OperatorBuilderBase*, ExecNode*); - // if exec node split to: sink, source operator. 
the source operator - // should skip `alloc_resource()` function call, only sink operator - // call the function - Status open(RuntimeState*) override { return Status::OK(); } -}; - class AggSourceOperatorX; class AggLocalState final : public PipelineXLocalState { diff --git a/be/src/pipeline/exec/analytic_sink_operator.cpp b/be/src/pipeline/exec/analytic_sink_operator.cpp index a1d3384edc6dde..12c4e7634e71a6 100644 --- a/be/src/pipeline/exec/analytic_sink_operator.cpp +++ b/be/src/pipeline/exec/analytic_sink_operator.cpp @@ -24,8 +24,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(AnalyticSinkOperator, StreamingOperator) - Status AnalyticSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { RETURN_IF_ERROR(PipelineXSinkLocalState::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/analytic_sink_operator.h b/be/src/pipeline/exec/analytic_sink_operator.h index 3ae4a7b5cff5ca..0098c108e53d4e 100644 --- a/be/src/pipeline/exec/analytic_sink_operator.h +++ b/be/src/pipeline/exec/analytic_sink_operator.h @@ -23,10 +23,8 @@ #include "operator.h" #include "pipeline/pipeline_x/dependency.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vanalytic_eval_node.h" namespace doris { -class ExecNode; namespace pipeline { class AnalyticSinkOperatorBuilder final : public OperatorBuilder { diff --git a/be/src/pipeline/exec/analytic_source_operator.cpp b/be/src/pipeline/exec/analytic_source_operator.cpp index f6658583d4657a..85995732fff3d4 100644 --- a/be/src/pipeline/exec/analytic_source_operator.cpp +++ b/be/src/pipeline/exec/analytic_source_operator.cpp @@ -24,8 +24,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(AnalyticSourceOperator, SourceOperator) - AnalyticLocalState::AnalyticLocalState(RuntimeState* state, OperatorXBase* parent) : PipelineXLocalState(state, parent), _output_block_index(0), diff --git a/be/src/pipeline/exec/analytic_source_operator.h b/be/src/pipeline/exec/analytic_source_operator.h index 17a4d34ec739b3..075053a486600c 100644 --- a/be/src/pipeline/exec/analytic_source_operator.h +++ b/be/src/pipeline/exec/analytic_source_operator.h @@ -22,30 +22,12 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vanalytic_eval_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -class AnalyticSourceOperatorBuilder final : public OperatorBuilder { -public: - AnalyticSourceOperatorBuilder(int32_t, ExecNode*); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class AnalyticSourceOperator final : public SourceOperator { -public: - AnalyticSourceOperator(OperatorBuilderBase*, ExecNode*); - - Status open(RuntimeState*) override { return Status::OK(); } -}; - class AnalyticSourceOperatorX; class AnalyticLocalState final : public PipelineXLocalState { public: diff --git a/be/src/pipeline/exec/assert_num_rows_operator.cpp b/be/src/pipeline/exec/assert_num_rows_operator.cpp index ef0efd3f86bf2b..4a51002beff389 100644 --- a/be/src/pipeline/exec/assert_num_rows_operator.cpp +++ b/be/src/pipeline/exec/assert_num_rows_operator.cpp @@ -22,10 +22,6 @@ namespace doris::pipeline { -OperatorPtr AssertNumRowsOperatorBuilder::build_operator() { - return std::make_shared(this, _node); -} - AssertNumRowsOperatorX::AssertNumRowsOperatorX(ObjectPool* pool, const TPlanNode& tnode, int operator_id, const DescriptorTbl& descs) : StreamingOperatorX(pool, tnode, operator_id, descs), diff --git 
a/be/src/pipeline/exec/assert_num_rows_operator.h b/be/src/pipeline/exec/assert_num_rows_operator.h index 4d6d835f815aa4..07a90d6b471144 100644 --- a/be/src/pipeline/exec/assert_num_rows_operator.h +++ b/be/src/pipeline/exec/assert_num_rows_operator.h @@ -19,25 +19,8 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vassert_num_rows_node.h" -namespace doris { - -namespace pipeline { - -class AssertNumRowsOperatorBuilder final : public OperatorBuilder { -public: - AssertNumRowsOperatorBuilder(int32_t id, ExecNode* node) - : OperatorBuilder(id, "AssertNumRowsOperator", node) {} - - OperatorPtr build_operator() override; -}; - -class AssertNumRowsOperator final : public StreamingOperator { -public: - AssertNumRowsOperator(OperatorBuilderBase* operator_builder, ExecNode* node) - : StreamingOperator(operator_builder, node) {} -}; +namespace doris::pipeline { class AssertNumRowsLocalState final : public PipelineXLocalState { public: @@ -70,5 +53,4 @@ class AssertNumRowsOperatorX final : public StreamingOperatorX { -public: - ConstValueOperatorBuilder(int32_t id, ExecNode* node) - : OperatorBuilder(id, "ConstValueOperator", node) {} - - OperatorPtr build_operator() override; - - bool is_source() const override { return true; } -}; - -class ConstValueOperator final : public SourceOperator { -public: - ConstValueOperator(OperatorBuilderBase* operator_builder, ExecNode* node) - : SourceOperator(operator_builder, node) {} - - bool can_read() override { return true; } -}; - -OperatorPtr ConstValueOperatorBuilder::build_operator() { - return std::make_shared(this, _node); -} - -} // namespace pipeline -} // namespace doris \ No newline at end of file diff --git a/be/src/pipeline/exec/datagen_operator.cpp b/be/src/pipeline/exec/datagen_operator.cpp index 4fbe21f71d5e32..95b284c94b4950 100644 --- a/be/src/pipeline/exec/datagen_operator.cpp +++ b/be/src/pipeline/exec/datagen_operator.cpp @@ -23,7 +23,6 @@ #include "util/runtime_profile.h" #include "vec/exec/data_gen_functions/vdata_gen_function_inf.h" #include "vec/exec/data_gen_functions/vnumbers_tvf.h" -#include "vec/exec/vdata_gen_scan_node.h" namespace doris { class RuntimeState; @@ -31,19 +30,6 @@ class RuntimeState; namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(DataGenOperator, SourceOperator) - -Status DataGenOperator::open(RuntimeState* state) { - RETURN_IF_ERROR(SourceOperator::open(state)); - return _node->open(state); -} - -Status DataGenOperator::close(RuntimeState* state) { - RETURN_IF_ERROR(SourceOperator::close(state)); - RETURN_IF_ERROR(_node->close(state)); - return Status::OK(); -} - DataGenSourceOperatorX::DataGenSourceOperatorX(ObjectPool* pool, const TPlanNode& tnode, int operator_id, const DescriptorTbl& descs) : OperatorX(pool, tnode, operator_id, descs), diff --git a/be/src/pipeline/exec/datagen_operator.h b/be/src/pipeline/exec/datagen_operator.h index af8eda179dac7c..edcc85c8342e12 100644 --- a/be/src/pipeline/exec/datagen_operator.h +++ b/be/src/pipeline/exec/datagen_operator.h @@ -25,30 +25,11 @@ #include "vec/exec/vdata_gen_scan_node.h" namespace doris { -class ExecNode; class RuntimeState; } // namespace doris namespace doris::pipeline { -class DataGenOperatorBuilder : public OperatorBuilder { -public: - DataGenOperatorBuilder(int32_t id, ExecNode* exec_node); - bool is_source() const override { return true; } - OperatorPtr build_operator() override; -}; - -class DataGenOperator : public SourceOperator { -public: - DataGenOperator(OperatorBuilderBase* operator_builder, ExecNode* 
datagen_node); - - bool can_read() override { return true; } - - Status open(RuntimeState* state) override; - - Status close(RuntimeState* state) override; -}; - class DataGenSourceOperatorX; class DataGenLocalState final : public PipelineXLocalState<> { public: diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.cpp b/be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.cpp deleted file mode 100644 index 3cb18168dcb215..00000000000000 --- a/be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "distinct_streaming_aggregation_sink_operator.h" - -#include - -#include -#include - -#include "common/compiler_util.h" // IWYU pragma: keep -#include "pipeline/exec/data_queue.h" -#include "pipeline/exec/operator.h" -#include "vec/exec/distinct_vaggregation_node.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; -} // namespace doris - -namespace doris::pipeline { - -DistinctStreamingAggSinkOperator::DistinctStreamingAggSinkOperator( - OperatorBuilderBase* operator_builder, ExecNode* agg_node, std::shared_ptr queue) - : StreamingOperator(operator_builder, agg_node), _data_queue(std::move(queue)) {} - -bool DistinctStreamingAggSinkOperator::can_write() { - // sink and source in diff threads - return _data_queue->has_enough_space_to_push(); -} - -Status DistinctStreamingAggSinkOperator::sink(RuntimeState* state, vectorized::Block* in_block, - SourceState source_state) { - if (in_block && in_block->rows() > 0) { - if (_output_block == nullptr) { - _output_block = _data_queue->get_free_block(); - } - RETURN_IF_ERROR( - _node->_distinct_pre_agg_with_serialized_key(in_block, _output_block.get())); - bool stop_emplace_flag = _node->is_stop_emplace_flag(); - // get enough data or reached limit rows, need push block to queue - if (!stop_emplace_flag && _node->limit() != -1 && - (_output_block->rows() + _output_distinct_rows) >= _node->limit()) { - auto limit_rows = _node->limit() - _output_distinct_rows; - _output_block->set_num_rows(limit_rows); - _output_distinct_rows += limit_rows; - _data_queue->push_block(std::move(_output_block)); - } else if (stop_emplace_flag || _output_block->rows() >= state->batch_size()) { - if (!stop_emplace_flag) { // if stop_emplace_flag = true, will be return rows directly, not get distinct - _output_distinct_rows += _output_block->rows(); - } - _data_queue->push_block(std::move(_output_block)); - } - } - - // reach limit or source finish - if ((UNLIKELY(source_state == SourceState::FINISHED)) || reached_limited_rows()) { - if (_output_block != nullptr) { //maybe the last block with eos - _output_distinct_rows += 
_output_block->rows(); - _data_queue->push_block(std::move(_output_block)); - } - _data_queue->set_finish(); - return Status::Error(""); - } - return Status::OK(); -} - -Status DistinctStreamingAggSinkOperator::close(RuntimeState* state) { - if (_data_queue && !_data_queue->is_finish()) { - // finish should be set, if not set here means error. - _data_queue->set_canceled(); - } - return StreamingOperator::close(state); -} - -DistinctStreamingAggSinkOperatorBuilder::DistinctStreamingAggSinkOperatorBuilder( - int32_t id, ExecNode* exec_node, std::shared_ptr queue) - : OperatorBuilder(id, "DistinctStreamingAggSinkOperator", exec_node), - _data_queue(std::move(queue)) {} - -OperatorPtr DistinctStreamingAggSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _node, _data_queue); -} - -} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.h b/be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.h deleted file mode 100644 index c872a2b299e14c..00000000000000 --- a/be/src/pipeline/exec/distinct_streaming_aggregation_sink_operator.h +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#include -#include - -#include "aggregation_sink_operator.h" -#include "common/status.h" -#include "operator.h" -#include "pipeline/exec/aggregation_sink_operator.h" -#include "pipeline/exec/aggregation_source_operator.h" -#include "util/runtime_profile.h" -#include "vec/core/block.h" -#include "vec/exec/distinct_vaggregation_node.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; - -namespace pipeline { -class DataQueue; - -class DistinctStreamingAggSinkOperatorBuilder final - : public OperatorBuilder { -public: - DistinctStreamingAggSinkOperatorBuilder(int32_t, ExecNode*, std::shared_ptr); - - OperatorPtr build_operator() override; - - bool is_sink() const override { return true; } - bool is_source() const override { return false; } - -private: - std::shared_ptr _data_queue; -}; - -class DistinctStreamingAggSinkOperator final - : public StreamingOperator { -public: - DistinctStreamingAggSinkOperator(OperatorBuilderBase* operator_builder, ExecNode*, - std::shared_ptr); - - Status sink(RuntimeState* state, vectorized::Block* block, SourceState source_state) override; - - bool can_write() override; - - Status close(RuntimeState* state) override; - - bool reached_limited_rows() { - return _node->limit() != -1 && _output_distinct_rows >= _node->limit(); - } - -private: - int64_t _output_distinct_rows = 0; - std::shared_ptr _data_queue; - std::unique_ptr _output_block = vectorized::Block::create_unique(); -}; - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.cpp b/be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.cpp deleted file mode 100644 index 5ab8bd30bc845f..00000000000000 --- a/be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.cpp +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "distinct_streaming_aggregation_source_operator.h" - -#include - -#include "pipeline/exec/data_queue.h" -#include "pipeline/exec/operator.h" -#include "runtime/descriptors.h" -#include "util/runtime_profile.h" -#include "vec/core/block.h" -#include "vec/exec/distinct_vaggregation_node.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; - -namespace pipeline { -DistinctStreamingAggSourceOperator::DistinctStreamingAggSourceOperator( - OperatorBuilderBase* templ, ExecNode* node, std::shared_ptr queue) - : SourceOperator(templ, node), _data_queue(std::move(queue)) {} - -bool DistinctStreamingAggSourceOperator::can_read() { - return _data_queue->has_data_or_finished(); -} - -Status DistinctStreamingAggSourceOperator::pull_data(RuntimeState* state, vectorized::Block* block, - bool* eos) { - std::unique_ptr agg_block; - RETURN_IF_ERROR(_data_queue->get_block_from_queue(&agg_block)); - if (agg_block != nullptr) { - block->swap(*agg_block); - agg_block->clear_column_data(block->columns()); - _data_queue->push_free_block(std::move(agg_block)); - } - if (_data_queue->data_exhausted()) { //the sink is eos or reached limit - *eos = true; - } - _node->_make_nullable_output_key(block); - if (_node->is_streaming_preagg() == false) { - // dispose the having clause, should not be execute in prestreaming agg - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_node->get_conjuncts(), block, - block->columns())); - } - - _node->add_num_rows_returned(block->rows()); - return Status::OK(); -} - -Status DistinctStreamingAggSourceOperator::get_block(RuntimeState* state, vectorized::Block* block, - SourceState& source_state) { - bool eos = false; - RETURN_IF_ERROR(_node->get_next_after_projects( - state, block, &eos, - std::bind(&DistinctStreamingAggSourceOperator::pull_data, this, std::placeholders::_1, - std::placeholders::_2, std::placeholders::_3))); - if (UNLIKELY(eos)) { - source_state = SourceState::FINISHED; - } else { - source_state = SourceState::DEPEND_ON_SOURCE; - } - return Status::OK(); -} - -DistinctStreamingAggSourceOperatorBuilder::DistinctStreamingAggSourceOperatorBuilder( - int32_t id, ExecNode* exec_node, std::shared_ptr queue) - : OperatorBuilder(id, "DistinctStreamingAggSourceOperator", exec_node), - _data_queue(std::move(queue)) {} - -OperatorPtr DistinctStreamingAggSourceOperatorBuilder::build_operator() { - return std::make_shared(this, _node, _data_queue); -} - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.h b/be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.h deleted file mode 100644 index e8fd21310bbd7b..00000000000000 --- a/be/src/pipeline/exec/distinct_streaming_aggregation_source_operator.h +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. -#pragma once - -#include - -#include -#include - -#include "common/status.h" -#include "operator.h" -#include "pipeline/exec/aggregation_source_operator.h" -#include "vec/exec/distinct_vaggregation_node.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; - -namespace vectorized { -class Block; -} // namespace vectorized -namespace pipeline { -class DataQueue; - -class DistinctStreamingAggSourceOperatorBuilder final - : public OperatorBuilder { -public: - DistinctStreamingAggSourceOperatorBuilder(int32_t, ExecNode*, std::shared_ptr); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; - -private: - std::shared_ptr _data_queue; -}; - -class DistinctStreamingAggSourceOperator final - : public SourceOperator { -public: - DistinctStreamingAggSourceOperator(OperatorBuilderBase*, ExecNode*, std::shared_ptr); - bool can_read() override; - Status get_block(RuntimeState*, vectorized::Block*, SourceState& source_state) override; - Status open(RuntimeState*) override { return Status::OK(); } - Status pull_data(RuntimeState* state, vectorized::Block* output_block, bool* eos); - -private: - std::shared_ptr _data_queue; -}; - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/exec/empty_set_operator.cpp b/be/src/pipeline/exec/empty_set_operator.cpp index 02dc80258031b6..7233e46dfd1e52 100644 --- a/be/src/pipeline/exec/empty_set_operator.cpp +++ b/be/src/pipeline/exec/empty_set_operator.cpp @@ -23,8 +23,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(EmptySetSourceOperator, SourceOperator) - Status EmptySetSourceOperatorX::get_block(RuntimeState* state, vectorized::Block* block, bool* eos) { *eos = true; diff --git a/be/src/pipeline/exec/empty_set_operator.h b/be/src/pipeline/exec/empty_set_operator.h index b65139fb985a5f..3d5e9a72cf2bfe 100644 --- a/be/src/pipeline/exec/empty_set_operator.h +++ b/be/src/pipeline/exec/empty_set_operator.h @@ -21,27 +21,8 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vempty_set_node.h" -namespace doris { -class ExecNode; - -namespace pipeline { - -class EmptySetSourceOperatorBuilder final : public OperatorBuilder { -public: - EmptySetSourceOperatorBuilder(int32_t id, ExecNode* empty_set_node); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class EmptySetSourceOperator final : public SourceOperator { -public: - EmptySetSourceOperator(OperatorBuilderBase* operator_builder, ExecNode* empty_set_node); - bool can_read() override { return true; } -}; +namespace doris::pipeline { class EmptySetLocalState final : public PipelineXLocalState { public: @@ -63,5 +44,4 @@ class EmptySetSourceOperatorX final : public OperatorX { [[nodiscard]] bool is_source() const override { return true; } }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/empty_source_operator.cpp b/be/src/pipeline/exec/empty_source_operator.cpp deleted file mode 100644 index 78f5c946621e3b..00000000000000 --- a/be/src/pipeline/exec/empty_source_operator.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "empty_source_operator.h" - -#include "pipeline/exec/operator.h" - -namespace doris::pipeline { -OperatorPtr EmptySourceOperatorBuilder::build_operator() { - return std::make_shared(this, _exec_node); -} - -} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/empty_source_operator.h b/be/src/pipeline/exec/empty_source_operator.h deleted file mode 100644 index b85d2b1a2ca9dc..00000000000000 --- a/be/src/pipeline/exec/empty_source_operator.h +++ /dev/null @@ -1,89 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#include - -#include "common/status.h" -#include "operator.h" -#include "runtime/descriptors.h" - -namespace doris { -class RuntimeState; - -namespace vectorized { -class Block; -} // namespace vectorized -} // namespace doris - -namespace doris::pipeline { - -class EmptySourceOperatorBuilder final : public OperatorBuilderBase { -public: - EmptySourceOperatorBuilder(int32_t id, const RowDescriptor& row_descriptor, ExecNode* exec_node) - : OperatorBuilderBase(id, "EmptySourceOperator"), - _row_descriptor(row_descriptor), - _exec_node(exec_node) {} - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; - - const RowDescriptor& row_desc() const override { return _row_descriptor; } - -private: - RowDescriptor _row_descriptor; - ExecNode* _exec_node = nullptr; -}; - -class EmptySourceOperator final : public OperatorBase { -public: - EmptySourceOperator(OperatorBuilderBase* builder, ExecNode* exec_node) - : OperatorBase(builder), _exec_node(exec_node) {} - - bool can_read() override { return true; } - bool is_pending_finish() const override { return false; } - - Status prepare(RuntimeState*) override { return Status::OK(); } - - Status open(RuntimeState*) override { return Status::OK(); } - - Status get_block(RuntimeState* /*runtime_state*/, vectorized::Block* /*block*/, - SourceState& result_state) override { - result_state = SourceState::FINISHED; - return Status::OK(); - } - - Status sink(RuntimeState*, vectorized::Block*, SourceState) override { return Status::OK(); } - - Status close(RuntimeState* state) override { - RETURN_IF_ERROR(_exec_node->close(state)); - return Status::OK(); - } - - [[nodiscard]] RuntimeProfile* get_runtime_profile() const override { - return _exec_node->runtime_profile(); - } - -private: - ExecNode* _exec_node = nullptr; -}; - -} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/es_scan_operator.h b/be/src/pipeline/exec/es_scan_operator.h index cdbd6922454c11..c4e7772807181e 100644 --- a/be/src/pipeline/exec/es_scan_operator.h +++ b/be/src/pipeline/exec/es_scan_operator.h @@ -25,10 +25,8 @@ #include "operator.h" #include "pipeline/exec/scan_operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/scan/vscan_node.h" namespace doris { -class ExecNode; namespace vectorized { class NewEsScanner; diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index 84381c6a8af746..7db22c98f9f6b7 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -30,7 +30,6 @@ #include "pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h" #include "vec/columns/column_const.h" #include "vec/exprs/vexpr.h" -#include "vec/sink/vdata_stream_sender.h" namespace doris { class DataSink; @@ -38,58 +37,6 @@ class DataSink; namespace doris::pipeline { -ExchangeSinkOperatorBuilder::ExchangeSinkOperatorBuilder(int32_t id, DataSink* sink, - int mult_cast_id) - : DataSinkOperatorBuilder(id, "ExchangeSinkOperator", sink), _mult_cast_id(mult_cast_id) {} - -OperatorPtr ExchangeSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _sink, _mult_cast_id); -} - -ExchangeSinkOperator::ExchangeSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink, - int mult_cast_id) - : DataSinkOperator(operator_builder, sink), _mult_cast_id(mult_cast_id) {} - -Status ExchangeSinkOperator::init(const TDataSink& tsink) { - // -1 means not the mult cast stream sender - if 
(_mult_cast_id == -1) { - _dest_node_id = tsink.stream_sink.dest_node_id; - } else { - _dest_node_id = tsink.multi_cast_stream_sink.sinks[_mult_cast_id].dest_node_id; - } - return Status::OK(); -} - -Status ExchangeSinkOperator::prepare(RuntimeState* state) { - _state = state; - PUniqueId id; - id.set_hi(_state->query_id().hi); - id.set_lo(_state->query_id().lo); - _sink_buffer = std::make_unique>( - id, _dest_node_id, _sink->_sender_id, _state->be_number(), state); - - RETURN_IF_ERROR(DataSinkOperator::prepare(state)); - _sink->register_pipeline_channels(_sink_buffer.get()); - return Status::OK(); -} - -bool ExchangeSinkOperator::can_write() { - return _sink_buffer->can_write() && _sink->channel_all_can_write(); -} - -bool ExchangeSinkOperator::is_pending_finish() const { - return _sink_buffer->is_pending_finish(); -} - -Status ExchangeSinkOperator::close(RuntimeState* state) { - RETURN_IF_ERROR(DataSinkOperator::close(state)); - if (_sink_buffer) { - _sink_buffer->update_profile(_sink->profile()); - _sink_buffer->close(); - } - return Status::OK(); -} - Status ExchangeSinkLocalState::serialize_block(vectorized::Block* src, PBlock* dest, int num_receivers) { return _parent->cast().serialize_block(*this, src, dest, num_receivers); diff --git a/be/src/pipeline/exec/exchange_sink_operator.h b/be/src/pipeline/exec/exchange_sink_operator.h index f275365c0e85a3..aaa89d246be656 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.h +++ b/be/src/pipeline/exec/exchange_sink_operator.h @@ -35,36 +35,6 @@ class TDataSink; namespace pipeline { -class ExchangeSinkOperatorBuilder final - : public DataSinkOperatorBuilder { -public: - ExchangeSinkOperatorBuilder(int32_t id, DataSink* sink, int mult_cast_id = -1); - - OperatorPtr build_operator() override; - -private: - int _mult_cast_id = -1; -}; - -// Now local exchange is not supported since VDataStreamRecvr is considered as a pipeline broker. 
-class ExchangeSinkOperator final : public DataSinkOperator { -public: - ExchangeSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink, int mult_cast_id); - Status init(const TDataSink& tsink) override; - - Status prepare(RuntimeState* state) override; - bool can_write() override; - bool is_pending_finish() const override; - - Status close(RuntimeState* state) override; - -private: - std::unique_ptr> _sink_buffer = nullptr; - int _dest_node_id = -1; - RuntimeState* _state = nullptr; - int _mult_cast_id = -1; -}; - class ExchangeSinkLocalState final : public PipelineXSinkLocalState<> { ENABLE_FACTORY_CREATOR(ExchangeSinkLocalState); using Base = PipelineXSinkLocalState<>; diff --git a/be/src/pipeline/exec/exchange_source_operator.cpp b/be/src/pipeline/exec/exchange_source_operator.cpp index a23dae6dd62dd1..1a891655ee8daa 100644 --- a/be/src/pipeline/exec/exchange_source_operator.cpp +++ b/be/src/pipeline/exec/exchange_source_operator.cpp @@ -30,16 +30,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(ExchangeSourceOperator, SourceOperator) - -bool ExchangeSourceOperator::can_read() { - return _node->_stream_recvr->ready_to_read(); -} - -bool ExchangeSourceOperator::is_pending_finish() const { - return false; -} - ExchangeLocalState::ExchangeLocalState(RuntimeState* state, OperatorXBase* parent) : Base(state, parent), num_rows_skipped(0), is_ready(false) {} diff --git a/be/src/pipeline/exec/exchange_source_operator.h b/be/src/pipeline/exec/exchange_source_operator.h index 6176ad5b7f7190..de761d8391c94c 100644 --- a/be/src/pipeline/exec/exchange_source_operator.h +++ b/be/src/pipeline/exec/exchange_source_operator.h @@ -21,7 +21,6 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vexchange_node.h" namespace doris { class ExecNode; @@ -34,22 +33,6 @@ class Block; namespace doris::pipeline { -class ExchangeSourceOperatorBuilder final : public OperatorBuilder { -public: - ExchangeSourceOperatorBuilder(int32_t id, ExecNode* exec_node); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class ExchangeSourceOperator final : public SourceOperator { -public: - ExchangeSourceOperator(OperatorBuilderBase*, ExecNode*); - bool can_read() override; - bool is_pending_finish() const override; -}; - class ExchangeSourceOperatorX; class ExchangeLocalState final : public PipelineXLocalState<> { ENABLE_FACTORY_CREATOR(ExchangeLocalState); diff --git a/be/src/pipeline/exec/file_scan_operator.h b/be/src/pipeline/exec/file_scan_operator.h index e59dd8055b2949..f5c4f194bc5767 100644 --- a/be/src/pipeline/exec/file_scan_operator.h +++ b/be/src/pipeline/exec/file_scan_operator.h @@ -27,10 +27,8 @@ #include "pipeline/exec/scan_operator.h" #include "pipeline/pipeline_x/operator.h" #include "vec/exec/format/format_common.h" -#include "vec/exec/scan/vscan_node.h" namespace doris { -class ExecNode; namespace vectorized { class VFileScanner; } // namespace vectorized diff --git a/be/src/pipeline/exec/group_commit_block_sink_operator.cpp b/be/src/pipeline/exec/group_commit_block_sink_operator.cpp index 7c6abefc4a1902..4e9969d957049d 100644 --- a/be/src/pipeline/exec/group_commit_block_sink_operator.cpp +++ b/be/src/pipeline/exec/group_commit_block_sink_operator.cpp @@ -23,10 +23,6 @@ namespace doris::pipeline { -OperatorPtr GroupCommitBlockSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _sink); -} - GroupCommitBlockSinkLocalState::~GroupCommitBlockSinkLocalState() { if (_load_block_queue) { 
_remove_estimated_wal_bytes(); diff --git a/be/src/pipeline/exec/group_commit_block_sink_operator.h b/be/src/pipeline/exec/group_commit_block_sink_operator.h index 9d8aa372d6ea3d..ad03d607c15d2d 100644 --- a/be/src/pipeline/exec/group_commit_block_sink_operator.h +++ b/be/src/pipeline/exec/group_commit_block_sink_operator.h @@ -21,27 +21,7 @@ #include "pipeline/pipeline_x/operator.h" #include "vec/sink/group_commit_block_sink.h" -namespace doris { - -namespace pipeline { - -class GroupCommitBlockSinkOperatorBuilder final - : public DataSinkOperatorBuilder { -public: - GroupCommitBlockSinkOperatorBuilder(int32_t id, DataSink* sink) - : DataSinkOperatorBuilder(id, "GroupCommitBlockSinkOperator", sink) {} - - OperatorPtr build_operator() override; -}; - -class GroupCommitBlockSinkOperator final - : public DataSinkOperator { -public: - GroupCommitBlockSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink) - : DataSinkOperator(operator_builder, sink) {} - - bool can_write() override { return true; } // TODO: need use mem_limit -}; +namespace doris::pipeline { class GroupCommitBlockSinkOperatorX; class GroupCommitBlockSinkLocalState final : public PipelineXSinkLocalState { @@ -122,5 +102,4 @@ class GroupCommitBlockSinkOperatorX final TGroupCommitMode::type _group_commit_mode; }; -} // namespace pipeline -} // namespace doris \ No newline at end of file +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index da3614e4479fce..d583a827059111 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -28,8 +28,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(HashJoinBuildSink, StreamingOperator) - template struct Overload : Callables... 
{ using Callables::operator()...; diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h b/be/src/pipeline/exec/hashjoin_build_sink.h index 0998884c99bb81..18c922eb19d5ea 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.h +++ b/be/src/pipeline/exec/hashjoin_build_sink.h @@ -22,12 +22,8 @@ #include "join_build_sink_operator.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/join/vhash_join_node.h" -namespace doris { -class ExecNode; - -namespace pipeline { +namespace doris::pipeline { class HashJoinBuildSinkBuilder final : public OperatorBuilder { public: @@ -187,5 +183,4 @@ class HashJoinBuildSinkOperatorX final const bool _need_local_merge; }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp b/be/src/pipeline/exec/hashjoin_probe_operator.cpp index fc6f81f41902a2..6e8e96e8d8a2ba 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp +++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp @@ -22,10 +22,7 @@ #include "common/logging.h" #include "pipeline/exec/operator.h" -namespace doris { -namespace pipeline { - -OPERATOR_CODE_GENERATOR(HashJoinProbeOperator, StatefulOperator) +namespace doris::pipeline { HashJoinProbeLocalState::HashJoinProbeLocalState(RuntimeState* state, OperatorXBase* parent) : JoinProbeLocalState(state, parent) {} @@ -634,5 +631,4 @@ Status HashJoinProbeOperatorX::open(RuntimeState* state) { return Status::OK(); } -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h b/be/src/pipeline/exec/hashjoin_probe_operator.h index 1b45a2a258eb07..b5daefd735d984 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.h +++ b/be/src/pipeline/exec/hashjoin_probe_operator.h @@ -24,27 +24,10 @@ #include "pipeline/pipeline_x/operator.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -class HashJoinProbeOperatorBuilder final : public OperatorBuilder { -public: - HashJoinProbeOperatorBuilder(int32_t, ExecNode*); - - OperatorPtr build_operator() override; -}; - -class HashJoinProbeOperator final : public StatefulOperator { -public: - HashJoinProbeOperator(OperatorBuilderBase*, ExecNode*); - // if exec node split to: sink, source operator. 
the source operator - // should skip `alloc_resource()` function call, only sink operator - // call the function - Status open(RuntimeState*) override { return Status::OK(); } -}; - class HashJoinProbeLocalState; using HashTableCtxVariants = std::variant< diff --git a/be/src/pipeline/exec/hive_table_sink_operator.cpp b/be/src/pipeline/exec/hive_table_sink_operator.cpp index 6b8eaa8c91e635..b931d48e832fca 100644 --- a/be/src/pipeline/exec/hive_table_sink_operator.cpp +++ b/be/src/pipeline/exec/hive_table_sink_operator.cpp @@ -21,10 +21,6 @@ namespace doris::pipeline { -OperatorPtr HiveTableSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _sink); -} - Status HiveTableSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/hive_table_sink_operator.h b/be/src/pipeline/exec/hive_table_sink_operator.h index 39b5df36567440..a489948268b9e4 100644 --- a/be/src/pipeline/exec/hive_table_sink_operator.h +++ b/be/src/pipeline/exec/hive_table_sink_operator.h @@ -21,26 +21,7 @@ #include "pipeline/pipeline_x/operator.h" #include "vec/sink/vhive_table_sink.h" -namespace doris { - -namespace pipeline { - -class HiveTableSinkOperatorBuilder final - : public DataSinkOperatorBuilder { -public: - HiveTableSinkOperatorBuilder(int32_t id, DataSink* sink) - : DataSinkOperatorBuilder(id, "HiveTableSinkOperator", sink) {} - - OperatorPtr build_operator() override; -}; - -class HiveTableSinkOperator final : public DataSinkOperator { -public: - HiveTableSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink) - : DataSinkOperator(operator_builder, sink) {} - - bool can_write() override { return _sink->can_write(); } -}; +namespace doris::pipeline { class HiveTableSinkOperatorX; @@ -111,5 +92,4 @@ class HiveTableSinkOperatorX final : public DataSinkOperatorX class JoinBuildSinkOperatorX; @@ -82,5 +79,4 @@ class JoinBuildSinkOperatorX : public DataSinkOperatorX { const std::vector _runtime_filter_descs; }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/join_probe_operator.h b/be/src/pipeline/exec/join_probe_operator.h index 679446147ef9ac..228a4140b13c34 100644 --- a/be/src/pipeline/exec/join_probe_operator.h +++ b/be/src/pipeline/exec/join_probe_operator.h @@ -19,11 +19,8 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/join/vjoin_node_base.h" -namespace doris { - -namespace pipeline { +namespace doris::pipeline { template class JoinProbeOperatorX; template @@ -127,5 +124,4 @@ class JoinProbeOperatorX : public StatefulOperatorX { const bool _use_specific_projections; }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/meta_scan_operator.h b/be/src/pipeline/exec/meta_scan_operator.h index e26af7dba5a6e1..440f489f2513e3 100644 --- a/be/src/pipeline/exec/meta_scan_operator.h +++ b/be/src/pipeline/exec/meta_scan_operator.h @@ -25,10 +25,8 @@ #include "operator.h" #include "pipeline/exec/scan_operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/scan/vscan_node.h" namespace doris { -class ExecNode; namespace vectorized { class NewOlapScanner; diff --git a/be/src/pipeline/exec/multi_cast_data_stream_sink.cpp b/be/src/pipeline/exec/multi_cast_data_stream_sink.cpp index de9cdeba04fae5..e39f60b356ccd9 100644 --- a/be/src/pipeline/exec/multi_cast_data_stream_sink.cpp +++ 
b/be/src/pipeline/exec/multi_cast_data_stream_sink.cpp @@ -19,10 +19,6 @@ namespace doris::pipeline { -OperatorPtr MultiCastDataStreamSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _sink); -} - std::string MultiCastDataStreamSinkLocalState::name_suffix() { auto& sinks = static_cast(_parent)->sink_node().sinks; std::string id_name = " (dst id : "; diff --git a/be/src/pipeline/exec/multi_cast_data_stream_sink.h b/be/src/pipeline/exec/multi_cast_data_stream_sink.h index b4886f089ef108..1a18772af4fd12 100644 --- a/be/src/pipeline/exec/multi_cast_data_stream_sink.h +++ b/be/src/pipeline/exec/multi_cast_data_stream_sink.h @@ -19,28 +19,9 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/sink/multi_cast_data_stream_sink.h" namespace doris::pipeline { -class MultiCastDataStreamSinkOperatorBuilder final - : public DataSinkOperatorBuilder { -public: - MultiCastDataStreamSinkOperatorBuilder(int32_t id, DataSink* sink) - : DataSinkOperatorBuilder(id, "MultiCastDataStreamSinkOperator", sink) {} - - OperatorPtr build_operator() override; -}; - -class MultiCastDataStreamSinkOperator final - : public DataSinkOperator { -public: - MultiCastDataStreamSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink) - : DataSinkOperator(operator_builder, sink) {} - - bool can_write() override { return _sink->can_write(); } -}; - class MultiCastDataStreamSinkOperatorX; class MultiCastDataStreamSinkLocalState final : public PipelineXSinkLocalState { diff --git a/be/src/pipeline/exec/multi_cast_data_stream_source.cpp b/be/src/pipeline/exec/multi_cast_data_stream_source.cpp index 90c809c535968d..b72125abbb3eb4 100644 --- a/be/src/pipeline/exec/multi_cast_data_stream_source.cpp +++ b/be/src/pipeline/exec/multi_cast_data_stream_source.cpp @@ -25,104 +25,6 @@ namespace doris::pipeline { -MultiCastDataStreamerSourceOperatorBuilder::MultiCastDataStreamerSourceOperatorBuilder( - int32_t id, const int consumer_id, std::shared_ptr& data_streamer, - const TDataStreamSink& sink) - : OperatorBuilderBase(id, "MultiCastDataStreamerSourceOperator"), - _consumer_id(consumer_id), - _multi_cast_data_streamer(data_streamer), - _t_data_stream_sink(sink) {} - -OperatorPtr MultiCastDataStreamerSourceOperatorBuilder::build_operator() { - return std::make_shared( - this, _consumer_id, _multi_cast_data_streamer, _t_data_stream_sink); -} - -const RowDescriptor& MultiCastDataStreamerSourceOperatorBuilder::row_desc() const { - return _multi_cast_data_streamer->row_desc(); -} - -MultiCastDataStreamerSourceOperator::MultiCastDataStreamerSourceOperator( - OperatorBuilderBase* operator_builder, const int consumer_id, - std::shared_ptr& data_streamer, const TDataStreamSink& sink) - : OperatorBase(operator_builder), - vectorized::RuntimeFilterConsumer(sink.dest_node_id, sink.runtime_filters, - data_streamer->row_desc(), _conjuncts), - _consumer_id(consumer_id), - _multi_cast_data_streamer(data_streamer), - _t_data_stream_sink(sink) {} - -Status MultiCastDataStreamerSourceOperator::prepare(doris::RuntimeState* state) { - RETURN_IF_ERROR(vectorized::RuntimeFilterConsumer::init(state)); - // init profile for runtime filter - RuntimeFilterConsumer::_init_profile(_multi_cast_data_streamer->profile()); - if (_t_data_stream_sink.__isset.output_exprs) { - RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(_t_data_stream_sink.output_exprs, - _output_expr_contexts)); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_output_expr_contexts, state, row_desc())); - } - - if 
(_t_data_stream_sink.__isset.conjuncts) { - RETURN_IF_ERROR( - vectorized::VExpr::create_expr_trees(_t_data_stream_sink.conjuncts, _conjuncts)); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_conjuncts, state, row_desc())); - } - return Status::OK(); -} - -Status MultiCastDataStreamerSourceOperator::open(doris::RuntimeState* state) { - if (_t_data_stream_sink.__isset.output_exprs) { - RETURN_IF_ERROR(vectorized::VExpr::open(_output_expr_contexts, state)); - } - if (_t_data_stream_sink.__isset.conjuncts) { - RETURN_IF_ERROR(vectorized::VExpr::open(_conjuncts, state)); - } - return _acquire_runtime_filter(false); -} - -bool MultiCastDataStreamerSourceOperator::runtime_filters_are_ready_or_timeout() { - return vectorized::RuntimeFilterConsumer::runtime_filters_are_ready_or_timeout(); -} - -bool MultiCastDataStreamerSourceOperator::can_read() { - return _multi_cast_data_streamer->can_read(_consumer_id); -} - -Status MultiCastDataStreamerSourceOperator::get_block(RuntimeState* state, vectorized::Block* block, - SourceState& source_state) { - bool eos = false; - vectorized::Block tmp_block; - vectorized::Block* output_block = block; - if (!_output_expr_contexts.empty()) { - output_block = &tmp_block; - } - _multi_cast_data_streamer->pull(_consumer_id, output_block, &eos); - - if (!_conjuncts.empty()) { - RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_conjuncts, output_block, - output_block->columns())); - } - - if (!_output_expr_contexts.empty() && output_block->rows() > 0) { - RETURN_IF_ERROR(vectorized::VExprContext::get_output_block_after_execute_exprs( - _output_expr_contexts, *output_block, block, true)); - vectorized::materialize_block_inplace(*block); - } - if (eos) { - source_state = SourceState::FINISHED; - } - return Status::OK(); -} - -Status MultiCastDataStreamerSourceOperator::close(doris::RuntimeState* state) { - _multi_cast_data_streamer->close_sender(_consumer_id); - return OperatorBase::close(state); -} - -RuntimeProfile* MultiCastDataStreamerSourceOperator::get_runtime_profile() const { - return _multi_cast_data_streamer->profile(); -} - MultiCastDataStreamSourceLocalState::MultiCastDataStreamSourceLocalState(RuntimeState* state, OperatorXBase* parent) : Base(state, parent), diff --git a/be/src/pipeline/exec/multi_cast_data_stream_source.h b/be/src/pipeline/exec/multi_cast_data_stream_source.h index 8d14b4f266bec9..c3404a873c6a9a 100644 --- a/be/src/pipeline/exec/multi_cast_data_stream_source.h +++ b/be/src/pipeline/exec/multi_cast_data_stream_source.h @@ -27,7 +27,6 @@ #include "vec/exec/runtime_filter_consumer.h" namespace doris { -class ExecNode; class RuntimeState; namespace vectorized { diff --git a/be/src/pipeline/exec/mysql_scan_operator.cpp b/be/src/pipeline/exec/mysql_scan_operator.cpp deleted file mode 100644 index 7ef6170d152683..00000000000000 --- a/be/src/pipeline/exec/mysql_scan_operator.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "mysql_scan_operator.h" - -#include "vec/exec/vmysql_scan_node.h" - -namespace doris::pipeline { - -OPERATOR_CODE_GENERATOR(MysqlScanOperator, SourceOperator) - -Status MysqlScanOperator::open(RuntimeState* state) { - RETURN_IF_ERROR(SourceOperator::open(state)); - return _node->open(state); -} - -Status MysqlScanOperator::close(RuntimeState* state) { - RETURN_IF_ERROR(SourceOperator::close(state)); - RETURN_IF_ERROR(_node->close(state)); - return Status::OK(); -} - -} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/mysql_scan_operator.h b/be/src/pipeline/exec/mysql_scan_operator.h deleted file mode 100644 index 6e21d8d2ebef36..00000000000000 --- a/be/src/pipeline/exec/mysql_scan_operator.h +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include "operator.h" -#include "vec/exec/vmysql_scan_node.h" - -namespace doris::pipeline { - -class MysqlScanOperatorBuilder : public OperatorBuilder { -public: - MysqlScanOperatorBuilder(int32_t id, ExecNode* exec_node); - bool is_source() const override { return true; } - OperatorPtr build_operator() override; -}; - -class MysqlScanOperator : public SourceOperator { -public: - MysqlScanOperator(OperatorBuilderBase* operator_builder, ExecNode* mysql_scan_node); - - bool can_read() override { return true; } - - Status open(RuntimeState* state) override; - - Status close(RuntimeState* state) override; -}; - -} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp index 66612700fed37d..09a3a976567aae 100644 --- a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp @@ -20,11 +20,10 @@ #include #include "pipeline/exec/operator.h" +#include "vec/exec/join/vnested_loop_join_node.h" namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(NestLoopJoinBuildOperator, StreamingOperator) - NestedLoopJoinBuildSinkLocalState::NestedLoopJoinBuildSinkLocalState(DataSinkOperatorXBase* parent, RuntimeState* state) : JoinBuildSinkLocalState( diff --git a/be/src/pipeline/exec/nested_loop_join_build_operator.h b/be/src/pipeline/exec/nested_loop_join_build_operator.h index da42e961f479a9..98377559f91b2e 100644 --- a/be/src/pipeline/exec/nested_loop_join_build_operator.h +++ b/be/src/pipeline/exec/nested_loop_join_build_operator.h @@ -22,27 +22,8 @@ #include "operator.h" #include "pipeline/exec/join_build_sink_operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/join/vnested_loop_join_node.h" -namespace doris { -class ExecNode; - -namespace pipeline { - -class NestLoopJoinBuildOperatorBuilder final - : public OperatorBuilder { -public: - NestLoopJoinBuildOperatorBuilder(int32_t, ExecNode*); - - OperatorPtr build_operator() override; - bool is_sink() const override { return true; } -}; - -class NestLoopJoinBuildOperator final : public StreamingOperator { -public: - NestLoopJoinBuildOperator(OperatorBuilderBase* operator_builder, ExecNode* node); - bool can_write() override { return true; } -}; +namespace doris::pipeline { class NestedLoopJoinBuildSinkOperatorX; @@ -111,5 +92,4 @@ class NestedLoopJoinBuildSinkOperatorX final RowDescriptor _row_descriptor; }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp index c7afa2c399c249..819e8f67616bf6 100644 --- a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp @@ -22,7 +22,6 @@ #include "pipeline/exec/operator.h" #include "vec/columns/column_filter_helper.h" #include "vec/core/block.h" -#include "vec/exec/join/vnested_loop_join_node.h" namespace doris { class RuntimeState; @@ -30,18 +29,6 @@ class RuntimeState; namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(NestLoopJoinProbeOperator, StatefulOperator) - -Status NestLoopJoinProbeOperator::prepare(doris::RuntimeState* state) { - // just for speed up, the way is dangerous - _child_block = _node->get_left_block(); - return StatefulOperator::prepare(state); -} - -Status NestLoopJoinProbeOperator::close(doris::RuntimeState* state) { - return StatefulOperator::close(state); -} - 
NestedLoopJoinProbeLocalState::NestedLoopJoinProbeLocalState(RuntimeState* state, OperatorXBase* parent) : JoinProbeLocalState(state, diff --git a/be/src/pipeline/exec/nested_loop_join_probe_operator.h b/be/src/pipeline/exec/nested_loop_join_probe_operator.h index de9f11b437eece..5c483a4348f87f 100644 --- a/be/src/pipeline/exec/nested_loop_join_probe_operator.h +++ b/be/src/pipeline/exec/nested_loop_join_probe_operator.h @@ -24,33 +24,12 @@ #include "pipeline/exec/join_probe_operator.h" #include "pipeline/pipeline_x/operator.h" #include "util/simd/bits.h" -#include "vec/exec/join/vnested_loop_join_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -class NestLoopJoinProbeOperatorBuilder final - : public OperatorBuilder { -public: - NestLoopJoinProbeOperatorBuilder(int32_t id, ExecNode* node); - - OperatorPtr build_operator() override; -}; - -class NestLoopJoinProbeOperator final : public StatefulOperator { -public: - NestLoopJoinProbeOperator(OperatorBuilderBase* operator_builder, ExecNode* node); - - Status prepare(RuntimeState* state) override; - - Status open(RuntimeState*) override { return Status::OK(); } - - Status close(RuntimeState* state) override; -}; - class NestedLoopJoinProbeOperatorX; class NestedLoopJoinProbeLocalState final : public JoinProbeLocalState { diff --git a/be/src/pipeline/exec/olap_scan_operator.h b/be/src/pipeline/exec/olap_scan_operator.h index 8f546826c88fdd..f11cc6db0b982c 100644 --- a/be/src/pipeline/exec/olap_scan_operator.h +++ b/be/src/pipeline/exec/olap_scan_operator.h @@ -25,10 +25,8 @@ #include "operator.h" #include "pipeline/exec/scan_operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/scan/vscan_node.h" namespace doris { -class ExecNode; namespace vectorized { class NewOlapScanner; diff --git a/be/src/pipeline/exec/olap_table_sink_operator.cpp b/be/src/pipeline/exec/olap_table_sink_operator.cpp index faffaf99c112ed..60e6180469c764 100644 --- a/be/src/pipeline/exec/olap_table_sink_operator.cpp +++ b/be/src/pipeline/exec/olap_table_sink_operator.cpp @@ -19,16 +19,8 @@ #include "common/status.h" -namespace doris { -class DataSink; -} // namespace doris - namespace doris::pipeline { -OperatorPtr OlapTableSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _sink); -} - Status OlapTableSinkLocalState::close(RuntimeState* state, Status exec_status) { if (Base::_closed) { return Status::OK(); diff --git a/be/src/pipeline/exec/olap_table_sink_operator.h b/be/src/pipeline/exec/olap_table_sink_operator.h index 19c192160fd66e..ad8bbab3ee9b18 100644 --- a/be/src/pipeline/exec/olap_table_sink_operator.h +++ b/be/src/pipeline/exec/olap_table_sink_operator.h @@ -21,26 +21,7 @@ #include "pipeline/pipeline_x/operator.h" #include "vec/sink/volap_table_sink.h" -namespace doris { - -namespace pipeline { - -class OlapTableSinkOperatorBuilder final - : public DataSinkOperatorBuilder { -public: - OlapTableSinkOperatorBuilder(int32_t id, DataSink* sink) - : DataSinkOperatorBuilder(id, "OlapTableSinkOperator", sink) {} - - OperatorPtr build_operator() override; -}; - -class OlapTableSinkOperator final : public DataSinkOperator { -public: - OlapTableSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink) - : DataSinkOperator(operator_builder, sink) {} - - bool can_write() override { return _sink->can_write(); } -}; +namespace doris::pipeline { class OlapTableSinkOperatorX; @@ -102,5 +83,4 @@ class OlapTableSinkOperatorX final : public DataSinkOperatorX(this, _sink); -} - Status 
OlapTableSinkV2LocalState::close(RuntimeState* state, Status exec_status) { if (Base::_closed) { return Status::OK(); diff --git a/be/src/pipeline/exec/olap_table_sink_v2_operator.h b/be/src/pipeline/exec/olap_table_sink_v2_operator.h index 1fcd4716268119..da0700f8af3377 100644 --- a/be/src/pipeline/exec/olap_table_sink_v2_operator.h +++ b/be/src/pipeline/exec/olap_table_sink_v2_operator.h @@ -21,9 +21,7 @@ #include "pipeline/pipeline_x/operator.h" #include "vec/sink/volap_table_sink_v2.h" -namespace doris { - -namespace pipeline { +namespace doris::pipeline { class OlapTableSinkV2OperatorBuilder final : public DataSinkOperatorBuilder { @@ -104,5 +102,4 @@ class OlapTableSinkV2OperatorX final : public DataSinkOperatorX(this, _node); \ - } \ - \ - NAME::NAME(OperatorBuilderBase* operator_builder, ExecNode* node) \ - : SUBCLASS(operator_builder, node) {}; - namespace doris::pipeline { /** diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.cpp b/be/src/pipeline/exec/partition_sort_sink_operator.cpp index abe2fde555e164..77d9ca03e28a0b 100644 --- a/be/src/pipeline/exec/partition_sort_sink_operator.cpp +++ b/be/src/pipeline/exec/partition_sort_sink_operator.cpp @@ -23,10 +23,6 @@ namespace doris::pipeline { -OperatorPtr PartitionSortSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _node); -} - Status PartitionSortSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { RETURN_IF_ERROR(PipelineXSinkLocalState::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.h b/be/src/pipeline/exec/partition_sort_sink_operator.h index 8602b096f516a2..4ac8a9d73f41a0 100644 --- a/be/src/pipeline/exec/partition_sort_sink_operator.h +++ b/be/src/pipeline/exec/partition_sort_sink_operator.h @@ -24,31 +24,8 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" #include "vec/common/sort/partition_sorter.h" -#include "vec/exec/vpartition_sort_node.h" -namespace doris { -class ExecNode; - -namespace pipeline { - -class PartitionSortSinkOperatorBuilder final - : public OperatorBuilder { -public: - PartitionSortSinkOperatorBuilder(int32_t id, ExecNode* sort_node) - : OperatorBuilder(id, "PartitionSortSinkOperator", sort_node) {} - - bool is_sink() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class PartitionSortSinkOperator final : public StreamingOperator { -public: - PartitionSortSinkOperator(OperatorBuilderBase* operator_builder, ExecNode* sort_node) - : StreamingOperator(operator_builder, sort_node) {}; - - bool can_write() override { return true; } -}; +namespace doris::pipeline { class PartitionSortSinkOperatorX; class PartitionSortSinkLocalState : public PipelineXSinkLocalState { @@ -128,5 +105,4 @@ class PartitionSortSinkOperatorX final : public DataSinkOperatorX(this, _node); -} - Status PartitionSortSourceLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(PipelineXLocalState::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/partition_sort_source_operator.h b/be/src/pipeline/exec/partition_sort_source_operator.h index 9d810db2039e70..5398a728e4ecf3 100644 --- a/be/src/pipeline/exec/partition_sort_source_operator.h +++ b/be/src/pipeline/exec/partition_sort_source_operator.h @@ -22,32 +22,12 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vpartition_sort_node.h" namespace doris { -class ExecNode; class RuntimeState; 
namespace pipeline { -class PartitionSortSourceOperatorBuilder final - : public OperatorBuilder { -public: - PartitionSortSourceOperatorBuilder(int32_t id, ExecNode* sort_node) - : OperatorBuilder(id, "PartitionSortSourceOperator", sort_node) {} - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class PartitionSortSourceOperator final : public SourceOperator { -public: - PartitionSortSourceOperator(OperatorBuilderBase* operator_builder, ExecNode* sort_node) - : SourceOperator(operator_builder, sort_node) {} - Status open(RuntimeState*) override { return Status::OK(); } -}; - class PartitionSortSourceOperatorX; class PartitionSortSourceLocalState final : public PipelineXLocalState { diff --git a/be/src/pipeline/exec/partitioned_aggregation_source_operator.h b/be/src/pipeline/exec/partitioned_aggregation_source_operator.h index eff1e7179c8d0d..c1deb8af50db79 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_source_operator.h +++ b/be/src/pipeline/exec/partitioned_aggregation_source_operator.h @@ -23,7 +23,6 @@ #include "pipeline/pipeline_x/operator.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h index 68c6b970163f24..5c6b7e1f74f930 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h +++ b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.h @@ -29,7 +29,6 @@ #include "vec/sink/vdata_stream_sender.h" // ShuffleChannelIds namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { diff --git a/be/src/pipeline/exec/repeat_operator.cpp b/be/src/pipeline/exec/repeat_operator.cpp index 42d009f0e76b7d..fcf59f36543ba3 100644 --- a/be/src/pipeline/exec/repeat_operator.cpp +++ b/be/src/pipeline/exec/repeat_operator.cpp @@ -22,7 +22,6 @@ #include "common/logging.h" #include "pipeline/exec/operator.h" #include "vec/core/block.h" -#include "vec/exec/vrepeat_node.h" namespace doris { class RuntimeState; @@ -30,18 +29,6 @@ class RuntimeState; namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(RepeatOperator, StatefulOperator) - -Status RepeatOperator::prepare(doris::RuntimeState* state) { - // just for speed up, the way is dangerous - _child_block = _node->get_child_block(); - return StatefulOperator::prepare(state); -} - -Status RepeatOperator::close(doris::RuntimeState* state) { - return StatefulOperator::close(state); -} - RepeatLocalState::RepeatLocalState(RuntimeState* state, OperatorXBase* parent) : Base(state, parent), _child_block(vectorized::Block::create_unique()), diff --git a/be/src/pipeline/exec/repeat_operator.h b/be/src/pipeline/exec/repeat_operator.h index 208b3d1e00565c..922645d270d915 100644 --- a/be/src/pipeline/exec/repeat_operator.h +++ b/be/src/pipeline/exec/repeat_operator.h @@ -21,30 +21,12 @@ #include "common/status.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vrepeat_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -class RepeatOperatorBuilder final : public OperatorBuilder { -public: - RepeatOperatorBuilder(int32_t id, ExecNode* repeat_node); - - OperatorPtr build_operator() override; -}; - -class RepeatOperator final : public StatefulOperator { -public: - RepeatOperator(OperatorBuilderBase* operator_builder, ExecNode* repeat_node); - - Status prepare(RuntimeState* state) override; - - Status close(RuntimeState* state) override; -}; - class RepeatOperatorX; class 
RepeatLocalState final : public PipelineXLocalState { diff --git a/be/src/pipeline/exec/result_file_sink_operator.cpp b/be/src/pipeline/exec/result_file_sink_operator.cpp index 65ad6599d5ddba..f8cadad1df7e1d 100644 --- a/be/src/pipeline/exec/result_file_sink_operator.cpp +++ b/be/src/pipeline/exec/result_file_sink_operator.cpp @@ -25,25 +25,9 @@ #include "runtime/buffer_control_block.h" #include "runtime/result_buffer_mgr.h" #include "vec/sink/vdata_stream_sender.h" -#include "vec/sink/vresult_file_sink.h" - -namespace doris { -class DataSink; -} // namespace doris namespace doris::pipeline { -ResultFileSinkOperatorBuilder::ResultFileSinkOperatorBuilder(int32_t id, DataSink* sink) - : DataSinkOperatorBuilder(id, "ResultSinkOperator", sink) {}; - -OperatorPtr ResultFileSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _sink); -} - -ResultFileSinkOperator::ResultFileSinkOperator(OperatorBuilderBase* operator_builder, - DataSink* sink) - : DataSinkOperator(operator_builder, sink) {}; - ResultFileSinkLocalState::ResultFileSinkLocalState(DataSinkOperatorXBase* parent, RuntimeState* state) : AsyncWriterSink(parent, state), diff --git a/be/src/pipeline/exec/result_file_sink_operator.h b/be/src/pipeline/exec/result_file_sink_operator.h index 31b4b26206caaf..9dc91193510ce7 100644 --- a/be/src/pipeline/exec/result_file_sink_operator.h +++ b/be/src/pipeline/exec/result_file_sink_operator.h @@ -23,25 +23,7 @@ #include "pipeline/pipeline_x/operator.h" #include "vec/sink/vresult_file_sink.h" -namespace doris { -class DataSink; - -namespace pipeline { - -class ResultFileSinkOperatorBuilder final - : public DataSinkOperatorBuilder { -public: - ResultFileSinkOperatorBuilder(int32_t id, DataSink* sink); - - OperatorPtr build_operator() override; -}; - -class ResultFileSinkOperator final : public DataSinkOperator { -public: - ResultFileSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink); - - bool can_write() override { return true; } -}; +namespace doris::pipeline { class ResultFileSinkOperatorX; class ResultFileSinkLocalState final @@ -127,5 +109,4 @@ class ResultFileSinkOperatorX final : public DataSinkOperatorX(this, _sink); -} - -ResultSinkOperator::ResultSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink) - : DataSinkOperator(operator_builder, sink) {}; - -bool ResultSinkOperator::can_write() { - return _sink->_sender->can_sink(); -} - Status ResultSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/result_sink_operator.h b/be/src/pipeline/exec/result_sink_operator.h index aed9961a6d6771..71b8afce1719ec 100644 --- a/be/src/pipeline/exec/result_sink_operator.h +++ b/be/src/pipeline/exec/result_sink_operator.h @@ -21,28 +21,12 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/sink/vresult_sink.h" namespace doris { -class DataSink; class PipBufferControlBlock; namespace pipeline { -class ResultSinkOperatorBuilder final : public DataSinkOperatorBuilder { -public: - ResultSinkOperatorBuilder(int32_t id, DataSink* sink); - - OperatorPtr build_operator() override; -}; - -class ResultSinkOperator final : public DataSinkOperator { -public: - ResultSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink); - - bool can_write() override; -}; - class ResultSinkLocalState final : public PipelineXSinkLocalState { ENABLE_FACTORY_CREATOR(ResultSinkLocalState); using Base = PipelineXSinkLocalState; 
diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index 9d32f0e25abf60..37da4d97288367 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -45,39 +45,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(ScanOperator, SourceOperator) - -bool ScanOperator::can_read() { - if (!_node->_opened) { - return _node->_should_create_scanner || _node->ready_to_open(); - } else { - // If scanner meet any error, done == true - if (_node->_eos || _node->_scanner_ctx->done()) { - // _eos: need eos - // _scanner_ctx->done(): need finish - // _scanner_ctx->no_schedule(): should schedule _scanner_ctx - return true; - } else { - return _node->ready_to_read(); // there are some blocks to process - } - } -} - -bool ScanOperator::runtime_filters_are_ready_or_timeout() { - return _node->runtime_filters_are_ready_or_timeout(); -} - -std::string ScanOperator::debug_string() const { - fmt::memory_buffer debug_string_buffer; - fmt::format_to(debug_string_buffer, "{}, scanner_ctx is null: {} ", - SourceOperator::debug_string(), _node->_scanner_ctx == nullptr); - if (_node->_scanner_ctx) { - fmt::format_to(debug_string_buffer, ", scanner ctx detail = {}", - _node->_scanner_ctx->debug_string()); - } - return fmt::to_string(debug_string_buffer); -} - #define RETURN_IF_PUSH_DOWN(stmt, status) \ if (pdt == vectorized::VScanNode::PushDownType::UNACCEPTABLE) { \ status = stmt; \ diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h index 3ebccb58a8c604..6b62af1eefd9ac 100644 --- a/be/src/pipeline/exec/scan_operator.h +++ b/be/src/pipeline/exec/scan_operator.h @@ -29,9 +29,6 @@ #include "runtime/descriptors.h" #include "vec/exec/scan/vscan_node.h" -namespace doris { -class ExecNode; -} // namespace doris namespace doris::vectorized { class ScannerDelegate; } @@ -39,24 +36,6 @@ class ScannerDelegate; namespace doris::pipeline { class PipScannerContext; -class ScanOperatorBuilder : public OperatorBuilder { -public: - ScanOperatorBuilder(int32_t id, ExecNode* exec_node); - bool is_source() const override { return true; } - OperatorPtr build_operator() override; -}; - -class ScanOperator : public SourceOperator { -public: - ScanOperator(OperatorBuilderBase* operator_builder, ExecNode* scan_node); - - bool can_read() override; // for source - - bool runtime_filters_are_ready_or_timeout() override; - - std::string debug_string() const override; -}; - class ScanLocalStateBase : public PipelineXLocalState<>, public vectorized::RuntimeFilterConsumer { public: ScanLocalStateBase(RuntimeState* state, OperatorXBase* parent) diff --git a/be/src/pipeline/exec/schema_scan_operator.cpp b/be/src/pipeline/exec/schema_scan_operator.cpp index 2d32e21d991960..e4a3a8c1ca2468 100644 --- a/be/src/pipeline/exec/schema_scan_operator.cpp +++ b/be/src/pipeline/exec/schema_scan_operator.cpp @@ -24,7 +24,6 @@ #include "pipeline/exec/operator.h" #include "util/runtime_profile.h" #include "vec/data_types/data_type_factory.hpp" -#include "vec/exec/vschema_scan_node.h" namespace doris { class RuntimeState; @@ -32,18 +31,6 @@ class RuntimeState; namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(SchemaScanOperator, SourceOperator) - -Status SchemaScanOperator::open(RuntimeState* state) { - return _node->open(state); -} - -Status SchemaScanOperator::close(RuntimeState* state) { - RETURN_IF_ERROR(SourceOperator::close(state)); - RETURN_IF_ERROR(_node->close(state)); - return Status::OK(); -} - Status 
SchemaScanLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(PipelineXLocalState<>::init(state, info)); diff --git a/be/src/pipeline/exec/schema_scan_operator.h b/be/src/pipeline/exec/schema_scan_operator.h index bd336132efb2b7..f07a0e854e70c0 100644 --- a/be/src/pipeline/exec/schema_scan_operator.h +++ b/be/src/pipeline/exec/schema_scan_operator.h @@ -25,30 +25,11 @@ #include "vec/exec/vschema_scan_node.h" namespace doris { -class ExecNode; class RuntimeState; } // namespace doris namespace doris::pipeline { -class SchemaScanOperatorBuilder : public OperatorBuilder { -public: - SchemaScanOperatorBuilder(int32_t id, ExecNode* exec_node); - bool is_source() const override { return true; } - OperatorPtr build_operator() override; -}; - -class SchemaScanOperator : public SourceOperator { -public: - SchemaScanOperator(OperatorBuilderBase* operator_builder, ExecNode* scan_node); - - bool can_read() override { return true; } - - Status open(RuntimeState* state) override; - - Status close(RuntimeState* state) override; -}; - class SchemaScanOperatorX; class SchemaScanLocalState final : public PipelineXLocalState<> { public: diff --git a/be/src/pipeline/exec/select_operator.cpp b/be/src/pipeline/exec/select_operator.cpp deleted file mode 100644 index b4eb27eace3ea1..00000000000000 --- a/be/src/pipeline/exec/select_operator.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "select_operator.h" - -#include - -#include "pipeline/exec/operator.h" - -namespace doris::pipeline { - -OPERATOR_CODE_GENERATOR(SelectOperator, StreamingOperator) - -} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/select_operator.h b/be/src/pipeline/exec/select_operator.h index 4fd929e323ba7d..4bdc5a9e42d298 100644 --- a/be/src/pipeline/exec/select_operator.h +++ b/be/src/pipeline/exec/select_operator.h @@ -21,24 +21,8 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vselect_node.h" -namespace doris { -class ExecNode; - -namespace pipeline { - -class SelectOperatorBuilder final : public OperatorBuilder { -public: - SelectOperatorBuilder(int32_t id, ExecNode* select_node); - - OperatorPtr build_operator() override; -}; - -class SelectOperator final : public StreamingOperator { -public: - SelectOperator(OperatorBuilderBase* operator_builder, ExecNode* select_node); -}; +namespace doris::pipeline { class SelectOperatorX; class SelectLocalState final : public PipelineXLocalState { @@ -72,5 +56,4 @@ class SelectOperatorX final : public StreamingOperatorX { [[nodiscard]] bool is_source() const override { return false; } }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/set_probe_sink_operator.cpp b/be/src/pipeline/exec/set_probe_sink_operator.cpp index 744c8b17e2257d..81c0cd463c1284 100644 --- a/be/src/pipeline/exec/set_probe_sink_operator.cpp +++ b/be/src/pipeline/exec/set_probe_sink_operator.cpp @@ -23,10 +23,8 @@ #include "pipeline/exec/operator.h" #include "vec/common/hash_table/hash_table_set_probe.h" -#include "vec/exec/vset_operation_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace vectorized { @@ -36,41 +34,6 @@ class Block; namespace doris::pipeline { -template -SetProbeSinkOperatorBuilder::SetProbeSinkOperatorBuilder(int32_t id, int child_id, - ExecNode* set_node) - : OperatorBuilder>(id, builder_name, set_node), - _child_id(child_id) {} - -template -OperatorPtr SetProbeSinkOperatorBuilder::build_operator() { - return std::make_shared>(this, _child_id, this->_node); -} - -template -SetProbeSinkOperator::SetProbeSinkOperator(OperatorBuilderBase* operator_builder, - int child_id, ExecNode* set_node) - : StreamingOperator>(operator_builder, - set_node), - _child_id(child_id) {} - -template -Status SetProbeSinkOperator::sink(RuntimeState* state, vectorized::Block* block, - SourceState source_state) { - return this->_node->sink_probe(state, _child_id, block, source_state == SourceState::FINISHED); -} - -template -bool SetProbeSinkOperator::can_write() { - DCHECK_GT(_child_id, 0); - return this->_node->is_child_finished(_child_id - 1); -} - -template class SetProbeSinkOperatorBuilder; -template class SetProbeSinkOperatorBuilder; -template class SetProbeSinkOperator; -template class SetProbeSinkOperator; - template Status SetProbeSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { DataSinkOperatorX>::_name = "SET_PROBE_SINK_OPERATOR"; diff --git a/be/src/pipeline/exec/set_probe_sink_operator.h b/be/src/pipeline/exec/set_probe_sink_operator.h index 9f80f03966b1f1..499eeee0d6205b 100644 --- a/be/src/pipeline/exec/set_probe_sink_operator.h +++ b/be/src/pipeline/exec/set_probe_sink_operator.h @@ -22,10 +22,8 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vset_operation_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace vectorized { @@ 
-36,37 +34,6 @@ struct HashTableProbe; namespace pipeline { -template -class SetProbeSinkOperatorBuilder final - : public OperatorBuilder> { -private: - constexpr static auto builder_name = - is_intersect ? "IntersectProbeSinkOperator" : "ExceptProbeSinkOperator"; - -public: - SetProbeSinkOperatorBuilder(int32_t id, int child_id, ExecNode* set_node); - [[nodiscard]] bool is_sink() const override { return true; } - - OperatorPtr build_operator() override; - -private: - int _child_id; -}; - -template -class SetProbeSinkOperator : public StreamingOperator> { -public: - SetProbeSinkOperator(OperatorBuilderBase* operator_builder, int child_id, ExecNode* set_node); - - bool can_write() override; - - Status sink(RuntimeState* state, vectorized::Block* block, SourceState source_state) override; - Status open(RuntimeState* /*state*/) override { return Status::OK(); } - -private: - int _child_id; -}; - template class SetProbeSinkOperatorX; diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp index 2042e3eb1a1d51..796174d080a88e 100644 --- a/be/src/pipeline/exec/set_sink_operator.cpp +++ b/be/src/pipeline/exec/set_sink_operator.cpp @@ -22,34 +22,9 @@ #include "pipeline/exec/operator.h" #include "vec/common/hash_table/hash_table_set_build.h" #include "vec/core/materialize_block.h" -#include "vec/exec/vset_operation_node.h" - -namespace doris { -class ExecNode; -} // namespace doris namespace doris::pipeline { -template -SetSinkOperatorBuilder::SetSinkOperatorBuilder(int32_t id, ExecNode* set_node) - : OperatorBuilder>(id, builder_name, set_node) { -} - -template -OperatorPtr SetSinkOperatorBuilder::build_operator() { - return std::make_shared>(this, this->_node); -} - -template -SetSinkOperator::SetSinkOperator( - OperatorBuilderBase* builder, vectorized::VSetOperationNode* set_node) - : StreamingOperator>(builder, set_node) {} - -template class SetSinkOperatorBuilder; -template class SetSinkOperatorBuilder; -template class SetSinkOperator; -template class SetSinkOperator; - template Status SetSinkOperatorX::sink(RuntimeState* state, vectorized::Block* in_block, bool eos) { diff --git a/be/src/pipeline/exec/set_sink_operator.h b/be/src/pipeline/exec/set_sink_operator.h index 2a6bb63c02e815..8894b8b15f2252 100644 --- a/be/src/pipeline/exec/set_sink_operator.h +++ b/be/src/pipeline/exec/set_sink_operator.h @@ -22,10 +22,8 @@ #include "olap/olap_common.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vset_operation_node.h" namespace doris { -class ExecNode; namespace vectorized { template @@ -34,32 +32,6 @@ struct HashTableBuild; namespace pipeline { -template -class SetSinkOperatorBuilder final - : public OperatorBuilder> { -private: - constexpr static auto builder_name = - is_intersect ? 
"IntersectSinkOperator" : "ExceptSinkOperator"; - -public: - SetSinkOperatorBuilder(int32_t id, ExecNode* set_node); - [[nodiscard]] bool is_sink() const override { return true; } - - OperatorPtr build_operator() override; -}; - -template -class SetSinkOperator : public StreamingOperator> { -public: - SetSinkOperator(OperatorBuilderBase* operator_builder, - vectorized::VSetOperationNode* set_node); - - bool can_write() override { return true; } - -private: - vectorized::VSetOperationNode* _set_node = nullptr; -}; - template class SetSinkOperatorX; diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp index 88d38d325af003..bdb844e70e8095 100644 --- a/be/src/pipeline/exec/set_source_operator.cpp +++ b/be/src/pipeline/exec/set_source_operator.cpp @@ -21,34 +21,9 @@ #include "common/status.h" #include "pipeline/exec/operator.h" -#include "vec/exec/vset_operation_node.h" - -namespace doris { -class ExecNode; -} // namespace doris namespace doris::pipeline { -template -SetSourceOperatorBuilder::SetSourceOperatorBuilder(int32_t id, ExecNode* set_node) - : OperatorBuilder>(id, builder_name, set_node) { -} - -template -OperatorPtr SetSourceOperatorBuilder::build_operator() { - return std::make_shared>(this, this->_node); -} - -template -SetSourceOperator::SetSourceOperator( - OperatorBuilderBase* builder, vectorized::VSetOperationNode* set_node) - : SourceOperator>(builder, set_node) {} - -template class SetSourceOperatorBuilder; -template class SetSourceOperatorBuilder; -template class SetSourceOperator; -template class SetSourceOperator; - template Status SetSourceLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); diff --git a/be/src/pipeline/exec/set_source_operator.h b/be/src/pipeline/exec/set_source_operator.h index 1c5cf162940b40..94487507c26e3e 100644 --- a/be/src/pipeline/exec/set_source_operator.h +++ b/be/src/pipeline/exec/set_source_operator.h @@ -22,37 +22,12 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vset_operation_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -template -class SetSourceOperatorBuilder - : public OperatorBuilder> { -private: - constexpr static auto builder_name = - is_intersect ? 
"IntersectSourceOperator" : "ExceptSourceOperator"; - -public: - SetSourceOperatorBuilder(int32_t id, ExecNode* set_node); - [[nodiscard]] bool is_source() const override { return true; } - - OperatorPtr build_operator() override; -}; - -template -class SetSourceOperator : public SourceOperator> { -public: - SetSourceOperator(OperatorBuilderBase* builder, - vectorized::VSetOperationNode* set_node); - - Status open(RuntimeState* /*state*/) override { return Status::OK(); } -}; - template class SetSourceOperatorX; diff --git a/be/src/pipeline/exec/sort_sink_operator.cpp b/be/src/pipeline/exec/sort_sink_operator.cpp index 91ae687510c985..7c9f40d1b5956f 100644 --- a/be/src/pipeline/exec/sort_sink_operator.cpp +++ b/be/src/pipeline/exec/sort_sink_operator.cpp @@ -26,8 +26,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(SortSinkOperator, StreamingOperator) - Status SortSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/sort_sink_operator.h b/be/src/pipeline/exec/sort_sink_operator.h index ad9c23401b4c69..8298dc980b6c7d 100644 --- a/be/src/pipeline/exec/sort_sink_operator.h +++ b/be/src/pipeline/exec/sort_sink_operator.h @@ -22,28 +22,8 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" #include "vec/core/field.h" -#include "vec/exec/vsort_node.h" -namespace doris { -class ExecNode; - -namespace pipeline { - -class SortSinkOperatorBuilder final : public OperatorBuilder { -public: - SortSinkOperatorBuilder(int32_t id, ExecNode* sort_node); - - bool is_sink() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class SortSinkOperator final : public StreamingOperator { -public: - SortSinkOperator(OperatorBuilderBase* operator_builder, ExecNode* sort_node); - - bool can_write() override { return true; } -}; +namespace doris::pipeline { enum class SortAlgorithm { HEAP_SORT, TOPN_SORT, FULL_SORT }; @@ -132,5 +112,4 @@ class SortSinkOperatorX final : public DataSinkOperatorX { const std::vector _partition_exprs; }; -} // namespace pipeline -} // namespace doris +} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/sort_source_operator.cpp b/be/src/pipeline/exec/sort_source_operator.cpp index f7d6b6ae56c3c9..34bfffb8d9fd2e 100644 --- a/be/src/pipeline/exec/sort_source_operator.cpp +++ b/be/src/pipeline/exec/sort_source_operator.cpp @@ -23,8 +23,6 @@ namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(SortSourceOperator, SourceOperator) - SortLocalState::SortLocalState(RuntimeState* state, OperatorXBase* parent) : PipelineXLocalState(state, parent) {} diff --git a/be/src/pipeline/exec/sort_source_operator.h b/be/src/pipeline/exec/sort_source_operator.h index 1af30bfd05f49f..43c4934b977415 100644 --- a/be/src/pipeline/exec/sort_source_operator.h +++ b/be/src/pipeline/exec/sort_source_operator.h @@ -22,29 +22,12 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vsort_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -class SortSourceOperatorBuilder final : public OperatorBuilder { -public: - SortSourceOperatorBuilder(int32_t id, ExecNode* sort_node); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; -}; - -class SortSourceOperator final : public SourceOperator { -public: - SortSourceOperator(OperatorBuilderBase* operator_builder, ExecNode* sort_node); - Status 
open(RuntimeState*) override { return Status::OK(); } -}; - class SortSourceOperatorX; class SortLocalState final : public PipelineXLocalState { public: diff --git a/be/src/pipeline/exec/streaming_aggregation_sink_operator.cpp b/be/src/pipeline/exec/streaming_aggregation_sink_operator.cpp deleted file mode 100644 index 5805b209f4f7f8..00000000000000 --- a/be/src/pipeline/exec/streaming_aggregation_sink_operator.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "streaming_aggregation_sink_operator.h" - -#include - -#include - -#include "common/compiler_util.h" // IWYU pragma: keep -#include "pipeline/exec/data_queue.h" -#include "pipeline/exec/operator.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; -} // namespace doris - -namespace doris::pipeline { - -StreamingAggSinkOperator::StreamingAggSinkOperator(OperatorBuilderBase* operator_builder, - ExecNode* agg_node, - std::shared_ptr queue) - : StreamingOperator(operator_builder, agg_node), _data_queue(std::move(queue)) {} - -Status StreamingAggSinkOperator::prepare(RuntimeState* state) { - RETURN_IF_ERROR(StreamingOperator::prepare(state)); - _queue_byte_size_counter = - ADD_COUNTER(_node->runtime_profile(), "MaxSizeInBlockQueue", TUnit::BYTES); - _queue_size_counter = ADD_COUNTER(_node->runtime_profile(), "MaxSizeOfBlockQueue", TUnit::UNIT); - return Status::OK(); -} - -bool StreamingAggSinkOperator::can_write() { - // sink and source in diff threads - return _data_queue->has_enough_space_to_push(); -} - -Status StreamingAggSinkOperator::sink(RuntimeState* state, vectorized::Block* in_block, - SourceState source_state) { - Status ret = Status::OK(); - if (in_block && in_block->rows() > 0) { - auto block_from_ctx = _data_queue->get_free_block(); - RETURN_IF_ERROR(_node->do_pre_agg(in_block, block_from_ctx.get())); - if (block_from_ctx->rows() == 0) { - _data_queue->push_free_block(std::move(block_from_ctx)); - } else { - _data_queue->push_block(std::move(block_from_ctx)); - } - } - - if (UNLIKELY(source_state == SourceState::FINISHED)) { - _data_queue->set_finish(); - } - return Status::OK(); -} - -Status StreamingAggSinkOperator::close(RuntimeState* state) { - if (_data_queue && !_data_queue->is_finish()) { - // finish should be set, if not set here means error. 
- _data_queue->set_canceled(); - } - if (_data_queue) { - COUNTER_SET(_queue_size_counter, _data_queue->max_size_of_queue()); - COUNTER_SET(_queue_byte_size_counter, _data_queue->max_bytes_in_queue()); - } - return StreamingOperator::close(state); -} - -StreamingAggSinkOperatorBuilder::StreamingAggSinkOperatorBuilder(int32_t id, ExecNode* exec_node, - std::shared_ptr queue) - : OperatorBuilder(id, "StreamingAggSinkOperator", exec_node), - _data_queue(std::move(queue)) {} - -OperatorPtr StreamingAggSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _node, _data_queue); -} -} // namespace doris::pipeline diff --git a/be/src/pipeline/exec/streaming_aggregation_sink_operator.h b/be/src/pipeline/exec/streaming_aggregation_sink_operator.h deleted file mode 100644 index 99e94e2d585957..00000000000000 --- a/be/src/pipeline/exec/streaming_aggregation_sink_operator.h +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include - -#include "aggregation_sink_operator.h" -#include "aggregation_source_operator.h" -#include "common/status.h" -#include "operator.h" -#include "pipeline/pipeline_x/operator.h" -#include "util/runtime_profile.h" -#include "vec/core/block.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; - -namespace pipeline { -class DataQueue; - -class StreamingAggSinkOperatorBuilder final : public OperatorBuilder { -public: - StreamingAggSinkOperatorBuilder(int32_t, ExecNode*, std::shared_ptr); - - OperatorPtr build_operator() override; - - bool is_sink() const override { return true; } - bool is_source() const override { return false; } - -private: - std::shared_ptr _data_queue; -}; - -class StreamingAggSinkOperator final : public StreamingOperator { -public: - StreamingAggSinkOperator(OperatorBuilderBase* operator_builder, ExecNode*, - std::shared_ptr); - - Status prepare(RuntimeState*) override; - - Status sink(RuntimeState* state, vectorized::Block* block, SourceState source_state) override; - - bool can_write() override; - - Status close(RuntimeState* state) override; - -private: - vectorized::Block _preagg_block = vectorized::Block(); - - RuntimeProfile::Counter* _queue_byte_size_counter = nullptr; - RuntimeProfile::Counter* _queue_size_counter = nullptr; - - std::shared_ptr _data_queue; -}; - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/exec/streaming_aggregation_source_operator.cpp b/be/src/pipeline/exec/streaming_aggregation_source_operator.cpp deleted file mode 100644 index 9c9a014046a043..00000000000000 --- a/be/src/pipeline/exec/streaming_aggregation_source_operator.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or 
more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "streaming_aggregation_source_operator.h" - -#include - -#include "pipeline/exec/data_queue.h" -#include "pipeline/exec/operator.h" -#include "runtime/descriptors.h" -#include "util/runtime_profile.h" -#include "vec/core/block.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; - -namespace pipeline { - -StreamingAggSourceOperator::StreamingAggSourceOperator(OperatorBuilderBase* templ, ExecNode* node, - std::shared_ptr queue) - : SourceOperator(templ, node), _data_queue(std::move(queue)) {} - -bool StreamingAggSourceOperator::can_read() { - return _data_queue->has_data_or_finished(); -} - -Status StreamingAggSourceOperator::get_block(RuntimeState* state, vectorized::Block* block, - SourceState& source_state) { - bool eos = false; - if (!_data_queue->data_exhausted()) { - std::unique_ptr agg_block; - RETURN_IF_ERROR(_data_queue->get_block_from_queue(&agg_block)); - - if (_data_queue->data_exhausted()) { - RETURN_IF_ERROR(_node->pull(state, block, &eos)); - } else { - block->swap(*agg_block); - agg_block->clear_column_data(_node->row_desc().num_materialized_slots()); - _data_queue->push_free_block(std::move(agg_block)); - } - } else { - RETURN_IF_ERROR(_node->pull(state, block, &eos)); - } - - source_state = eos ? SourceState::FINISHED : SourceState::DEPEND_ON_SOURCE; - - return Status::OK(); -} - -StreamingAggSourceOperatorBuilder::StreamingAggSourceOperatorBuilder( - int32_t id, ExecNode* exec_node, std::shared_ptr queue) - : OperatorBuilder(id, "StreamingAggSourceOperator", exec_node), - _data_queue(std::move(queue)) {} - -OperatorPtr StreamingAggSourceOperatorBuilder::build_operator() { - return std::make_shared(this, _node, _data_queue); -} - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/exec/streaming_aggregation_source_operator.h b/be/src/pipeline/exec/streaming_aggregation_source_operator.h deleted file mode 100644 index 89dbaab058ac82..00000000000000 --- a/be/src/pipeline/exec/streaming_aggregation_source_operator.h +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. -#pragma once - -#include - -#include - -#include "common/status.h" -#include "operator.h" -#include "pipeline/exec/aggregation_source_operator.h" -#include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vaggregation_node.h" - -namespace doris { -class ExecNode; -class RuntimeState; - -namespace vectorized { -class Block; -} // namespace vectorized -namespace pipeline { -class DataQueue; - -class StreamingAggSourceOperatorBuilder final - : public OperatorBuilder { -public: - StreamingAggSourceOperatorBuilder(int32_t, ExecNode*, std::shared_ptr); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; - -private: - std::shared_ptr _data_queue; -}; - -class StreamingAggSourceOperator final : public SourceOperator { -public: - StreamingAggSourceOperator(OperatorBuilderBase*, ExecNode*, std::shared_ptr); - bool can_read() override; - Status get_block(RuntimeState*, vectorized::Block*, SourceState& source_state) override; - Status open(RuntimeState*) override { return Status::OK(); } - -private: - std::shared_ptr _data_queue; -}; - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/exec/table_function_operator.cpp b/be/src/pipeline/exec/table_function_operator.cpp index 9256d1deb2b072..cb547688595de6 100644 --- a/be/src/pipeline/exec/table_function_operator.cpp +++ b/be/src/pipeline/exec/table_function_operator.cpp @@ -29,18 +29,6 @@ class RuntimeState; namespace doris::pipeline { -OPERATOR_CODE_GENERATOR(TableFunctionOperator, StatefulOperator) - -Status TableFunctionOperator::prepare(doris::RuntimeState* state) { - // just for speed up, the way is dangerous - _child_block = _node->get_child_block(); - return StatefulOperator::prepare(state); -} - -Status TableFunctionOperator::close(doris::RuntimeState* state) { - return StatefulOperator::close(state); -} - TableFunctionLocalState::TableFunctionLocalState(RuntimeState* state, OperatorXBase* parent) : PipelineXLocalState<>(state, parent), _child_block(vectorized::Block::create_unique()) {} diff --git a/be/src/pipeline/exec/table_function_operator.h b/be/src/pipeline/exec/table_function_operator.h index 8a7b7bd43d45d1..5b9457a474b79b 100644 --- a/be/src/pipeline/exec/table_function_operator.h +++ b/be/src/pipeline/exec/table_function_operator.h @@ -25,28 +25,11 @@ #include "vec/exec/vtable_function_node.h" namespace doris { -class ExecNode; class RuntimeState; } // namespace doris namespace doris::pipeline { -class TableFunctionOperatorBuilder final : public OperatorBuilder { -public: - TableFunctionOperatorBuilder(int32_t id, ExecNode* node); - - OperatorPtr build_operator() override; -}; - -class TableFunctionOperator final : public StatefulOperator { -public: - TableFunctionOperator(OperatorBuilderBase* operator_builder, ExecNode* node); - - Status prepare(RuntimeState* state) override; - - Status close(RuntimeState* state) override; -}; - class TableFunctionOperatorX; class TableFunctionLocalState final : public PipelineXLocalState<> { public: diff --git a/be/src/pipeline/exec/table_sink_operator.h b/be/src/pipeline/exec/table_sink_operator.h deleted file mode 100644 index 46843c23f870b3..00000000000000 --- a/be/src/pipeline/exec/table_sink_operator.h +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exec/data_sink.h" -#include "operator.h" - -namespace doris { - -namespace pipeline { - -// used for VMysqlTableSink, VJdbcTableSink and VOdbcTableSink. -class TableSinkOperatorBuilder final : public DataSinkOperatorBuilder { -public: - TableSinkOperatorBuilder(int32_t id, DataSink* sink) - : DataSinkOperatorBuilder(id, "TableSinkOperator", sink) {} - - OperatorPtr build_operator() override; -}; - -class TableSinkOperator final : public DataSinkOperator { -public: - TableSinkOperator(OperatorBuilderBase* operator_builder, DataSink* sink) - : DataSinkOperator(operator_builder, sink) {} - - bool can_write() override { return _sink->can_write(); } -}; - -OperatorPtr TableSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _sink); -} - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/exec/union_sink_operator.cpp b/be/src/pipeline/exec/union_sink_operator.cpp index 40344882a84e53..aa10468fc754d7 100644 --- a/be/src/pipeline/exec/union_sink_operator.cpp +++ b/be/src/pipeline/exec/union_sink_operator.cpp @@ -26,73 +26,8 @@ #include "runtime/runtime_state.h" #include "util/runtime_profile.h" -namespace doris { -class ExecNode; -} // namespace doris - namespace doris::pipeline { -UnionSinkOperatorBuilder::UnionSinkOperatorBuilder(int32_t id, int child_id, ExecNode* node, - std::shared_ptr queue) - : OperatorBuilder(id, "UnionSinkOperator", node), - _cur_child_id(child_id), - _data_queue(queue) {}; - -UnionSinkOperator::UnionSinkOperator(OperatorBuilderBase* operator_builder, int child_id, - ExecNode* node, std::shared_ptr queue) - : StreamingOperator(operator_builder, node), _cur_child_id(child_id), _data_queue(queue) {}; - -OperatorPtr UnionSinkOperatorBuilder::build_operator() { - return std::make_shared(this, _cur_child_id, _node, _data_queue); -} - -Status UnionSinkOperator::sink(RuntimeState* state, vectorized::Block* in_block, - SourceState source_state) { - if (_output_block == nullptr) { - _output_block = _data_queue->get_free_block(_cur_child_id); - } - - if (_cur_child_id < _node->get_first_materialized_child_idx()) { //pass_through - if (in_block->rows() > 0) { - _output_block->swap(*in_block); - _data_queue->push_block(std::move(_output_block), _cur_child_id); - } - } else if (_node->get_first_materialized_child_idx() != _node->children_count() && - _cur_child_id < _node->children_count()) { //need materialized - RETURN_IF_ERROR(this->_node->materialize_child_block(state, _cur_child_id, in_block, - _output_block.get())); - } else { - return Status::InternalError("maybe can't reach here, execute const expr: {}, {}, {}", - _cur_child_id, _node->get_first_materialized_child_idx(), - _node->children_count()); - } - - if (UNLIKELY(source_state == SourceState::FINISHED)) { - //if _cur_child_id eos, need check to push block - 
//Now here can't check _output_block rows, even it's row==0, also need push block - //because maybe sink is eos and queue have none data, if not push block - //the source can't can_read again and can't set source finished - if (_output_block) { - _data_queue->push_block(std::move(_output_block), _cur_child_id); - } - _data_queue->set_finish(_cur_child_id); - return Status::OK(); - } - // not eos and block rows is enough to output,so push block - if (_output_block && (_output_block->rows() >= state->batch_size())) { - _data_queue->push_block(std::move(_output_block), _cur_child_id); - } - return Status::OK(); -} - -Status UnionSinkOperator::close(RuntimeState* state) { - if (_data_queue && !_data_queue->is_finish(_cur_child_id)) { - // finish should be set, if not set here means error. - _data_queue->set_canceled(_cur_child_id); - } - return StreamingOperator::close(state); -} - Status UnionSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/union_sink_operator.h b/be/src/pipeline/exec/union_sink_operator.h index 97b704078c63ec..c11465da134b60 100644 --- a/be/src/pipeline/exec/union_sink_operator.h +++ b/be/src/pipeline/exec/union_sink_operator.h @@ -25,47 +25,13 @@ #include "operator.h" #include "pipeline/pipeline_x/operator.h" #include "vec/core/block.h" -#include "vec/exec/vunion_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { class DataQueue; -class UnionSinkOperatorBuilder final : public OperatorBuilder { -public: - UnionSinkOperatorBuilder(int32_t id, int child_id, ExecNode* node, - std::shared_ptr queue); - - OperatorPtr build_operator() override; - - bool is_sink() const override { return true; } - -private: - int _cur_child_id; - std::shared_ptr _data_queue; -}; - -class UnionSinkOperator final : public StreamingOperator { -public: - UnionSinkOperator(OperatorBuilderBase* operator_builder, int child_id, ExecNode* node, - std::shared_ptr queue); - - bool can_write() override { return true; } - - Status sink(RuntimeState* state, vectorized::Block* in_block, - SourceState source_state) override; - - Status close(RuntimeState* state) override; - -private: - int _cur_child_id; - std::shared_ptr _data_queue; - std::unique_ptr _output_block; -}; - class UnionSinkOperatorX; class UnionSinkLocalState final : public PipelineXSinkLocalState { public: diff --git a/be/src/pipeline/exec/union_source_operator.cpp b/be/src/pipeline/exec/union_source_operator.cpp index 10f98a8d1cbc49..6393ffcccd312b 100644 --- a/be/src/pipeline/exec/union_source_operator.cpp +++ b/be/src/pipeline/exec/union_source_operator.cpp @@ -30,83 +30,10 @@ #include "vec/core/block.h" namespace doris { -class ExecNode; class RuntimeState; namespace pipeline { -UnionSourceOperatorBuilder::UnionSourceOperatorBuilder(int32_t id, ExecNode* node, - std::shared_ptr queue) - : OperatorBuilder(id, "UnionSourceOperator", node), _data_queue(queue) {}; - -OperatorPtr UnionSourceOperatorBuilder::build_operator() { - return std::make_shared(this, _node, _data_queue); -} - -UnionSourceOperator::UnionSourceOperator(OperatorBuilderBase* operator_builder, ExecNode* node, - std::shared_ptr queue) - : SourceOperator(operator_builder, node), - _data_queue(queue), - _need_read_for_const_expr(true) {}; - -bool UnionSourceOperator::_has_data() { - return _need_read_for_const_expr || _data_queue->remaining_has_data(); -} - -// we assumed it can read to process const expr, Although we 
don't know whether there is -// ,and queue have data, could read also -// The source operator's run dependences on Node's alloc_resource, which is called in Sink's open. -// So hang until SinkOperator was scheduled to open. -bool UnionSourceOperator::can_read() { - return _node->resource_allocated() && (_has_data() || _data_queue->is_all_finish()); -} - -Status UnionSourceOperator::pull_data(RuntimeState* state, vectorized::Block* block, bool* eos) { - // here we precess const expr firstly - if (_need_read_for_const_expr) { - if (_node->has_more_const(state)) { - RETURN_IF_ERROR(_node->get_next_const(state, block)); - } - _need_read_for_const_expr = _node->has_more_const(state); - } else { - std::unique_ptr output_block; - int child_idx = 0; - RETURN_IF_ERROR(_data_queue->get_block_from_queue(&output_block, &child_idx)); - if (!output_block) { - return Status::OK(); - } - block->swap(*output_block); - output_block->clear_column_data(_node->intermediate_row_desc().num_materialized_slots()); - _data_queue->push_free_block(std::move(output_block), child_idx); - } - - _node->reached_limit(block, eos); - return Status::OK(); -} - -Status UnionSourceOperator::get_block(RuntimeState* state, vectorized::Block* block, - SourceState& source_state) { - bool eos = false; - RETURN_IF_ERROR(_node->get_next_after_projects( - state, block, &eos, - std::bind(&UnionSourceOperator::pull_data, this, std::placeholders::_1, - std::placeholders::_2, std::placeholders::_3))); - //have executing const expr, queue have no data anymore, and child could be closed. - if (eos) { // reach limit - source_state = SourceState::FINISHED; - } else if (_has_data()) { - source_state = SourceState::MORE_DATA; - } else if (_data_queue->is_all_finish()) { - // Here, check the value of `_has_data(state)` again after `data_queue.is_all_finish()` is TRUE - // as there may be one or more blocks when `data_queue.is_all_finish()` is TRUE. - source_state = _has_data() ? 
SourceState::MORE_DATA : SourceState::FINISHED; - } else { - source_state = SourceState::DEPEND_ON_SOURCE; - } - - return Status::OK(); -} - Status UnionSourceLocalState::init(RuntimeState* state, LocalStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); SCOPED_TIMER(exec_time_counter()); diff --git a/be/src/pipeline/exec/union_source_operator.h b/be/src/pipeline/exec/union_source_operator.h index 60530521ec0a82..ee1d35d73cff04 100644 --- a/be/src/pipeline/exec/union_source_operator.h +++ b/be/src/pipeline/exec/union_source_operator.h @@ -23,10 +23,8 @@ #include "common/status.h" #include "operator.h" #include "pipeline/pipeline_x/operator.h" -#include "vec/exec/vunion_node.h" namespace doris { -class ExecNode; class RuntimeState; namespace vectorized { @@ -36,39 +34,6 @@ class Block; namespace pipeline { class DataQueue; -class UnionSourceOperatorBuilder final : public OperatorBuilder { -public: - UnionSourceOperatorBuilder(int32_t id, ExecNode* node, std::shared_ptr); - - bool is_source() const override { return true; } - - OperatorPtr build_operator() override; - -private: - std::shared_ptr _data_queue; -}; - -class UnionSourceOperator final : public SourceOperator { -public: - UnionSourceOperator(OperatorBuilderBase* operator_builder, ExecNode* node, - std::shared_ptr); - - // this operator in source open directly return, do this work in sink - Status open(RuntimeState* /*state*/) override { return Status::OK(); } - - Status get_block(RuntimeState* state, vectorized::Block* block, - SourceState& source_state) override; - bool can_read() override; - - Status pull_data(RuntimeState* state, vectorized::Block* output_block, bool* eos); - -private: - bool _has_data(); - - std::shared_ptr _data_queue; - bool _need_read_for_const_expr; -}; - class UnionSourceOperatorX; class UnionSourceLocalState final : public PipelineXLocalState { public: diff --git a/be/src/pipeline/pipeline.h b/be/src/pipeline/pipeline.h index ab5b7e36bc2ad5..1bab4f1fd5003b 100644 --- a/be/src/pipeline/pipeline.h +++ b/be/src/pipeline/pipeline.h @@ -42,7 +42,7 @@ using PipelineId = uint32_t; class Pipeline : public std::enable_shared_from_this { friend class PipelineTask; friend class PipelineXTask; - friend class PipelineXFragmentContext; + friend class PipelineFragmentContext; public: Pipeline() = delete; diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index 3b094062074e34..03de71f3c8d829 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -48,46 +48,52 @@ #include "pipeline/exec/analytic_sink_operator.h" #include "pipeline/exec/analytic_source_operator.h" #include "pipeline/exec/assert_num_rows_operator.h" -#include "pipeline/exec/const_value_operator.h" -#include "pipeline/exec/data_queue.h" #include "pipeline/exec/datagen_operator.h" -#include "pipeline/exec/distinct_streaming_aggregation_sink_operator.h" -#include "pipeline/exec/distinct_streaming_aggregation_source_operator.h" +#include "pipeline/exec/distinct_streaming_aggregation_operator.h" #include "pipeline/exec/empty_set_operator.h" -#include "pipeline/exec/empty_source_operator.h" +#include "pipeline/exec/es_scan_operator.h" #include "pipeline/exec/exchange_sink_operator.h" #include "pipeline/exec/exchange_source_operator.h" +#include "pipeline/exec/file_scan_operator.h" #include "pipeline/exec/group_commit_block_sink_operator.h" #include "pipeline/exec/hashjoin_build_sink.h" #include "pipeline/exec/hashjoin_probe_operator.h" 
#include "pipeline/exec/hive_table_sink_operator.h" +#include "pipeline/exec/jdbc_scan_operator.h" +#include "pipeline/exec/jdbc_table_sink_operator.h" +#include "pipeline/exec/meta_scan_operator.h" #include "pipeline/exec/multi_cast_data_stream_sink.h" #include "pipeline/exec/multi_cast_data_stream_source.h" -#include "pipeline/exec/mysql_scan_operator.h" // IWYU pragma: keep #include "pipeline/exec/nested_loop_join_build_operator.h" #include "pipeline/exec/nested_loop_join_probe_operator.h" +#include "pipeline/exec/olap_scan_operator.h" #include "pipeline/exec/olap_table_sink_operator.h" #include "pipeline/exec/olap_table_sink_v2_operator.h" -#include "pipeline/exec/operator.h" #include "pipeline/exec/partition_sort_sink_operator.h" #include "pipeline/exec/partition_sort_source_operator.h" +#include "pipeline/exec/partitioned_aggregation_sink_operator.h" +#include "pipeline/exec/partitioned_aggregation_source_operator.h" +#include "pipeline/exec/partitioned_hash_join_probe_operator.h" +#include "pipeline/exec/partitioned_hash_join_sink_operator.h" #include "pipeline/exec/repeat_operator.h" #include "pipeline/exec/result_file_sink_operator.h" #include "pipeline/exec/result_sink_operator.h" -#include "pipeline/exec/scan_operator.h" #include "pipeline/exec/schema_scan_operator.h" #include "pipeline/exec/select_operator.h" -#include "pipeline/exec/set_probe_sink_operator.h" // IWYU pragma: keep -#include "pipeline/exec/set_sink_operator.h" // IWYU pragma: keep -#include "pipeline/exec/set_source_operator.h" // IWYU pragma: keep +#include "pipeline/exec/set_probe_sink_operator.h" +#include "pipeline/exec/set_sink_operator.h" +#include "pipeline/exec/set_source_operator.h" #include "pipeline/exec/sort_sink_operator.h" #include "pipeline/exec/sort_source_operator.h" -#include "pipeline/exec/streaming_aggregation_sink_operator.h" -#include "pipeline/exec/streaming_aggregation_source_operator.h" +#include "pipeline/exec/spill_sort_sink_operator.h" +#include "pipeline/exec/spill_sort_source_operator.h" +#include "pipeline/exec/streaming_aggregation_operator.h" #include "pipeline/exec/table_function_operator.h" -#include "pipeline/exec/table_sink_operator.h" #include "pipeline/exec/union_sink_operator.h" #include "pipeline/exec/union_source_operator.h" +#include "pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h" +#include "pipeline/pipeline_x/local_exchange/local_exchange_source_operator.h" +#include "pipeline/task_scheduler.h" #include "pipeline_task.h" #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" @@ -119,14 +125,11 @@ namespace doris::pipeline { bvar::Adder g_pipeline_tasks_count("doris_pipeline_tasks_count"); PipelineFragmentContext::PipelineFragmentContext( - const TUniqueId& query_id, const TUniqueId& instance_id, int fragment_id, int backend_num, - std::shared_ptr query_ctx, ExecEnv* exec_env, - const std::function& call_back, - report_status_callback report_status_cb) + const TUniqueId& query_id, const int fragment_id, std::shared_ptr query_ctx, + ExecEnv* exec_env, const std::function& call_back, + const report_status_callback& report_status_cb) : _query_id(query_id), - _fragment_instance_id(instance_id), _fragment_id(fragment_id), - _backend_num(backend_num), _exec_env(exec_env), _query_ctx(std::move(query_ctx)), _call_back(call_back), @@ -144,14 +147,19 @@ PipelineFragmentContext::~PipelineFragmentContext() { auto st = _query_ctx->exec_status(); _query_ctx.reset(); _tasks.clear(); - if (_runtime_state != nullptr) { - _call_back(_runtime_state.get(), 
&st); - _runtime_state.reset(); + if (!_task_runtime_states.empty()) { + for (auto& runtime_state : _task_runtime_states) { + _call_back(runtime_state.get(), &st); + runtime_state.reset(); + } } - _root_pipeline.reset(); _pipelines.clear(); _sink.reset(); - _multi_cast_stream_sink_senders.clear(); + _root_op.reset(); + _runtime_state.reset(); + _runtime_filter_states.clear(); + _runtime_filter_mgr_map.clear(); + _op_id_to_le_state.clear(); } bool PipelineFragmentContext::is_timeout(const VecDateTimeValue& now) const { @@ -171,26 +179,21 @@ bool PipelineFragmentContext::is_timeout(const VecDateTimeValue& now) const { void PipelineFragmentContext::cancel(const PPlanFragmentCancelReason& reason, const std::string& msg) { LOG_INFO("PipelineFragmentContext::cancel") - .tag("query_id", print_id(_query_ctx->query_id())) + .tag("query_id", print_id(_query_id)) .tag("fragment_id", _fragment_id) - .tag("instance_id", print_id(_runtime_state->fragment_instance_id())) - .tag("reason", PPlanFragmentCancelReason_Name(reason)) - .tag("message", msg); - // TODO(zhiqiang): may be not need to check if query is already cancelled. - // Dont cancel in this situation may lead to bug. For example, result sink node - // can not be cancelled if other fragments set the query_ctx cancelled, this will - // make result receiver on fe be stocked on rpc forever until timeout... - // We need a more detail discussion. - _query_ctx->cancel(msg, Status::Cancelled(msg)); + .tag("reason", reason) + .tag("error message", msg); + if (reason == PPlanFragmentCancelReason::TIMEOUT) { + LOG(WARNING) << "PipelineFragmentContext is cancelled due to timeout : " << debug_string(); + } + _query_ctx->cancel(msg, Status::Cancelled(msg), _fragment_id); if (reason == PPlanFragmentCancelReason::LIMIT_REACH) { _is_report_on_cancel = false; } else { - LOG(WARNING) << "PipelineFragmentContext " - << PrintInstanceStandardInfo(_query_id, _fragment_instance_id) - << " is canceled, cancel message: " << msg; + for (auto& id : _fragment_instance_ids) { + LOG(WARNING) << "PipelineFragmentContext cancel instance: " << print_id(id); + } } - - _runtime_state->set_process_status(_query_ctx->exec_status()); // Get pipe from new load stream manager and send cancel to it or the fragment may hang to wait read from pipe // For stream load the fragment's query_id == load id, it is set in FE. 
auto stream_load_ctx = _exec_env->new_load_stream_mgr()->get(_query_id); @@ -198,12 +201,14 @@ void PipelineFragmentContext::cancel(const PPlanFragmentCancelReason& reason, stream_load_ctx->pipe->cancel(msg); } - // must close stream_mgr to avoid dead lock in Exchange Node - // TODO bug llj fix this other instance will not cancel - _exec_env->vstream_mgr()->cancel(_fragment_instance_id, Status::Cancelled(msg)); // Cancel the result queue manager used by spark doris connector // TODO pipeline incomp // _exec_env->result_queue_mgr()->update_queue_status(id, Status::Aborted(msg)); + for (auto& tasks : _tasks) { + for (auto& task : tasks) { + task->clear_blocking_state(); + } + } } PipelinePtr PipelineFragmentContext::add_pipeline() { @@ -231,36 +236,33 @@ PipelinePtr PipelineFragmentContext::add_pipeline(PipelinePtr parent, int idx) { return pipeline; } -Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& request, size_t idx) { +Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& request) { if (_prepared) { return Status::InternalError("Already prepared"); } - if (request.__isset.query_options && request.query_options.__isset.execution_timeout) { - _timeout = request.query_options.execution_timeout; - } - const auto& local_params = request.local_params[idx]; + _num_instances = request.local_params.size(); + _total_instances = request.__isset.total_instances ? request.total_instances : _num_instances; _runtime_profile = std::make_unique("PipelineContext"); - _start_timer = ADD_TIMER(_runtime_profile, "StartTime"); - COUNTER_UPDATE(_start_timer, _fragment_watcher.elapsed_time()); _prepare_timer = ADD_TIMER(_runtime_profile, "PrepareTime"); SCOPED_TIMER(_prepare_timer); auto* fragment_context = this; - LOG_INFO("Preparing instance {}|{}, backend_num {}", print_id(_query_id), - print_id(local_params.fragment_instance_id), local_params.backend_num); + LOG_INFO("PipelineFragmentContext::prepare") + .tag("query_id", print_id(_query_id)) + .tag("fragment_id", _fragment_id) + .tag("pthread_id", (uintptr_t)pthread_self()); - // 1. init _runtime_state - _runtime_state = RuntimeState::create_unique( - local_params.fragment_instance_id, request.query_id, request.fragment_id, - request.query_options, _query_ctx->query_globals, _exec_env, _query_ctx.get()); + if (request.query_options.__isset.is_report_success) { + fragment_context->set_is_report_success(request.query_options.is_report_success); + } - _runtime_state->set_task_execution_context(shared_from_this()); + // 1. Set up the global runtime state. 
+ _runtime_state = RuntimeState::create_unique(request.query_id, request.fragment_id, + request.query_options, _query_ctx->query_globals, + _exec_env, _query_ctx.get()); - // TODO should be combine with plan_fragment_executor.prepare funciton SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_runtime_state->query_mem_tracker()); - _runtime_state->set_be_number(local_params.backend_num); - if (request.__isset.backend_id) { _runtime_state->set_backend_id(request.backend_id); } @@ -274,10 +276,6 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re _runtime_state->set_load_job_id(request.load_job_id); } - if (request.query_options.__isset.is_report_success) { - fragment_context->set_is_report_success(request.query_options.is_report_success); - } - if (request.is_simplified_param) { _desc_tbl = _query_ctx->desc_tbl; } else { @@ -286,91 +284,58 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re DescriptorTbl::create(_runtime_state->obj_pool(), request.desc_tbl, &_desc_tbl)); } _runtime_state->set_desc_tbl(_desc_tbl); - - // 2. Create ExecNode to build pipeline with PipelineFragmentContext - RETURN_IF_ERROR_OR_CATCH_EXCEPTION( - ExecNode::create_tree(_runtime_state.get(), _runtime_state->obj_pool(), - request.fragment.plan, *_desc_tbl, &_root_plan)); - - // Set senders of exchange nodes before pipeline build - std::vector exch_nodes; - _root_plan->collect_nodes(TPlanNodeType::EXCHANGE_NODE, &exch_nodes); - for (ExecNode* exch_node : exch_nodes) { - DCHECK_EQ(exch_node->type(), TPlanNodeType::EXCHANGE_NODE); - int num_senders = find_with_default(request.per_exch_num_senders, exch_node->id(), 0); - DCHECK_GT(num_senders, 0); - static_cast(exch_node)->set_num_senders(num_senders); - } - - // All prepare work do in exec node tree - RETURN_IF_ERROR(_root_plan->prepare(_runtime_state.get())); - // set scan ranges - std::vector scan_nodes; - std::vector no_scan_ranges; - _root_plan->collect_scan_nodes(&scan_nodes); - VLOG_CRITICAL << "query " << print_id(get_query_id()) - << " scan_nodes.size()=" << scan_nodes.size(); - VLOG_CRITICAL << "query " << print_id(get_query_id()) << " params.per_node_scan_ranges.size()=" - << local_params.per_node_scan_ranges.size(); - - // set scan range in ScanNode - for (auto& i : scan_nodes) { - // TODO(cmy): this "if...else" should be removed once all ScanNode are derived from VScanNode. 
- ExecNode* node = i; - if (typeid(*node) == typeid(vectorized::NewOlapScanNode) || - typeid(*node) == typeid(vectorized::NewFileScanNode) || - typeid(*node) == typeid(vectorized::NewOdbcScanNode) || - typeid(*node) == typeid(vectorized::NewEsScanNode) || - typeid(*node) == typeid(vectorized::VMetaScanNode) || - typeid(*node) == typeid(vectorized::NewJdbcScanNode)) { - auto* scan_node = static_cast(i); - auto scan_ranges = find_with_default(local_params.per_node_scan_ranges, scan_node->id(), - no_scan_ranges); - const bool shared_scan = - find_with_default(local_params.per_node_shared_scans, scan_node->id(), false); - scan_node->set_scan_ranges(_runtime_state.get(), scan_ranges); - scan_node->set_shared_scan(_runtime_state.get(), shared_scan); - } else { - auto* scan_node = static_cast(node); - auto scan_ranges = find_with_default(local_params.per_node_scan_ranges, scan_node->id(), - no_scan_ranges); - RETURN_IF_ERROR(scan_node->set_scan_ranges(_runtime_state.get(), scan_ranges)); - VLOG_CRITICAL << "query " << print_id(get_query_id()) - << " scan_node_id=" << scan_node->id() - << " size=" << scan_ranges.get().size(); - } - } - - _runtime_state->set_per_fragment_instance_idx(local_params.sender_id); _runtime_state->set_num_per_fragment_instances(request.num_senders); _runtime_state->set_load_stream_per_node(request.load_stream_per_node); _runtime_state->set_total_load_streams(request.total_load_streams); _runtime_state->set_num_local_sink(request.num_local_sink); - if (request.fragment.__isset.output_sink) { - // Here we build a DataSink object, which will be hold by DataSinkOperator - RETURN_IF_ERROR_OR_CATCH_EXCEPTION(DataSink::create_data_sink( - _runtime_state->obj_pool(), request.fragment.output_sink, - request.fragment.output_exprs, request, idx, _root_plan->row_desc(), - _runtime_state.get(), &_sink, *_desc_tbl)); + const auto& local_params = request.local_params[0]; + if (local_params.__isset.runtime_filter_params) { + _query_ctx->runtime_filter_mgr()->set_runtime_filter_params( + local_params.runtime_filter_params); + } + if (local_params.__isset.topn_filter_source_node_ids) { + _query_ctx->init_runtime_predicates(local_params.topn_filter_source_node_ids); + } else { + _query_ctx->init_runtime_predicates({0}); } - _root_pipeline = fragment_context->add_pipeline(); - _root_pipeline->set_is_root_pipeline(); - RETURN_IF_ERROR(_build_pipelines(_root_plan, _root_pipeline)); - if (_sink) { - // DataSinkOperator is builded here - RETURN_IF_ERROR(_create_sink(request.local_params[idx].sender_id, - request.fragment.output_sink, _runtime_state.get())); + _need_local_merge = request.__isset.parallel_instances; + + // 2. Build pipelines with operators in this fragment. + auto root_pipeline = add_pipeline(); + RETURN_IF_ERROR_OR_CATCH_EXCEPTION(_build_pipelines( + _runtime_state->obj_pool(), request, *_query_ctx->desc_tbl, &_root_op, root_pipeline)); + + // 3. 
Create sink operator + if (!request.fragment.__isset.output_sink) { + return Status::InternalError("No output sink in this fragment!"); } - RETURN_IF_ERROR(_build_pipeline_tasks(request)); - if (_sink) { - _runtime_state->runtime_profile()->add_child(_sink->profile(), true, nullptr); - _sink->profile()->add_child(_root_plan->runtime_profile(), true, nullptr); - } else { - _runtime_state->runtime_profile()->add_child(_root_plan->runtime_profile(), true, nullptr); + RETURN_IF_ERROR_OR_CATCH_EXCEPTION(_create_data_sink( + _runtime_state->obj_pool(), request.fragment.output_sink, request.fragment.output_exprs, + request, root_pipeline->output_row_desc(), _runtime_state.get(), *_desc_tbl, + root_pipeline->id())); + RETURN_IF_ERROR(_sink->init(request.fragment.output_sink)); + RETURN_IF_ERROR(root_pipeline->set_sink(_sink)); + + for (PipelinePtr& pipeline : _pipelines) { + DCHECK(pipeline->sink_x() != nullptr) << pipeline->operator_xs().size(); + RETURN_IF_ERROR(pipeline->sink_x()->set_child(pipeline->operator_xs().back())); + } + if (_enable_local_shuffle()) { + RETURN_IF_ERROR(_plan_local_exchange(request.num_buckets, + request.bucket_seq_to_instance_idx, + request.shuffle_idx_to_instance_idx)); } - _runtime_state->runtime_profile()->add_child(_runtime_profile.get(), true, nullptr); + + // 4. Initialize global states in pipelines. + for (PipelinePtr& pipeline : _pipelines) { + pipeline->children().clear(); + RETURN_IF_ERROR(pipeline->prepare(_runtime_state.get())); + } + + // 5. Build pipeline tasks and initialize local state. + RETURN_IF_ERROR(_build_pipeline_tasks(request)); _init_next_report_time(); @@ -381,29 +346,189 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re Status PipelineFragmentContext::_build_pipeline_tasks( const doris::TPipelineFragmentParams& request) { _total_tasks = 0; - for (PipelinePtr& pipeline : _pipelines) { - // if sink - auto sink_operator = pipeline->get_sink_builder()->build_operator(); - // TODO pipeline 1 need to add new interface for exec node and operator - RETURN_IF_ERROR(sink_operator->init(request.fragment.output_sink)); - - RETURN_IF_ERROR(pipeline->build_operators()); - auto task = - std::make_unique(pipeline, _total_tasks++, _runtime_state.get(), - sink_operator, this, pipeline->pipeline_profile()); - RETURN_IF_ERROR(sink_operator->set_child(task->get_root())); - _tasks.emplace_back(std::move(task)); - _runtime_profile->add_child(pipeline->pipeline_profile(), true, nullptr); - } - g_pipeline_tasks_count << _total_tasks; - for (auto& task : _tasks) { - RETURN_IF_ERROR(task->prepare(_runtime_state.get())); + int target_size = request.local_params.size(); + _tasks.resize(target_size); + auto& pipeline_id_to_profile = _runtime_state->pipeline_id_to_profile(); + DCHECK(pipeline_id_to_profile.empty()); + pipeline_id_to_profile.resize(_pipelines.size()); + { + size_t pip_idx = 0; + for (auto& pipeline_profile : pipeline_id_to_profile) { + pipeline_profile = + std::make_unique("Pipeline : " + std::to_string(pip_idx)); + pip_idx++; + } } - // register the profile of child data stream sender - for (auto& sender : _multi_cast_stream_sink_senders) { - _sink->profile()->add_child(sender->profile(), true, nullptr); + for (size_t i = 0; i < target_size; i++) { + const auto& local_params = request.local_params[i]; + auto fragment_instance_id = local_params.fragment_instance_id; + _fragment_instance_ids.push_back(fragment_instance_id); + std::unique_ptr runtime_filter_mgr; + auto init_runtime_state = [&](std::unique_ptr& runtime_state) 
{ + runtime_state->set_query_mem_tracker(_query_ctx->query_mem_tracker); + + runtime_state->set_task_execution_context(shared_from_this()); + runtime_state->set_be_number(local_params.backend_num); + + if (request.__isset.backend_id) { + runtime_state->set_backend_id(request.backend_id); + } + if (request.__isset.import_label) { + runtime_state->set_import_label(request.import_label); + } + if (request.__isset.db_name) { + runtime_state->set_db_name(request.db_name); + } + if (request.__isset.load_job_id) { + runtime_state->set_load_job_id(request.load_job_id); + } + + runtime_state->set_desc_tbl(_desc_tbl); + runtime_state->set_per_fragment_instance_idx(local_params.sender_id); + runtime_state->set_num_per_fragment_instances(request.num_senders); + runtime_state->resize_op_id_to_local_state(max_operator_id()); + runtime_state->set_max_operator_id(max_operator_id()); + runtime_state->set_load_stream_per_node(request.load_stream_per_node); + runtime_state->set_total_load_streams(request.total_load_streams); + runtime_state->set_num_local_sink(request.num_local_sink); + DCHECK(runtime_filter_mgr); + runtime_state->set_pipeline_x_runtime_filter_mgr(runtime_filter_mgr.get()); + }; + + auto filterparams = std::make_unique(); + + { + filterparams->runtime_filter_wait_infinitely = + _runtime_state->runtime_filter_wait_infinitely(); + filterparams->runtime_filter_wait_time_ms = + _runtime_state->runtime_filter_wait_time_ms(); + filterparams->enable_pipeline_exec = _runtime_state->enable_pipeline_x_exec(); + filterparams->execution_timeout = _runtime_state->execution_timeout(); + + filterparams->exec_env = ExecEnv::GetInstance(); + filterparams->query_id.set_hi(_runtime_state->query_id().hi); + filterparams->query_id.set_lo(_runtime_state->query_id().lo); + + filterparams->be_exec_version = _runtime_state->be_exec_version(); + filterparams->query_ctx = _query_ctx.get(); + } + + // build local_runtime_filter_mgr for each instance + runtime_filter_mgr = std::make_unique( + request.query_id, filterparams.get(), _query_ctx->query_mem_tracker); + + filterparams->runtime_filter_mgr = runtime_filter_mgr.get(); + + _runtime_filter_states.push_back(std::move(filterparams)); + std::map pipeline_id_to_task; + auto get_local_exchange_state = [&](PipelinePtr pipeline) + -> std::map, + std::shared_ptr>> { + std::map, + std::shared_ptr>> + le_state_map; + auto source_id = pipeline->operator_xs().front()->operator_id(); + if (auto iter = _op_id_to_le_state.find(source_id); iter != _op_id_to_le_state.end()) { + le_state_map.insert({source_id, iter->second}); + } + for (auto sink_to_source_id : pipeline->sink_x()->dests_id()) { + if (auto iter = _op_id_to_le_state.find(sink_to_source_id); + iter != _op_id_to_le_state.end()) { + le_state_map.insert({sink_to_source_id, iter->second}); + } + } + return le_state_map; + }; + auto get_task_runtime_state = [&](int task_id) -> RuntimeState* { + DCHECK(_task_runtime_states[task_id]); + return _task_runtime_states[task_id].get(); + }; + for (size_t pip_idx = 0; pip_idx < _pipelines.size(); pip_idx++) { + auto& pipeline = _pipelines[pip_idx]; + if (pipeline->need_to_create_task()) { + // build task runtime state + _task_runtime_states.push_back(RuntimeState::create_unique( + this, local_params.fragment_instance_id, request.query_id, + request.fragment_id, request.query_options, _query_ctx->query_globals, + _exec_env, _query_ctx.get())); + auto& task_runtime_state = _task_runtime_states.back(); + init_runtime_state(task_runtime_state); + auto cur_task_id = _total_tasks++; + 
task_runtime_state->set_task_id(cur_task_id); + task_runtime_state->set_task_num(pipeline->num_tasks()); + auto task = std::make_unique( + pipeline, cur_task_id, get_task_runtime_state(cur_task_id), this, + pipeline_id_to_profile[pip_idx].get(), get_local_exchange_state(pipeline), + i); + pipeline_id_to_task.insert({pipeline->id(), task.get()}); + _tasks[i].emplace_back(std::move(task)); + } + } + + /** + * Build DAG for pipeline tasks. + * For example, we have + * + * ExchangeSink (Pipeline1) JoinBuildSink (Pipeline2) + * \ / + * JoinProbeOperator1 (Pipeline1) JoinBuildSink (Pipeline3) + * \ / + * JoinProbeOperator2 (Pipeline1) + * + * In this fragment, we have three pipelines and pipeline 1 depends on pipeline 2 and pipeline 3. + * To build this DAG, `_dag` manage dependencies between pipelines by pipeline ID and + * `pipeline_id_to_task` is used to find the task by a unique pipeline ID. + * + * Finally, we have two upstream dependencies in Pipeline1 corresponding to JoinProbeOperator1 + * and JoinProbeOperator2. + */ + + // First, set up the parent profile,task runtime state + + auto prepare_and_set_parent_profile = [&](PipelineXTask* task, size_t pip_idx) { + DCHECK(pipeline_id_to_profile[pip_idx]); + RETURN_IF_ERROR( + task->prepare(local_params, request.fragment.output_sink, _query_ctx.get())); + return Status::OK(); + }; + + for (auto& _pipeline : _pipelines) { + if (pipeline_id_to_task.contains(_pipeline->id())) { + auto* task = pipeline_id_to_task[_pipeline->id()]; + DCHECK(task != nullptr); + + // if this task has upstream dependency, then record them. + if (_dag.find(_pipeline->id()) != _dag.end()) { + auto& deps = _dag[_pipeline->id()]; + for (auto& dep : deps) { + if (pipeline_id_to_task.contains(dep)) { + auto ss = pipeline_id_to_task[dep]->get_sink_shared_state(); + if (ss) { + task->inject_shared_state(ss); + } else { + pipeline_id_to_task[dep]->inject_shared_state( + task->get_source_shared_state()); + } + } + } + } + } + } + for (size_t pip_idx = 0; pip_idx < _pipelines.size(); pip_idx++) { + if (pipeline_id_to_task.contains(_pipelines[pip_idx]->id())) { + auto* task = pipeline_id_to_task[_pipelines[pip_idx]->id()]; + RETURN_IF_ERROR(prepare_and_set_parent_profile(task, pip_idx)); + } + } + { + std::lock_guard l(_state_map_lock); + _runtime_filter_mgr_map[fragment_instance_id] = std::move(runtime_filter_mgr); + } } + _pipeline_parent_map.clear(); + _dag.clear(); + _op_id_to_le_state.clear(); return Status::OK(); } @@ -467,8 +592,7 @@ void PipelineFragmentContext::trigger_report_if_necessary() { } VLOG_FILE << "Query " << print_id(this->get_query_id()) << " fragment " - << this->get_fragment_id() << " instance " - << print_id(this->get_fragment_instance_id()) << " profile:\n" + << this->get_fragment_id() << " profile:\n" << ss.str(); } auto st = send_report(false); @@ -481,265 +605,840 @@ void PipelineFragmentContext::trigger_report_if_necessary() { } // TODO: use virtual function to do abstruct -Status PipelineFragmentContext::_build_pipelines(ExecNode* node, PipelinePtr cur_pipe) { - auto node_type = node->type(); - switch (node_type) { - // for source - case TPlanNodeType::OLAP_SCAN_NODE: - case TPlanNodeType::JDBC_SCAN_NODE: - case TPlanNodeType::ODBC_SCAN_NODE: - case TPlanNodeType::FILE_SCAN_NODE: - case TPlanNodeType::META_SCAN_NODE: - case TPlanNodeType::GROUP_COMMIT_SCAN_NODE: - case TPlanNodeType::ES_HTTP_SCAN_NODE: - case TPlanNodeType::ES_SCAN_NODE: { - OperatorBuilderPtr operator_t = std::make_shared(node->id(), node); - 
RETURN_IF_ERROR(cur_pipe->add_operator(operator_t)); - break; - } - case TPlanNodeType::MYSQL_SCAN_NODE: { -#ifdef DORIS_WITH_MYSQL - OperatorBuilderPtr operator_t = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(operator_t)); - break; -#else +Status PipelineFragmentContext::_build_pipelines(ObjectPool* pool, + const doris::TPipelineFragmentParams& request, + const DescriptorTbl& descs, OperatorXPtr* root, + PipelinePtr cur_pipe) { + if (request.fragment.plan.nodes.empty()) { + throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid plan which has no plan node!"); + } + + int node_idx = 0; + + cur_pipe->_name.append(std::to_string(cur_pipe->id())); + + RETURN_IF_ERROR(_create_tree_helper(pool, request.fragment.plan.nodes, request, descs, nullptr, + &node_idx, root, cur_pipe, 0)); + + if (node_idx + 1 != request.fragment.plan.nodes.size()) { + // TODO: print thrift msg for diagnostic purposes. return Status::InternalError( - "Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON"); -#endif + "Plan tree only partially reconstructed. Not all thrift nodes were used."); } - case TPlanNodeType::SCHEMA_SCAN_NODE: { - OperatorBuilderPtr operator_t = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(operator_t)); + return Status::OK(); +} + +Status PipelineFragmentContext::_create_tree_helper(ObjectPool* pool, + const std::vector& tnodes, + const doris::TPipelineFragmentParams& request, + const DescriptorTbl& descs, OperatorXPtr parent, + int* node_idx, OperatorXPtr* root, + PipelinePtr& cur_pipe, int child_idx) { + // propagate error case + if (*node_idx >= tnodes.size()) { + // TODO: print thrift msg + return Status::InternalError( + "Failed to reconstruct plan tree from thrift. Node id: {}, number of nodes: {}", + *node_idx, tnodes.size()); + } + const TPlanNode& tnode = tnodes[*node_idx]; + + int num_children = tnodes[*node_idx].num_children; + OperatorXPtr op = nullptr; + RETURN_IF_ERROR(_create_operator(pool, tnodes[*node_idx], request, descs, op, cur_pipe, + parent == nullptr ? -1 : parent->node_id(), child_idx)); + + // assert(parent != nullptr || (node_idx == 0 && root_expr != nullptr)); + if (parent != nullptr) { + // add to parent's child(s) + RETURN_IF_ERROR(parent->set_child(op)); + } else { + *root = op; + } + + cur_pipe->_name.push_back('-'); + cur_pipe->_name.append(std::to_string(op->id())); + cur_pipe->_name.append(op->get_name()); + + // rely on that tnodes is preorder of the plan + for (int i = 0; i < num_children; i++) { + ++*node_idx; + RETURN_IF_ERROR(_create_tree_helper(pool, tnodes, request, descs, op, node_idx, nullptr, + cur_pipe, i)); + + // we are expecting a child, but have used all nodes + // this means we have been given a bad tree and must fail + if (*node_idx >= tnodes.size()) { + // TODO: print thrift msg + return Status::InternalError( + "Failed to reconstruct plan tree from thrift. 
Node id: {}, number of nodes: {}", + *node_idx, tnodes.size()); + } + } + + RETURN_IF_ERROR(op->init(tnode, _runtime_state.get())); + + return Status::OK(); +} + +void PipelineFragmentContext::_inherit_pipeline_properties( + const DataDistribution& data_distribution, PipelinePtr pipe_with_source, + PipelinePtr pipe_with_sink) { + pipe_with_sink->set_num_tasks(pipe_with_source->num_tasks()); + pipe_with_source->set_num_tasks(_num_instances); + pipe_with_source->set_data_distribution(data_distribution); +} + +Status PipelineFragmentContext::_add_local_exchange_impl( + int idx, ObjectPool* pool, PipelinePtr cur_pipe, PipelinePtr new_pip, + DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, + const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx, + const bool ignore_data_hash_distribution) { + auto& operator_xs = cur_pipe->operator_xs(); + const auto downstream_pipeline_id = cur_pipe->id(); + auto local_exchange_id = next_operator_id(); + // 1. Create a new pipeline with local exchange sink. + DataSinkOperatorXPtr sink; + auto sink_id = next_sink_operator_id(); + const bool is_shuffled_hash_join = operator_xs.size() > idx + ? operator_xs[idx]->is_shuffled_hash_join() + : cur_pipe->sink_x()->is_shuffled_hash_join(); + sink.reset(new LocalExchangeSinkOperatorX( + sink_id, local_exchange_id, is_shuffled_hash_join ? _total_instances : _num_instances, + data_distribution.partition_exprs, bucket_seq_to_instance_idx)); + RETURN_IF_ERROR(new_pip->set_sink(sink)); + RETURN_IF_ERROR(new_pip->sink_x()->init(data_distribution.distribution_type, num_buckets, + is_shuffled_hash_join, shuffle_idx_to_instance_idx)); + + // 2. Create and initialize LocalExchangeSharedState. + auto shared_state = LocalExchangeSharedState::create_shared(_num_instances); + switch (data_distribution.distribution_type) { + case ExchangeType::HASH_SHUFFLE: + shared_state->exchanger = ShuffleExchanger::create_unique( + std::max(cur_pipe->num_tasks(), _num_instances), + is_shuffled_hash_join ? _total_instances : _num_instances); + break; + case ExchangeType::BUCKET_HASH_SHUFFLE: + shared_state->exchanger = BucketShuffleExchanger::create_unique( + std::max(cur_pipe->num_tasks(), _num_instances), _num_instances, num_buckets, + ignore_data_hash_distribution); break; + case ExchangeType::PASSTHROUGH: + shared_state->exchanger = + PassthroughExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); + break; + case ExchangeType::BROADCAST: + shared_state->exchanger = + BroadcastExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); + break; + case ExchangeType::PASS_TO_ONE: + shared_state->exchanger = + BroadcastExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); + break; + case ExchangeType::ADAPTIVE_PASSTHROUGH: + shared_state->exchanger = + AdaptivePassthroughExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); + break; + default: + return Status::InternalError("Unsupported local exchange type : " + + std::to_string((int)data_distribution.distribution_type)); + } + auto sink_dep = std::make_shared(sink_id, local_exchange_id, + "LOCAL_EXCHANGE_SINK_DEPENDENCY", true, + _runtime_state->get_query_ctx()); + sink_dep->set_shared_state(shared_state.get()); + shared_state->sink_deps.push_back(sink_dep); + _op_id_to_le_state.insert({local_exchange_id, {shared_state, sink_dep}}); + + // 3. Set two pipelines' operator list. 
For example, split pipeline [Scan - AggSink] to + // pipeline1 [Scan - LocalExchangeSink] and pipeline2 [LocalExchangeSource - AggSink]. + + // 3.1 Initialize new pipeline's operator list. + std::copy(operator_xs.begin(), operator_xs.begin() + idx, + std::inserter(new_pip->operator_xs(), new_pip->operator_xs().end())); + + // 3.2 Erase unused operators in previous pipeline. + operator_xs.erase(operator_xs.begin(), operator_xs.begin() + idx); + + // 4. Initialize LocalExchangeSource and insert it into this pipeline. + OperatorXPtr source_op; + source_op.reset(new LocalExchangeSourceOperatorX(pool, local_exchange_id)); + RETURN_IF_ERROR(source_op->set_child(new_pip->operator_xs().back())); + RETURN_IF_ERROR(source_op->init(data_distribution.distribution_type)); + if (!operator_xs.empty()) { + RETURN_IF_ERROR(operator_xs.front()->set_child(source_op)); + } + operator_xs.insert(operator_xs.begin(), source_op); + + shared_state->create_source_dependencies(source_op->operator_id(), source_op->node_id(), + _query_ctx.get()); + + // 5. Set children for two pipelines separately. + std::vector> new_children; + std::vector edges_with_source; + for (auto child : cur_pipe->children()) { + bool found = false; + for (auto op : new_pip->operator_xs()) { + if (child->sink_x()->node_id() == op->node_id()) { + new_pip->set_children(child); + found = true; + }; + } + if (!found) { + new_children.push_back(child); + edges_with_source.push_back(child->id()); + } } - case TPlanNodeType::EXCHANGE_NODE: { - OperatorBuilderPtr operator_t = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(operator_t)); + new_children.push_back(new_pip); + edges_with_source.push_back(new_pip->id()); + + // 6. Set DAG for new pipelines. + if (!new_pip->children().empty()) { + std::vector edges_with_sink; + for (auto child : new_pip->children()) { + edges_with_sink.push_back(child->id()); + } + _dag.insert({new_pip->id(), edges_with_sink}); + } + cur_pipe->set_children(new_children); + _dag[downstream_pipeline_id] = edges_with_source; + RETURN_IF_ERROR(new_pip->sink_x()->set_child(new_pip->operator_xs().back())); + RETURN_IF_ERROR(cur_pipe->sink_x()->set_child(cur_pipe->operator_xs().back())); + + // 7. Inherit properties from current pipeline. 
+ _inherit_pipeline_properties(data_distribution, cur_pipe, new_pip); + return Status::OK(); +} + +Status PipelineFragmentContext::_add_local_exchange( + int pip_idx, int idx, int node_id, ObjectPool* pool, PipelinePtr cur_pipe, + DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, + const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx, + const bool ignore_data_distribution) { + DCHECK(_enable_local_shuffle()); + if (_num_instances <= 1) { + return Status::OK(); + } + + if (!cur_pipe->need_to_local_exchange(data_distribution)) { + return Status::OK(); + } + *do_local_exchange = true; + + auto& operator_xs = cur_pipe->operator_xs(); + auto total_op_num = operator_xs.size(); + auto new_pip = add_pipeline(cur_pipe, pip_idx + 1); + RETURN_IF_ERROR(_add_local_exchange_impl( + idx, pool, cur_pipe, new_pip, data_distribution, do_local_exchange, num_buckets, + bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, ignore_data_distribution)); + + CHECK(total_op_num + 1 == cur_pipe->operator_xs().size() + new_pip->operator_xs().size()) + << "total_op_num: " << total_op_num + << " cur_pipe->operator_xs().size(): " << cur_pipe->operator_xs().size() + << " new_pip->operator_xs().size(): " << new_pip->operator_xs().size(); + + // Add passthrough local exchanger if necessary + if (cur_pipe->num_tasks() > 1 && new_pip->num_tasks() == 1 && + Pipeline::is_hash_exchange(data_distribution.distribution_type)) { + RETURN_IF_ERROR(_add_local_exchange_impl( + new_pip->operator_xs().size(), pool, new_pip, add_pipeline(new_pip, pip_idx + 2), + DataDistribution(ExchangeType::PASSTHROUGH), do_local_exchange, num_buckets, + bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, ignore_data_distribution)); + } + return Status::OK(); +} + +Status PipelineFragmentContext::_plan_local_exchange( + int num_buckets, const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx) { + for (int pip_idx = _pipelines.size() - 1; pip_idx >= 0; pip_idx--) { + _pipelines[pip_idx]->init_data_distribution(); + // Set property if child pipeline is not join operator's child. + if (!_pipelines[pip_idx]->children().empty()) { + for (auto& child : _pipelines[pip_idx]->children()) { + if (child->sink_x()->node_id() == + _pipelines[pip_idx]->operator_xs().front()->node_id()) { + RETURN_IF_ERROR(_pipelines[pip_idx]->operator_xs().front()->set_child( + child->operator_xs().back())); + _pipelines[pip_idx]->set_data_distribution(child->data_distribution()); + } + } + } + + RETURN_IF_ERROR(_plan_local_exchange( + _pipelines[pip_idx]->operator_xs().front()->ignore_data_hash_distribution() + ? _num_instances + : num_buckets, + pip_idx, _pipelines[pip_idx], bucket_seq_to_instance_idx, + shuffle_idx_to_instance_idx, + _pipelines[pip_idx]->operator_xs().front()->ignore_data_hash_distribution())); + } + return Status::OK(); +} + +Status PipelineFragmentContext::_plan_local_exchange( + int num_buckets, int pip_idx, PipelinePtr pip, + const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx, + const bool ignore_data_hash_distribution) { + int idx = 1; + bool do_local_exchange = false; + do { + auto& ops = pip->operator_xs(); + do_local_exchange = false; + // Plan local exchange for each operator. 
+ for (; idx < ops.size();) { + if (ops[idx]->required_data_distribution().need_local_exchange()) { + RETURN_IF_ERROR(_add_local_exchange( + pip_idx, idx, ops[idx]->node_id(), _runtime_state->obj_pool(), pip, + ops[idx]->required_data_distribution(), &do_local_exchange, num_buckets, + bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, + ignore_data_hash_distribution)); + } + if (do_local_exchange) { + // If local exchange is needed for current operator, we will split this pipeline to + // two pipelines by local exchange sink/source. And then we need to process remaining + // operators in this pipeline so we set idx to 2 (0 is local exchange source and 1 + // is current operator was already processed) and continue to plan local exchange. + idx = 2; + break; + } + idx++; + } + } while (do_local_exchange); + if (pip->sink_x()->required_data_distribution().need_local_exchange()) { + RETURN_IF_ERROR(_add_local_exchange( + pip_idx, idx, pip->sink_x()->node_id(), _runtime_state->obj_pool(), pip, + pip->sink_x()->required_data_distribution(), &do_local_exchange, num_buckets, + bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, + ignore_data_hash_distribution)); + } + return Status::OK(); +} + +Status PipelineFragmentContext::_create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink, + const std::vector& output_exprs, + const TPipelineFragmentParams& params, + const RowDescriptor& row_desc, + RuntimeState* state, DescriptorTbl& desc_tbl, + PipelineId cur_pipeline_id) { + switch (thrift_sink.type) { + case TDataSinkType::DATA_STREAM_SINK: { + if (!thrift_sink.__isset.stream_sink) { + return Status::InternalError("Missing data stream sink."); + } + _sink.reset(new ExchangeSinkOperatorX(state, row_desc, next_sink_operator_id(), + thrift_sink.stream_sink, params.destinations)); break; } - case TPlanNodeType::EMPTY_SET_NODE: { - OperatorBuilderPtr operator_t = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(operator_t)); + case TDataSinkType::RESULT_SINK: { + if (!thrift_sink.__isset.result_sink) { + return Status::InternalError("Missing data buffer sink."); + } + + // TODO: figure out good buffer size based on size of output row + _sink.reset(new ResultSinkOperatorX(next_sink_operator_id(), row_desc, output_exprs, + thrift_sink.result_sink)); break; } - case TPlanNodeType::DATA_GEN_SCAN_NODE: { - OperatorBuilderPtr operator_t = std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(operator_t)); + case TDataSinkType::GROUP_COMMIT_OLAP_TABLE_SINK: + case TDataSinkType::OLAP_TABLE_SINK: { + if (state->query_options().enable_memtable_on_sink_node && + !_has_inverted_index_or_partial_update(thrift_sink.olap_table_sink) && + !config::is_cloud_mode()) { + _sink.reset(new OlapTableSinkV2OperatorX(pool, next_sink_operator_id(), row_desc, + output_exprs)); + } else { + _sink.reset(new OlapTableSinkOperatorX(pool, next_sink_operator_id(), row_desc, + output_exprs)); + } break; } - case TPlanNodeType::UNION_NODE: { - auto* union_node = assert_cast(node); - if (union_node->children_count() == 0 && - union_node->get_first_materialized_child_idx() == 0) { // only have const expr - OperatorBuilderPtr builder = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(builder)); + case TDataSinkType::GROUP_COMMIT_BLOCK_SINK: { + DCHECK(thrift_sink.__isset.olap_table_sink); + _sink.reset(new GroupCommitBlockSinkOperatorX(next_sink_operator_id(), row_desc)); + break; + } + case TDataSinkType::HIVE_TABLE_SINK: { + if 
(!thrift_sink.__isset.hive_table_sink) { + return Status::InternalError("Missing hive table sink."); + } + _sink.reset( + new HiveTableSinkOperatorX(pool, next_sink_operator_id(), row_desc, output_exprs)); + break; + } + case TDataSinkType::JDBC_TABLE_SINK: { + if (!thrift_sink.__isset.jdbc_table_sink) { + return Status::InternalError("Missing data jdbc sink."); + } + if (config::enable_java_support) { + _sink.reset( + new JdbcTableSinkOperatorX(row_desc, next_sink_operator_id(), output_exprs)); } else { - int child_count = union_node->children_count(); - auto data_queue = std::make_shared(child_count); - data_queue->set_max_blocks_in_sub_queue(_runtime_state->data_queue_max_blocks()); - for (int child_id = 0; child_id < child_count; ++child_id) { - auto new_child_pipeline = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(union_node->child(child_id), new_child_pipeline)); - OperatorBuilderPtr child_sink_builder = std::make_shared( - union_node->id(), child_id, union_node, data_queue); - RETURN_IF_ERROR(new_child_pipeline->set_sink_builder(child_sink_builder)); - } - OperatorBuilderPtr source_builder = std::make_shared( - node->id(), union_node, data_queue); - RETURN_IF_ERROR(cur_pipe->add_operator(source_builder)); + return Status::InternalError( + "Jdbc table sink is not enabled, you can change be config " + "enable_java_support to true and restart be."); } break; } - case TPlanNodeType::AGGREGATION_NODE: { - auto* agg_node = static_cast(node); - auto new_pipe = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(node->child(0), new_pipe)); - if (agg_node->is_probe_expr_ctxs_empty() && agg_node->agg_output_desc()->slots().empty()) { - return Status::InternalError("Illegal aggregate node " + - std::to_string(agg_node->id()) + - ": group by and output is empty"); + case TDataSinkType::RESULT_FILE_SINK: { + if (!thrift_sink.__isset.result_file_sink) { + return Status::InternalError("Missing result file sink."); } - const int64_t data_queue_max_blocks = _runtime_state->data_queue_max_blocks(); - if (agg_node->is_aggregate_evaluators_empty() && !agg_node->is_probe_expr_ctxs_empty()) { - auto data_queue = std::make_shared(1); - data_queue->set_max_blocks_in_sub_queue(data_queue_max_blocks); - OperatorBuilderPtr pre_agg_sink = - std::make_shared(node->id(), agg_node, - data_queue); - RETURN_IF_ERROR(new_pipe->set_sink_builder(pre_agg_sink)); - - OperatorBuilderPtr pre_agg_source = - std::make_shared( - node->id(), agg_node, data_queue); - RETURN_IF_ERROR(cur_pipe->add_operator(pre_agg_source)); - } else if (agg_node->is_streaming_preagg() && !agg_node->is_probe_expr_ctxs_empty()) { - auto data_queue = std::make_shared(1); - data_queue->set_max_blocks_in_sub_queue(data_queue_max_blocks); - OperatorBuilderPtr pre_agg_sink = std::make_shared( - node->id(), agg_node, data_queue); - RETURN_IF_ERROR(new_pipe->set_sink_builder(pre_agg_sink)); - - OperatorBuilderPtr pre_agg_source = std::make_shared( - node->id(), agg_node, data_queue); - RETURN_IF_ERROR(cur_pipe->add_operator(pre_agg_source)); + // TODO: figure out good buffer size based on size of output row + // Result file sink is not the top sink + if (params.__isset.destinations && !params.destinations.empty()) { + _sink.reset(new ResultFileSinkOperatorX(next_sink_operator_id(), row_desc, + thrift_sink.result_file_sink, + params.destinations, output_exprs, desc_tbl)); } else { - OperatorBuilderPtr agg_sink = - std::make_shared(node->id(), agg_node); - RETURN_IF_ERROR(new_pipe->set_sink_builder(agg_sink)); - - OperatorBuilderPtr agg_source = - 
std::make_shared(node->id(), agg_node); - RETURN_IF_ERROR(cur_pipe->add_operator(agg_source)); + _sink.reset( + new ResultFileSinkOperatorX(next_sink_operator_id(), row_desc, output_exprs)); } break; } - case TPlanNodeType::SORT_NODE: { - auto new_pipeline = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(node->child(0), new_pipeline)); + case TDataSinkType::MULTI_CAST_DATA_STREAM_SINK: { + DCHECK(thrift_sink.__isset.multi_cast_stream_sink); + DCHECK_GT(thrift_sink.multi_cast_stream_sink.sinks.size(), 0); + // TODO: figure out good buffer size based on size of output row + auto sink_id = next_sink_operator_id(); + auto sender_size = thrift_sink.multi_cast_stream_sink.sinks.size(); + // one sink has multiple sources. + std::vector sources; + for (int i = 0; i < sender_size; ++i) { + auto source_id = next_operator_id(); + sources.push_back(source_id); + } - OperatorBuilderPtr sort_sink = std::make_shared(node->id(), node); - RETURN_IF_ERROR(new_pipeline->set_sink_builder(sort_sink)); + _sink.reset(new MultiCastDataStreamSinkOperatorX( + sink_id, sources, thrift_sink.multi_cast_stream_sink.sinks.size(), pool, + thrift_sink.multi_cast_stream_sink, row_desc)); + for (int i = 0; i < sender_size; ++i) { + auto new_pipeline = add_pipeline(); + RowDescriptor* _row_desc = nullptr; + { + const auto& tmp_row_desc = + !thrift_sink.multi_cast_stream_sink.sinks[i].output_exprs.empty() + ? RowDescriptor(state->desc_tbl(), + {thrift_sink.multi_cast_stream_sink.sinks[i] + .output_tuple_id}, + {false}) + : _sink->row_desc(); + _row_desc = pool->add(new RowDescriptor(tmp_row_desc)); + } + auto source_id = sources[i]; + OperatorXPtr source_op; + // 1. create and set the source operator of multi_cast_data_stream_source for new pipeline + source_op.reset(new MultiCastDataStreamerSourceOperatorX( + i, pool, thrift_sink.multi_cast_stream_sink.sinks[i], row_desc, source_id)); + RETURN_IF_ERROR(new_pipeline->add_operator(source_op)); + // 2. create and set sink operator of data stream sender for new pipeline + + DataSinkOperatorXPtr sink_op; + sink_op.reset( + new ExchangeSinkOperatorX(state, *_row_desc, next_sink_operator_id(), + thrift_sink.multi_cast_stream_sink.sinks[i], + thrift_sink.multi_cast_stream_sink.destinations[i])); + + RETURN_IF_ERROR(new_pipeline->set_sink(sink_op)); + { + TDataSink* t = pool->add(new TDataSink()); + t->stream_sink = thrift_sink.multi_cast_stream_sink.sinks[i]; + RETURN_IF_ERROR(sink_op->init(*t)); + } - OperatorBuilderPtr sort_source = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(sort_source)); + // 3. 
set dependency dag + _dag[new_pipeline->id()].push_back(cur_pipeline_id); + } + if (sources.empty()) { + return Status::InternalError("size of sources must be greater than 0"); + } break; } - case TPlanNodeType::PARTITION_SORT_NODE: { - auto new_pipeline = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(node->child(0), new_pipeline)); - - OperatorBuilderPtr partition_sort_sink = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(new_pipeline->set_sink_builder(partition_sort_sink)); + default: + return Status::InternalError("Unsuported sink type in pipeline: {}", thrift_sink.type); + } + return Status::OK(); +} - OperatorBuilderPtr partition_sort_source = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(partition_sort_source)); +// NOLINTBEGIN(readability-function-size) +// NOLINTBEGIN(readability-function-cognitive-complexity) +Status PipelineFragmentContext::_create_operator(ObjectPool* pool, const TPlanNode& tnode, + const doris::TPipelineFragmentParams& request, + const DescriptorTbl& descs, OperatorXPtr& op, + PipelinePtr& cur_pipe, int parent_idx, + int child_idx) { + // We directly construct the operator from Thrift because the given array is in the order of preorder traversal. + // Therefore, here we need to use a stack-like structure. + _pipeline_parent_map.pop(cur_pipe, parent_idx, child_idx); + std::stringstream error_msg; + + switch (tnode.node_type) { + case TPlanNodeType::OLAP_SCAN_NODE: { + op.reset(new OlapScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + if (request.__isset.parallel_instances) { + cur_pipe->set_num_tasks(request.parallel_instances); + op->set_ignore_data_distribution(); + } break; } - case TPlanNodeType::ANALYTIC_EVAL_NODE: { - auto new_pipeline = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(node->child(0), new_pipeline)); - - OperatorBuilderPtr analytic_sink = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(new_pipeline->set_sink_builder(analytic_sink)); - - OperatorBuilderPtr analytic_source = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(analytic_source)); + case doris::TPlanNodeType::JDBC_SCAN_NODE: { + if (config::enable_java_support) { + op.reset(new JDBCScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + } else { + return Status::InternalError( + "Jdbc scan node is disabled, you can change be config enable_java_support " + "to true and restart be."); + } + if (request.__isset.parallel_instances) { + cur_pipe->set_num_tasks(request.parallel_instances); + op->set_ignore_data_distribution(); + } break; } - case TPlanNodeType::REPEAT_NODE: { - RETURN_IF_ERROR(_build_pipelines(node->child(0), cur_pipe)); - OperatorBuilderPtr builder = std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(builder)); + case doris::TPlanNodeType::FILE_SCAN_NODE: { + op.reset(new FileScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + if (request.__isset.parallel_instances) { + cur_pipe->set_num_tasks(request.parallel_instances); + op->set_ignore_data_distribution(); + } break; } - case TPlanNodeType::ASSERT_NUM_ROWS_NODE: { - RETURN_IF_ERROR(_build_pipelines(node->child(0), cur_pipe)); - OperatorBuilderPtr builder = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(builder)); + case TPlanNodeType::ES_SCAN_NODE: + case 
TPlanNodeType::ES_HTTP_SCAN_NODE: { + op.reset(new EsScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + if (request.__isset.parallel_instances) { + cur_pipe->set_num_tasks(request.parallel_instances); + op->set_ignore_data_distribution(); + } break; } - case TPlanNodeType::TABLE_FUNCTION_NODE: { - RETURN_IF_ERROR(_build_pipelines(node->child(0), cur_pipe)); - OperatorBuilderPtr builder = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(builder)); + case TPlanNodeType::EXCHANGE_NODE: { + int num_senders = find_with_default(request.per_exch_num_senders, tnode.node_id, 0); + DCHECK_GT(num_senders, 0); + op.reset(new ExchangeSourceOperatorX(pool, tnode, next_operator_id(), descs, num_senders)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + if (request.__isset.parallel_instances) { + op->set_ignore_data_distribution(); + cur_pipe->set_num_tasks(request.parallel_instances); + } + break; + } + case TPlanNodeType::AGGREGATION_NODE: { + if (tnode.agg_node.grouping_exprs.empty() && + descs.get_tuple_descriptor(tnode.agg_node.output_tuple_id)->slots().empty()) { + return Status::InternalError("Illegal aggregate node " + std::to_string(tnode.node_id) + + ": group by and output is empty"); + } + if (tnode.agg_node.aggregate_functions.empty() && !_runtime_state->enable_agg_spill() && + request.query_options.__isset.enable_distinct_streaming_aggregation && + request.query_options.enable_distinct_streaming_aggregation && + !tnode.agg_node.grouping_exprs.empty()) { + op.reset(new DistinctStreamingAggOperatorX(pool, next_operator_id(), tnode, descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + } else if (tnode.agg_node.__isset.use_streaming_preaggregation && + tnode.agg_node.use_streaming_preaggregation && + !tnode.agg_node.grouping_exprs.empty()) { + op.reset(new StreamingAggOperatorX(pool, next_operator_id(), tnode, descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + } else { + if (_runtime_state->enable_agg_spill() && !tnode.agg_node.grouping_exprs.empty()) { + op.reset(new PartitionedAggSourceOperatorX(pool, tnode, next_operator_id(), descs)); + } else { + op.reset(new AggSourceOperatorX(pool, tnode, next_operator_id(), descs)); + } + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + cur_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(cur_pipe->id()); + + DataSinkOperatorXPtr sink; + if (_runtime_state->enable_agg_spill() && !tnode.agg_node.grouping_exprs.empty()) { + sink.reset(new PartitionedAggSinkOperatorX(pool, next_sink_operator_id(), tnode, + descs, _require_bucket_distribution)); + } else { + sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(), tnode, descs, + _require_bucket_distribution)); + } + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(cur_pipe->set_sink(sink)); + RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); + } break; } case TPlanNodeType::HASH_JOIN_NODE: { - auto* join_node = assert_cast(node); - auto new_pipe = add_pipeline(); - if (join_node->should_build_hash_table()) { - RETURN_IF_ERROR(_build_pipelines(node->child(1), new_pipe)); + const auto is_broadcast_join = tnode.hash_join_node.__isset.is_broadcast_join && + tnode.hash_join_node.is_broadcast_join; + const auto enable_join_spill = _runtime_state->enable_join_spill(); + if 
(enable_join_spill && !is_broadcast_join) { + auto tnode_ = tnode; + /// TODO: support rf in partitioned hash join + tnode_.runtime_filters.clear(); + const uint32_t partition_count = 32; + auto inner_probe_operator = + std::make_shared(pool, tnode_, 0, descs); + auto inner_sink_operator = std::make_shared( + pool, 0, tnode_, descs, _need_local_merge); + + RETURN_IF_ERROR(inner_probe_operator->init(tnode_, _runtime_state.get())); + RETURN_IF_ERROR(inner_sink_operator->init(tnode_, _runtime_state.get())); + + auto probe_operator = std::make_shared( + pool, tnode_, next_operator_id(), descs, partition_count); + probe_operator->set_inner_operators(inner_sink_operator, inner_probe_operator); + op = std::move(probe_operator); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + PipelinePtr build_side_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); + + auto sink_operator = std::make_shared( + pool, next_sink_operator_id(), tnode_, descs, _need_local_merge, + partition_count); + sink_operator->set_inner_operators(inner_sink_operator, inner_probe_operator); + DataSinkOperatorXPtr sink = std::move(sink_operator); + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); + RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode_, _runtime_state.get())); + + _pipeline_parent_map.push(op->node_id(), cur_pipe); + _pipeline_parent_map.push(op->node_id(), build_side_pipe); + } else { + op.reset(new HashJoinProbeOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + PipelinePtr build_side_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); + + DataSinkOperatorXPtr sink; + sink.reset(new HashJoinBuildSinkOperatorX(pool, next_sink_operator_id(), tnode, descs, + _need_local_merge)); + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); + RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode, _runtime_state.get())); + + _pipeline_parent_map.push(op->node_id(), cur_pipe); + _pipeline_parent_map.push(op->node_id(), build_side_pipe); + } + break; + } + case TPlanNodeType::CROSS_JOIN_NODE: { + op.reset(new NestedLoopJoinProbeOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + PipelinePtr build_side_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); + + DataSinkOperatorXPtr sink; + sink.reset(new NestedLoopJoinBuildSinkOperatorX(pool, next_sink_operator_id(), tnode, descs, + _need_local_merge)); + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); + RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode, _runtime_state.get())); + _pipeline_parent_map.push(op->node_id(), cur_pipe); + _pipeline_parent_map.push(op->node_id(), build_side_pipe); + break; + } + case TPlanNodeType::UNION_NODE: { + int child_count = tnode.num_children; + op.reset(new UnionSourceOperatorX(pool, tnode, next_operator_id(), 
descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + for (int i = 0; i < child_count; i++) { + PipelinePtr build_side_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); + DataSinkOperatorXPtr sink; + sink.reset(new UnionSinkOperatorX(i, next_sink_operator_id(), pool, tnode, descs)); + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); + RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode, _runtime_state.get())); + // preset children pipelines. if any pipeline found this as its father, will use the prepared pipeline to build. + _pipeline_parent_map.push(op->node_id(), build_side_pipe); + } + break; + } + case TPlanNodeType::SORT_NODE: { + if (_runtime_state->enable_sort_spill()) { + op.reset(new SpillSortSourceOperatorX(pool, tnode, next_operator_id(), descs)); } else { - OperatorBuilderPtr builder = std::make_shared( - node->child(1)->id(), node->child(1)->row_desc(), node->child(1)); - RETURN_IF_ERROR(new_pipe->add_operator(builder)); + op.reset(new SortSourceOperatorX(pool, tnode, next_operator_id(), descs)); } - OperatorBuilderPtr join_sink = - std::make_shared(node->id(), join_node); - RETURN_IF_ERROR(new_pipe->set_sink_builder(join_sink)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); - RETURN_IF_ERROR(_build_pipelines(node->child(0), cur_pipe)); - OperatorBuilderPtr join_source = - std::make_shared(node->id(), join_node); - RETURN_IF_ERROR(cur_pipe->add_operator(join_source)); + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + cur_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(cur_pipe->id()); - cur_pipe->add_dependency(new_pipe); + DataSinkOperatorXPtr sink; + if (_runtime_state->enable_sort_spill()) { + sink.reset(new SpillSortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); + } else { + sink.reset(new SortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); + } + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(cur_pipe->set_sink(sink)); + RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); break; } - case TPlanNodeType::CROSS_JOIN_NODE: { - auto new_pipe = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(node->child(1), new_pipe)); - OperatorBuilderPtr join_sink = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(new_pipe->set_sink_builder(join_sink)); + case doris::TPlanNodeType::PARTITION_SORT_NODE: { + op.reset(new PartitionSortSourceOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); - RETURN_IF_ERROR(_build_pipelines(node->child(0), cur_pipe)); - OperatorBuilderPtr join_source = - std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(join_source)); + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + cur_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(cur_pipe->id()); + + DataSinkOperatorXPtr sink; + sink.reset(new PartitionSortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(cur_pipe->set_sink(sink)); + RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, 
_runtime_state.get())); + break; + } + case TPlanNodeType::ANALYTIC_EVAL_NODE: { + op.reset(new AnalyticSourceOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); - cur_pipe->add_dependency(new_pipe); + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + cur_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(cur_pipe->id()); + + DataSinkOperatorXPtr sink; + sink.reset(new AnalyticSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(cur_pipe->set_sink(sink)); + RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); break; } case TPlanNodeType::INTERSECT_NODE: { - RETURN_IF_ERROR(_build_operators_for_set_operation_node(node, cur_pipe)); + RETURN_IF_ERROR(_build_operators_for_set_operation_node( + pool, tnode, descs, op, cur_pipe, parent_idx, child_idx)); break; } case TPlanNodeType::EXCEPT_NODE: { - RETURN_IF_ERROR(_build_operators_for_set_operation_node(node, cur_pipe)); + RETURN_IF_ERROR(_build_operators_for_set_operation_node( + pool, tnode, descs, op, cur_pipe, parent_idx, child_idx)); + break; + } + case TPlanNodeType::REPEAT_NODE: { + op.reset(new RepeatOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + break; + } + case TPlanNodeType::TABLE_FUNCTION_NODE: { + op.reset(new TableFunctionOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + break; + } + case TPlanNodeType::ASSERT_NUM_ROWS_NODE: { + op.reset(new AssertNumRowsOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + break; + } + case TPlanNodeType::EMPTY_SET_NODE: { + op.reset(new EmptySetSourceOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + break; + } + case TPlanNodeType::DATA_GEN_SCAN_NODE: { + op.reset(new DataGenSourceOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + break; + } + case TPlanNodeType::SCHEMA_SCAN_NODE: { + op.reset(new SchemaScanOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + break; + } + case TPlanNodeType::META_SCAN_NODE: { + op.reset(new MetaScanOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); break; } case TPlanNodeType::SELECT_NODE: { - RETURN_IF_ERROR(_build_pipelines(node->child(0), cur_pipe)); - OperatorBuilderPtr builder = std::make_shared(node->id(), node); - RETURN_IF_ERROR(cur_pipe->add_operator(builder)); + op.reset(new SelectOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); break; } default: - return Status::InternalError("Unsupported exec type in pipeline: {}", - print_plan_node_type(node_type)); + return Status::InternalError("Unsupported exec type in pipelineX: {}", + print_plan_node_type(tnode.node_type)); } + _require_bucket_distribution = true; + return Status::OK(); } +// NOLINTEND(readability-function-cognitive-complexity) +// NOLINTEND(readability-function-size) template -Status PipelineFragmentContext::_build_operators_for_set_operation_node(ExecNode* node, - PipelinePtr cur_pipe) { - auto build_pipeline = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(node->child(0), build_pipeline)); - OperatorBuilderPtr sink_builder = - 
std::make_shared>(node->id(), node); - RETURN_IF_ERROR(build_pipeline->set_sink_builder(sink_builder)); - std::vector all_pipelines; - all_pipelines.emplace_back(build_pipeline); - for (int child_id = 1; child_id < node->children_count(); ++child_id) { - auto probe_pipeline = add_pipeline(); - RETURN_IF_ERROR(_build_pipelines(node->child(child_id), probe_pipeline)); - OperatorBuilderPtr probe_sink_builder = - std::make_shared>(node->id(), child_id, - node); - RETURN_IF_ERROR(probe_pipeline->set_sink_builder(probe_sink_builder)); - //eg: These sinks must be completed one by one in order, child(1) must wait child(0) build finish - probe_pipeline->add_dependency(all_pipelines[child_id - 1]); - all_pipelines.emplace_back(probe_pipeline); - } - - OperatorBuilderPtr source_builder = - std::make_shared>(node->id(), node); - return cur_pipe->add_operator(source_builder); +Status PipelineFragmentContext::_build_operators_for_set_operation_node( + ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs, OperatorXPtr& op, + PipelinePtr& cur_pipe, int parent_idx, int child_idx) { + op.reset(new SetSourceOperatorX(pool, tnode, next_operator_id(), descs)); + RETURN_IF_ERROR(cur_pipe->add_operator(op)); + + const auto downstream_pipeline_id = cur_pipe->id(); + if (_dag.find(downstream_pipeline_id) == _dag.end()) { + _dag.insert({downstream_pipeline_id, {}}); + } + + for (int child_id = 0; child_id < tnode.num_children; child_id++) { + PipelinePtr probe_side_pipe = add_pipeline(cur_pipe); + _dag[downstream_pipeline_id].push_back(probe_side_pipe->id()); + + DataSinkOperatorXPtr sink; + if (child_id == 0) { + sink.reset(new SetSinkOperatorX(child_id, next_sink_operator_id(), pool, + tnode, descs)); + } else { + sink.reset(new SetProbeSinkOperatorX(child_id, next_sink_operator_id(), + pool, tnode, descs)); + } + sink->set_dests_id({op->operator_id()}); + RETURN_IF_ERROR(probe_side_pipe->set_sink(sink)); + RETURN_IF_ERROR(probe_side_pipe->sink_x()->init(tnode, _runtime_state.get())); + // prepare children pipelines. if any pipeline found this as its father, will use the prepared pipeline to build. + _pipeline_parent_map.push(op->node_id(), probe_side_pipe); + } + + return Status::OK(); } Status PipelineFragmentContext::submit() { @@ -752,23 +1451,16 @@ Status PipelineFragmentContext::submit() { Status st; auto* scheduler = _query_ctx->get_pipe_exec_scheduler(); for (auto& task : _tasks) { - st = scheduler->schedule_task(task.get()); - if (!st) { - { + for (auto& t : task) { + st = scheduler->schedule_task(t.get()); + if (!st) { std::lock_guard l(_status_lock); cancel(PPlanFragmentCancelReason::INTERNAL_ERROR, "submit context fail"); - } - { - // The fragment instance may contains 10 tasks, maybe 8 of them is in scheduler running - // and the 9th failed to add to scheduler, then it will update total_tasks. - // But the previous 8 tasks may finished and try to access total_tasks. So that - // has to use a lock to protect it. 
- std::lock_guard l(_task_mutex); _total_tasks = submit_tasks; + break; } - break; + submit_tasks++; } - submit_tasks++; } if (!st.ok()) { std::lock_guard l(_task_mutex); @@ -783,134 +1475,25 @@ Status PipelineFragmentContext::submit() { } void PipelineFragmentContext::close_sink() { - if (_sink) { - if (_prepared) { - static_cast( - _sink->close(_runtime_state.get(), Status::RuntimeError("prepare failed"))); - } else { - static_cast(_sink->close(_runtime_state.get(), Status::OK())); + for (auto& tasks : _tasks) { + auto& root_task = *tasks.begin(); + auto st = root_task->close_sink(_prepared ? Status::RuntimeError("prepare failed") + : Status::OK()); + if (!st.ok()) { + LOG_WARNING("PipelineFragmentContext::close_sink() error").tag("msg", st.msg()); } } } -void PipelineFragmentContext::close_if_prepare_failed(Status /*st*/) { - if (_tasks.empty()) { - if (_root_plan) { - static_cast(_root_plan->close(_runtime_state.get())); - } - if (_sink) { - static_cast( - _sink->close(_runtime_state.get(), Status::RuntimeError("prepare failed"))); - } - } +void PipelineFragmentContext::close_if_prepare_failed(Status st) { for (auto& task : _tasks) { - DCHECK(!task->is_pending_finish()); - WARN_IF_ERROR(task->close(Status::OK()), - fmt::format("Query {} closed since prepare failed", print_id(_query_id))); - close_a_pipeline(); - } -} - -// construct sink operator -Status PipelineFragmentContext::_create_sink(int sender_id, const TDataSink& thrift_sink, - RuntimeState* state) { - OperatorBuilderPtr sink_; - switch (thrift_sink.type) { - case TDataSinkType::DATA_STREAM_SINK: { - sink_ = std::make_shared(thrift_sink.stream_sink.dest_node_id, - _sink.get()); - break; - } - case TDataSinkType::RESULT_SINK: { - sink_ = std::make_shared(next_operator_builder_id(), - _sink.get()); - break; - } - case TDataSinkType::GROUP_COMMIT_OLAP_TABLE_SINK: - case TDataSinkType::OLAP_TABLE_SINK: { - DCHECK(thrift_sink.__isset.olap_table_sink); - if (state->query_options().enable_memtable_on_sink_node && - !_has_inverted_index_or_partial_update(thrift_sink.olap_table_sink) && - !config::is_cloud_mode()) { - sink_ = std::make_shared(next_operator_builder_id(), - _sink.get()); - } else { - sink_ = std::make_shared(next_operator_builder_id(), - _sink.get()); - } - break; - } - case TDataSinkType::GROUP_COMMIT_BLOCK_SINK: { - sink_ = std::make_shared(next_operator_builder_id(), - _sink.get()); - break; - } - case TDataSinkType::HIVE_TABLE_SINK: { - sink_ = std::make_shared(next_operator_builder_id(), - _sink.get()); - break; - } - case TDataSinkType::MYSQL_TABLE_SINK: - case TDataSinkType::JDBC_TABLE_SINK: - case TDataSinkType::ODBC_TABLE_SINK: - case TDataSinkType::RESULT_FILE_SINK: { - sink_ = std::make_shared( - thrift_sink.result_file_sink.dest_node_id, _sink.get()); - break; - } - case TDataSinkType::MULTI_CAST_DATA_STREAM_SINK: { - sink_ = std::make_shared(next_operator_builder_id(), - _sink.get()); - RETURN_IF_ERROR(_root_pipeline->set_sink_builder(sink_)); - - auto& multi_cast_data_streamer = - assert_cast(_sink.get()) - ->get_multi_cast_data_streamer(); - DCHECK_EQ(thrift_sink.multi_cast_stream_sink.sinks.size(), - thrift_sink.multi_cast_stream_sink.destinations.size()); - auto sender_size = thrift_sink.multi_cast_stream_sink.sinks.size(); - _multi_cast_stream_sink_senders.resize(sender_size); - for (int i = 0; i < sender_size; ++i) { - auto new_pipeline = add_pipeline(); - - auto row_desc = - !thrift_sink.multi_cast_stream_sink.sinks[i].output_exprs.empty() - ? 
RowDescriptor( - _runtime_state->desc_tbl(), - {thrift_sink.multi_cast_stream_sink.sinks[i].output_tuple_id}, - {false}) - : sink_->row_desc(); - // 1. create the data stream sender sink - _multi_cast_stream_sink_senders[i] = std::make_unique( - _runtime_state.get(), _runtime_state->obj_pool(), sender_id, row_desc, - thrift_sink.multi_cast_stream_sink.sinks[i], - thrift_sink.multi_cast_stream_sink.destinations[i]); - - // 2. create and set the source operator of multi_cast_data_stream_source for new pipeline - OperatorBuilderPtr source_op = - std::make_shared( - next_operator_builder_id(), i, multi_cast_data_streamer, - thrift_sink.multi_cast_stream_sink.sinks[i]); - static_cast(new_pipeline->add_operator(source_op)); - - // 3. create and set sink operator of data stream sender for new pipeline - OperatorBuilderPtr sink_op_builder = std::make_shared( - next_operator_builder_id(), _multi_cast_stream_sink_senders[i].get(), i); - static_cast(new_pipeline->set_sink_builder(sink_op_builder)); - - // 4. init and prepare the data_stream_sender of diff exchange - TDataSink t; - t.stream_sink = thrift_sink.multi_cast_stream_sink.sinks[i]; - RETURN_IF_ERROR(_multi_cast_stream_sink_senders[i]->init(t)); - RETURN_IF_ERROR(_multi_cast_stream_sink_senders[i]->prepare(state)); + for (auto& t : task) { + DCHECK(!t->is_pending_finish()); + WARN_IF_ERROR(t->close(st), "close_if_prepare_failed failed: "); + close_a_pipeline(); } - - return Status::OK(); - } - default: - return Status::InternalError("Unsuported sink type in pipeline: {}", thrift_sink.type); } - return _root_pipeline->set_sink_builder(sink_); + _query_ctx->cancel(st.to_string(), st, _fragment_id); } // If all pipeline tasks binded to the fragment instance are finished, then we could @@ -921,8 +1504,6 @@ void PipelineFragmentContext::_close_fragment_instance() { } Defer defer_op {[&]() { _is_fragment_instance_closed = true; }}; _runtime_profile->total_time_counter()->update(_fragment_watcher.elapsed_time()); - _runtime_state->runtime_profile()->total_time_counter()->update( - _fragment_watcher.elapsed_time()); static_cast(send_report(true)); if (_runtime_state->enable_profile()) { std::stringstream ss; @@ -938,9 +1519,15 @@ void PipelineFragmentContext::_close_fragment_instance() { _runtime_state->load_channel_profile()->pretty_print(&ss); } - LOG_INFO("Query {} fragment {} instance {} profile:\n {}", print_id(this->_query_id), - this->_fragment_id, print_id(this->get_fragment_instance_id()), ss.str()); + LOG_INFO("Query {} fragment {} profile:\n {}", print_id(this->_query_id), + this->_fragment_id, ss.str()); } + + if (_query_ctx->enable_profile()) { + _query_ctx->add_fragment_profile_x(_fragment_id, collect_realtime_profile_x(), + collect_realtime_load_channel_profile_x()); + } + // all submitted tasks done _exec_env->fragment_mgr()->remove_pipeline_context( std::dynamic_pointer_cast(shared_from_this())); @@ -976,31 +1563,100 @@ Status PipelineFragmentContext::send_report(bool done) { return Status::NeedSendAgain(""); } + std::vector runtime_states; + + for (auto& task_state : _task_runtime_states) { + runtime_states.push_back(task_state.get()); + } + + ReportStatusRequest req {true, + exec_status, + runtime_states, + nullptr, + _runtime_state->load_channel_profile(), + done || !exec_status.ok(), + _query_ctx->coord_addr, + _query_id, + _fragment_id, + TUniqueId(), + -1, + _runtime_state.get(), + [this](Status st) { return update_status(st); }, + [this](const PPlanFragmentCancelReason& reason, + const std::string& msg) { cancel(reason, msg); 
}}; + return _report_status_cb( - {false, - exec_status, - {}, - _runtime_state->enable_profile() ? _runtime_state->runtime_profile() : nullptr, - _runtime_state->enable_profile() ? _runtime_state->load_channel_profile() : nullptr, - done || !exec_status.ok(), - _query_ctx->coord_addr, - _query_id, - _fragment_id, - _fragment_instance_id, - _backend_num, - _runtime_state.get(), - [this](Status st) { return update_status(st); }, - [this](const PPlanFragmentCancelReason& reason, const std::string& msg) { - cancel(reason, msg); - }}, - std::dynamic_pointer_cast(shared_from_this())); + req, std::dynamic_pointer_cast(shared_from_this())); } std::string PipelineFragmentContext::debug_string() { fmt::memory_buffer debug_string_buffer; - fmt::format_to(debug_string_buffer, "PipelineFragmentContext Info: QueryId = {}\n", - print_id(_query_ctx->query_id())); + fmt::format_to(debug_string_buffer, "PipelineFragmentContext Info:\n"); + for (size_t j = 0; j < _tasks.size(); j++) { + fmt::format_to(debug_string_buffer, "Tasks in instance {}:\n", j); + for (size_t i = 0; i < _tasks[j].size(); i++) { + fmt::format_to(debug_string_buffer, "Task {}: {}\n", i, _tasks[j][i]->debug_string()); + } + } + return fmt::to_string(debug_string_buffer); } +std::vector> +PipelineFragmentContext::collect_realtime_profile_x() const { + std::vector> res; + DCHECK(_query_ctx->enable_pipeline_x_exec() == true) + << fmt::format("Query {} calling a pipeline X function, but its pipeline X is disabled", + print_id(this->_query_id)); + + // we do not have mutex to protect pipeline_id_to_profile + // so we need to make sure this funciton is invoked after fragment context + // has already been prepared. + if (!this->_prepared) { + std::string msg = + "Query " + print_id(this->_query_id) + " collecting profile, but its not prepared"; + DCHECK(false) << msg; + LOG_ERROR(msg); + return res; + } + + // pipeline_id_to_profile is initialized in prepare stage + for (auto& pipeline_profile : _runtime_state->pipeline_id_to_profile()) { + auto profile_ptr = std::make_shared(); + pipeline_profile->to_thrift(profile_ptr.get()); + res.push_back(profile_ptr); + } + + return res; +} + +std::shared_ptr +PipelineFragmentContext::collect_realtime_load_channel_profile_x() const { + // we do not have mutex to protect pipeline_id_to_profile + // so we need to make sure this funciton is invoked after fragment context + // has already been prepared. 
+ if (!this->_prepared) { + std::string msg = + "Query " + print_id(this->_query_id) + " collecting profile, but its not prepared"; + DCHECK(false) << msg; + LOG_ERROR(msg); + return nullptr; + } + + for (auto& runtime_state : _task_runtime_states) { + if (runtime_state->runtime_profile() == nullptr) { + continue; + } + + auto tmp_load_channel_profile = std::make_shared(); + + runtime_state->runtime_profile()->to_thrift(tmp_load_channel_profile.get()); + this->_runtime_state->load_channel_profile()->update(*tmp_load_channel_profile); + } + + auto load_channel_profile = std::make_shared(); + this->_runtime_state->load_channel_profile()->to_thrift(load_channel_profile.get()); + return load_channel_profile; +} + } // namespace doris::pipeline diff --git a/be/src/pipeline/pipeline_fragment_context.h b/be/src/pipeline/pipeline_fragment_context.h index b9bfcb28f68af2..0c3af6733bccba 100644 --- a/be/src/pipeline/pipeline_fragment_context.h +++ b/be/src/pipeline/pipeline_fragment_context.h @@ -31,7 +31,10 @@ #include "common/status.h" #include "pipeline/pipeline.h" +#include "pipeline/pipeline_fragment_context.h" #include "pipeline/pipeline_task.h" +#include "pipeline/pipeline_x/local_exchange/local_exchanger.h" +#include "pipeline/pipeline_x/pipeline_x_task.h" #include "runtime/query_context.h" #include "runtime/runtime_state.h" #include "runtime/task_execution_context.h" @@ -39,8 +42,6 @@ #include "util/stopwatch.hpp" namespace doris { -class ExecNode; -class DataSink; struct ReportStatusRequest; class ExecEnv; class RuntimeFilterMergeControllerEntity; @@ -49,6 +50,8 @@ class TPipelineFragmentParams; namespace pipeline { +class Dependency; + class PipelineFragmentContext : public TaskExecutionContext { public: ENABLE_FACTORY_CREATOR(PipelineFragmentContext); @@ -61,13 +64,15 @@ class PipelineFragmentContext : public TaskExecutionContext { using report_status_callback = std::function&&)>; PipelineFragmentContext() = default; - PipelineFragmentContext(const TUniqueId& query_id, const TUniqueId& instance_id, - int fragment_id, int backend_num, + PipelineFragmentContext(const TUniqueId& query_id, const int fragment_id, std::shared_ptr query_ctx, ExecEnv* exec_env, const std::function& call_back, - report_status_callback report_status_cb); + const report_status_callback& report_status_cb); + + ~PipelineFragmentContext(); - ~PipelineFragmentContext() override; + std::vector> collect_realtime_profile_x() const; + std::shared_ptr collect_realtime_load_channel_profile_x() const; bool is_timeout(const VecDateTimeValue& now) const; @@ -75,32 +80,23 @@ class PipelineFragmentContext : public TaskExecutionContext { PipelinePtr add_pipeline(PipelinePtr parent, int idx = -1); - TUniqueId get_fragment_instance_id() const { return _fragment_instance_id; } - RuntimeState* get_runtime_state() { return _runtime_state.get(); } QueryContext* get_query_ctx() { return _query_ctx.get(); } // should be protected by lock? 
[[nodiscard]] bool is_canceled() const { return _runtime_state->is_cancelled(); } - int32_t next_operator_builder_id() { return _next_operator_builder_id++; } - - Status prepare(const doris::TPipelineFragmentParams& request, size_t idx); - - virtual Status prepare(const doris::TPipelineFragmentParams& request) { - return Status::InternalError("Pipeline fragment context do not implement prepare"); - } + Status prepare(const doris::TPipelineFragmentParams& request); - virtual Status submit(); + Status submit(); - virtual void close_if_prepare_failed(Status st); - virtual void close_sink(); + void close_if_prepare_failed(Status st); + void close_sink(); void set_is_report_success(bool is_report_success) { _is_report_success = is_report_success; } - virtual void cancel( - const PPlanFragmentCancelReason& reason = PPlanFragmentCancelReason::INTERNAL_ERROR, - const std::string& msg = ""); + void cancel(const PPlanFragmentCancelReason& reason = PPlanFragmentCancelReason::INTERNAL_ERROR, + const std::string& msg = ""); // TODO: Support pipeline runtime filter @@ -110,10 +106,7 @@ class PipelineFragmentContext : public TaskExecutionContext { void close_a_pipeline(); - virtual void add_merge_controller_handler( - std::shared_ptr& handler) {} - - virtual Status send_report(bool); + Status send_report(bool); Status update_status(Status status) { std::lock_guard l(_status_lock); @@ -124,36 +117,94 @@ class PipelineFragmentContext : public TaskExecutionContext { } void trigger_report_if_necessary(); - virtual void instance_ids(std::vector& ins_ids) const { - ins_ids.resize(1); - ins_ids[0] = _fragment_instance_id; - } - virtual void instance_ids(std::vector& ins_ids) const { - ins_ids.resize(1); - ins_ids[0] = print_id(_fragment_instance_id); - } void refresh_next_report_time(); - virtual std::string debug_string(); + std::string debug_string(); uint64_t create_time() const { return _create_time; } -protected: - Status _create_sink(int sender_id, const TDataSink& t_data_sink, RuntimeState* state); - Status _build_pipelines(ExecNode*, PipelinePtr); - virtual Status _build_pipeline_tasks(const doris::TPipelineFragmentParams& request); + [[nodiscard]] int next_operator_id() { return _operator_id--; } + + [[nodiscard]] int max_operator_id() const { return _operator_id; } + + [[nodiscard]] int next_sink_operator_id() { return _sink_operator_id--; } + + [[nodiscard]] int max_sink_operator_id() const { return _sink_operator_id; } + + void instance_ids(std::vector& ins_ids) const { + ins_ids.resize(_fragment_instance_ids.size()); + for (size_t i = 0; i < _fragment_instance_ids.size(); i++) { + ins_ids[i] = _fragment_instance_ids[i]; + } + } + + void instance_ids(std::vector& ins_ids) const { + ins_ids.resize(_fragment_instance_ids.size()); + for (size_t i = 0; i < _fragment_instance_ids.size(); i++) { + ins_ids[i] = print_id(_fragment_instance_ids[i]); + } + } + + void add_merge_controller_handler( + std::shared_ptr& handler) { + _merge_controller_handlers.emplace_back(handler); + } + +private: + Status _build_pipelines(ObjectPool* pool, const doris::TPipelineFragmentParams& request, + const DescriptorTbl& descs, OperatorXPtr* root, PipelinePtr cur_pipe); + Status _create_tree_helper(ObjectPool* pool, const std::vector& tnodes, + const doris::TPipelineFragmentParams& request, + const DescriptorTbl& descs, OperatorXPtr parent, int* node_idx, + OperatorXPtr* root, PipelinePtr& cur_pipe, int child_idx); + + Status _create_operator(ObjectPool* pool, const TPlanNode& tnode, + const doris::TPipelineFragmentParams& 
request, + const DescriptorTbl& descs, OperatorXPtr& op, PipelinePtr& cur_pipe, + int parent_idx, int child_idx); template - Status _build_operators_for_set_operation_node(ExecNode*, PipelinePtr); - virtual void _close_fragment_instance(); + Status _build_operators_for_set_operation_node(ObjectPool* pool, const TPlanNode& tnode, + const DescriptorTbl& descs, OperatorXPtr& op, + PipelinePtr& cur_pipe, int parent_idx, + int child_idx); + + Status _create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink, + const std::vector& output_exprs, + const TPipelineFragmentParams& params, const RowDescriptor& row_desc, + RuntimeState* state, DescriptorTbl& desc_tbl, + PipelineId cur_pipeline_id); + Status _plan_local_exchange(int num_buckets, + const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx); + Status _plan_local_exchange(int num_buckets, int pip_idx, PipelinePtr pip, + const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx, + const bool ignore_data_distribution); + void _inherit_pipeline_properties(const DataDistribution& data_distribution, + PipelinePtr pipe_with_source, PipelinePtr pipe_with_sink); + Status _add_local_exchange(int pip_idx, int idx, int node_id, ObjectPool* pool, + PipelinePtr cur_pipe, DataDistribution data_distribution, + bool* do_local_exchange, int num_buckets, + const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx, + const bool ignore_data_distribution); + Status _add_local_exchange_impl(int idx, ObjectPool* pool, PipelinePtr cur_pipe, + PipelinePtr new_pip, DataDistribution data_distribution, + bool* do_local_exchange, int num_buckets, + const std::map& bucket_seq_to_instance_idx, + const std::map& shuffle_idx_to_instance_idx, + const bool ignore_data_hash_distribution); + + bool _enable_local_shuffle() const { return _runtime_state->enable_local_shuffle(); } + + Status _build_pipeline_tasks(const doris::TPipelineFragmentParams& request); + void _close_fragment_instance(); void _init_next_report_time(); // Id of this query TUniqueId _query_id; - TUniqueId _fragment_instance_id; int _fragment_id; - int _backend_num; - ExecEnv* _exec_env = nullptr; std::atomic_bool _prepared = false; @@ -169,27 +220,16 @@ class PipelineFragmentContext : public TaskExecutionContext { // When submit fail, `_total_tasks` is equal to the number of tasks submitted. int _total_tasks = 0; - int32_t _next_operator_builder_id = 10000; - - PipelinePtr _root_pipeline; - std::unique_ptr _runtime_profile; bool _is_report_success = false; std::unique_ptr _runtime_state; - ExecNode* _root_plan = nullptr; // lives in _runtime_state->obj_pool() - // TODO: remove the _sink and _multi_cast_stream_sink_senders to set both - // of it in pipeline task not the fragment_context - std::unique_ptr _sink; - std::vector> _multi_cast_stream_sink_senders; - std::shared_ptr _query_ctx; QueryThreadContext _query_thread_context; MonotonicStopWatch _fragment_watcher; - RuntimeProfile::Counter* _start_timer = nullptr; RuntimeProfile::Counter* _prepare_timer = nullptr; std::function _call_back; @@ -212,10 +252,78 @@ class PipelineFragmentContext : public TaskExecutionContext { VecDateTimeValue _start_time; int _timeout = -1; -private: - std::vector> _tasks; + OperatorXPtr _root_op = nullptr; + // this is a [n * m] matrix. n is parallelism of pipeline engine and m is the number of pipelines. 
+ std::vector>> _tasks; + + bool _need_local_merge = false; + + // It is used to manage the lifecycle of RuntimeFilterMergeController + std::vector> _merge_controller_handlers; + + // TODO: remove the _sink and _multi_cast_stream_sink_senders to set both + // of it in pipeline task not the fragment_context +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow-field" +#endif + DataSinkOperatorXPtr _sink = nullptr; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + // `_dag` manage dependencies between pipelines by pipeline ID. the indices will be blocked by members + std::map> _dag; + + // We use preorder traversal to create an operator tree. When we meet a join node, we should + // build probe operator and build operator in separate pipelines. To do this, we should build + // ProbeSide first, and use `_pipelines_to_build` to store which pipeline the build operator + // is in, so we can build BuildSide once we complete probe side. + struct pipeline_parent_map { + std::map> _build_side_pipelines; + void push(int parent_node_id, PipelinePtr pipeline) { + if (!_build_side_pipelines.contains(parent_node_id)) { + _build_side_pipelines.insert({parent_node_id, {pipeline}}); + } else { + _build_side_pipelines[parent_node_id].push_back(pipeline); + } + } + void pop(PipelinePtr& cur_pipe, int parent_node_id, int child_idx) { + if (!_build_side_pipelines.contains(parent_node_id)) { + return; + } + DCHECK(_build_side_pipelines.contains(parent_node_id)); + auto& child_pipeline = _build_side_pipelines[parent_node_id]; + DCHECK(child_idx < child_pipeline.size()); + cur_pipe = child_pipeline[child_idx]; + } + void clear() { _build_side_pipelines.clear(); } + } _pipeline_parent_map; + + std::mutex _state_map_lock; + + int _operator_id = 0; + int _sink_operator_id = 0; + std::map, std::shared_ptr>> + _op_id_to_le_state; + + // UniqueId -> runtime mgr + std::map> _runtime_filter_mgr_map; + + //Here are two types of runtime states: + // - _runtime state is at the Fragment level. + // - _task_runtime_states is at the task level, unique to each task. 
+ + std::vector _fragment_instance_ids; + // Local runtime states for each task + std::vector> _task_runtime_states; + + std::vector> _runtime_filter_states; + // Total instance num running on all BEs + int _total_instances = -1; uint64_t _create_time; + bool _require_bucket_distribution = false; }; } // namespace pipeline } // namespace doris \ No newline at end of file diff --git a/be/src/pipeline/pipeline_task.cpp b/be/src/pipeline/pipeline_task.cpp index f31a39df31a79d..167ff01fe695ed 100644 --- a/be/src/pipeline/pipeline_task.cpp +++ b/be/src/pipeline/pipeline_task.cpp @@ -391,39 +391,7 @@ void PipelineTask::set_state(PipelineTaskState state) { } std::string PipelineTask::debug_string() { - fmt::memory_buffer debug_string_buffer; - - fmt::format_to(debug_string_buffer, "QueryId: {}\n", print_id(query_context()->query_id())); - fmt::format_to(debug_string_buffer, "InstanceId: {}\n", - print_id(fragment_context()->get_fragment_instance_id())); - - fmt::format_to(debug_string_buffer, "RuntimeUsage: {}\n", - PrettyPrinter::print(get_runtime_ns(), TUnit::TIME_NS)); - { - std::stringstream profile_ss; - _fresh_profile_counter(); - _task_profile->pretty_print(&profile_ss, ""); - fmt::format_to(debug_string_buffer, "Profile: {}\n", profile_ss.str()); - } - fmt::format_to(debug_string_buffer, - "PipelineTask[this = {}, state = {}]\noperators: ", (void*)this, - get_state_name(_cur_state)); - for (size_t i = 0; i < _operators.size(); i++) { - fmt::format_to(debug_string_buffer, "\n{}{}", std::string(i * 2, ' '), - _operators[i]->debug_string()); - std::stringstream profile_ss; - _operators[i]->get_runtime_profile()->pretty_print(&profile_ss, std::string(i * 2, ' ')); - fmt::format_to(debug_string_buffer, "\n{}", profile_ss.str()); - } - fmt::format_to(debug_string_buffer, "\n{}{}", std::string(_operators.size() * 2, ' '), - _sink->debug_string()); - { - std::stringstream profile_ss; - _sink->get_runtime_profile()->pretty_print(&profile_ss, - std::string(_operators.size() * 2, ' ')); - fmt::format_to(debug_string_buffer, "\n{}", profile_ss.str()); - } - return fmt::to_string(debug_string_buffer); + return ""; } } // namespace doris::pipeline diff --git a/be/src/pipeline/pipeline_task.h b/be/src/pipeline/pipeline_task.h index b9a5cb06ff872d..aabfc91af37a39 100644 --- a/be/src/pipeline/pipeline_task.h +++ b/be/src/pipeline/pipeline_task.h @@ -254,8 +254,6 @@ class PipelineTask { void set_parent_profile(RuntimeProfile* profile) { _parent_profile = profile; } - virtual bool is_pipelineX() const { return false; } - bool is_running() { return _running.load(); } void set_running(bool running) { _running = running; } diff --git a/be/src/pipeline/pipeline_x/operator.cpp b/be/src/pipeline/pipeline_x/operator.cpp index d5afad15fa0ec0..8b1fb15857107a 100644 --- a/be/src/pipeline/pipeline_x/operator.cpp +++ b/be/src/pipeline/pipeline_x/operator.cpp @@ -70,8 +70,6 @@ #include "pipeline/exec/spill_sort_sink_operator.h" #include "pipeline/exec/spill_sort_source_operator.h" #include "pipeline/exec/streaming_aggregation_operator.h" -#include "pipeline/exec/streaming_aggregation_sink_operator.h" -#include "pipeline/exec/streaming_aggregation_source_operator.h" #include "pipeline/exec/table_function_operator.h" #include "pipeline/exec/union_sink_operator.h" #include "pipeline/exec/union_source_operator.h" diff --git a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp deleted file mode 100644 index fc0234c62904b6..00000000000000 --- 
a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.cpp +++ /dev/null @@ -1,1522 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "pipeline_x_fragment_context.h" - -#include -#include -#include -#include -#include -#include -#include - -// IWYU pragma: no_include -#include // IWYU pragma: keep -#include -#include -#include -#include - -#include "cloud/config.h" -#include "common/config.h" -#include "common/logging.h" -#include "exec/data_sink.h" -#include "exec/exec_node.h" -#include "exec/scan_node.h" -#include "io/fs/stream_load_pipe.h" -#include "pipeline/exec/aggregation_sink_operator.h" -#include "pipeline/exec/aggregation_source_operator.h" -#include "pipeline/exec/analytic_sink_operator.h" -#include "pipeline/exec/analytic_source_operator.h" -#include "pipeline/exec/assert_num_rows_operator.h" -#include "pipeline/exec/datagen_operator.h" -#include "pipeline/exec/distinct_streaming_aggregation_operator.h" -#include "pipeline/exec/empty_set_operator.h" -#include "pipeline/exec/es_scan_operator.h" -#include "pipeline/exec/exchange_sink_operator.h" -#include "pipeline/exec/exchange_source_operator.h" -#include "pipeline/exec/file_scan_operator.h" -#include "pipeline/exec/group_commit_block_sink_operator.h" -#include "pipeline/exec/hashjoin_build_sink.h" -#include "pipeline/exec/hashjoin_probe_operator.h" -#include "pipeline/exec/hive_table_sink_operator.h" -#include "pipeline/exec/jdbc_scan_operator.h" -#include "pipeline/exec/jdbc_table_sink_operator.h" -#include "pipeline/exec/meta_scan_operator.h" -#include "pipeline/exec/multi_cast_data_stream_sink.h" -#include "pipeline/exec/multi_cast_data_stream_source.h" -#include "pipeline/exec/nested_loop_join_build_operator.h" -#include "pipeline/exec/nested_loop_join_probe_operator.h" -#include "pipeline/exec/olap_scan_operator.h" -#include "pipeline/exec/olap_table_sink_operator.h" -#include "pipeline/exec/olap_table_sink_v2_operator.h" -#include "pipeline/exec/partition_sort_sink_operator.h" -#include "pipeline/exec/partition_sort_source_operator.h" -#include "pipeline/exec/partitioned_aggregation_sink_operator.h" -#include "pipeline/exec/partitioned_aggregation_source_operator.h" -#include "pipeline/exec/partitioned_hash_join_probe_operator.h" -#include "pipeline/exec/partitioned_hash_join_sink_operator.h" -#include "pipeline/exec/repeat_operator.h" -#include "pipeline/exec/result_file_sink_operator.h" -#include "pipeline/exec/result_sink_operator.h" -#include "pipeline/exec/schema_scan_operator.h" -#include "pipeline/exec/select_operator.h" -#include "pipeline/exec/set_probe_sink_operator.h" -#include "pipeline/exec/set_sink_operator.h" -#include "pipeline/exec/set_source_operator.h" -#include "pipeline/exec/sort_sink_operator.h" -#include 
"pipeline/exec/sort_source_operator.h" -#include "pipeline/exec/spill_sort_sink_operator.h" -#include "pipeline/exec/spill_sort_source_operator.h" -#include "pipeline/exec/streaming_aggregation_operator.h" -#include "pipeline/exec/table_function_operator.h" -#include "pipeline/exec/union_sink_operator.h" -#include "pipeline/exec/union_source_operator.h" -#include "pipeline/pipeline_x/local_exchange/local_exchange_sink_operator.h" -#include "pipeline/pipeline_x/local_exchange/local_exchange_source_operator.h" -#include "pipeline/task_scheduler.h" -#include "runtime/exec_env.h" -#include "runtime/fragment_mgr.h" -#include "runtime/runtime_filter_mgr.h" -#include "runtime/runtime_state.h" -#include "runtime/stream_load/new_load_stream_mgr.h" -#include "runtime/stream_load/stream_load_context.h" -#include "runtime/thread_context.h" -#include "service/backend_options.h" -#include "util/container_util.hpp" -#include "util/debug_util.h" -#include "util/uid_util.h" -#include "vec/runtime/vdata_stream_mgr.h" - -namespace doris::pipeline { - -PipelineXFragmentContext::PipelineXFragmentContext( - const TUniqueId& query_id, const int fragment_id, std::shared_ptr query_ctx, - ExecEnv* exec_env, const std::function& call_back, - const report_status_callback& report_status_cb) - : PipelineFragmentContext(query_id, TUniqueId(), fragment_id, -1, query_ctx, exec_env, - call_back, report_status_cb) {} - -PipelineXFragmentContext::~PipelineXFragmentContext() { - // The memory released by the query end is recorded in the query mem tracker. - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_query_thread_context.query_mem_tracker); - auto st = _query_ctx->exec_status(); - _tasks.clear(); - if (!_task_runtime_states.empty()) { - for (auto& runtime_state : _task_runtime_states) { - _call_back(runtime_state.get(), &st); - runtime_state.reset(); - } - } - _runtime_state.reset(); - _runtime_filter_states.clear(); - _runtime_filter_mgr_map.clear(); - _op_id_to_le_state.clear(); -} - -void PipelineXFragmentContext::cancel(const PPlanFragmentCancelReason& reason, - const std::string& msg) { - { - std::lock_guard l(_task_mutex); - if (_closed_tasks == _total_tasks) { - // All tasks in this PipelineXFragmentContext already closed. - return; - } - } - LOG_INFO("PipelineXFragmentContext::cancel") - .tag("query_id", print_id(_query_id)) - .tag("fragment_id", _fragment_id) - .tag("reason", reason) - .tag("error message", msg); - if (reason == PPlanFragmentCancelReason::TIMEOUT) { - LOG(WARNING) << "PipelineXFragmentContext is cancelled due to timeout : " << debug_string(); - } - _query_ctx->cancel(msg, Status::Cancelled(msg), _fragment_id); - if (reason == PPlanFragmentCancelReason::LIMIT_REACH) { - _is_report_on_cancel = false; - } else { - for (auto& id : _fragment_instance_ids) { - LOG(WARNING) << "PipelineXFragmentContext cancel instance: " << print_id(id); - } - } - // Get pipe from new load stream manager and send cancel to it or the fragment may hang to wait read from pipe - // For stream load the fragment's query_id == load id, it is set in FE. 
- auto stream_load_ctx = _exec_env->new_load_stream_mgr()->get(_query_id); - if (stream_load_ctx != nullptr) { - stream_load_ctx->pipe->cancel(msg); - } - - // Cancel the result queue manager used by spark doris connector - // TODO pipeline incomp - // _exec_env->result_queue_mgr()->update_queue_status(id, Status::Aborted(msg)); - for (auto& tasks : _tasks) { - for (auto& task : tasks) { - if (task->is_finished()) { - continue; - } - task->clear_blocking_state(); - } - } -} - -Status PipelineXFragmentContext::prepare(const doris::TPipelineFragmentParams& request) { - if (_prepared) { - return Status::InternalError("Already prepared"); - } - _num_instances = request.local_params.size(); - _total_instances = request.__isset.total_instances ? request.total_instances : _num_instances; - _runtime_profile = std::make_unique("PipelineContext"); - _prepare_timer = ADD_TIMER(_runtime_profile, "PrepareTime"); - SCOPED_TIMER(_prepare_timer); - - auto* fragment_context = this; - - LOG_INFO("PipelineXFragmentContext::prepare") - .tag("query_id", print_id(_query_id)) - .tag("fragment_id", _fragment_id) - .tag("pthread_id", (uintptr_t)pthread_self()); - - if (request.query_options.__isset.is_report_success) { - fragment_context->set_is_report_success(request.query_options.is_report_success); - } - - // 1. Set up the global runtime state. - _runtime_state = RuntimeState::create_unique(request.query_id, request.fragment_id, - request.query_options, _query_ctx->query_globals, - _exec_env, _query_ctx.get()); - - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_runtime_state->query_mem_tracker()); - if (request.__isset.backend_id) { - _runtime_state->set_backend_id(request.backend_id); - } - if (request.__isset.import_label) { - _runtime_state->set_import_label(request.import_label); - } - if (request.__isset.db_name) { - _runtime_state->set_db_name(request.db_name); - } - if (request.__isset.load_job_id) { - _runtime_state->set_load_job_id(request.load_job_id); - } - - if (request.is_simplified_param) { - _desc_tbl = _query_ctx->desc_tbl; - } else { - DCHECK(request.__isset.desc_tbl); - RETURN_IF_ERROR( - DescriptorTbl::create(_runtime_state->obj_pool(), request.desc_tbl, &_desc_tbl)); - } - _runtime_state->set_desc_tbl(_desc_tbl); - _runtime_state->set_num_per_fragment_instances(request.num_senders); - _runtime_state->set_load_stream_per_node(request.load_stream_per_node); - _runtime_state->set_total_load_streams(request.total_load_streams); - _runtime_state->set_num_local_sink(request.num_local_sink); - - const auto& local_params = request.local_params[0]; - if (local_params.__isset.runtime_filter_params) { - _query_ctx->runtime_filter_mgr()->set_runtime_filter_params( - local_params.runtime_filter_params); - } - if (local_params.__isset.topn_filter_source_node_ids) { - _query_ctx->init_runtime_predicates(local_params.topn_filter_source_node_ids); - } else { - _query_ctx->init_runtime_predicates({0}); - } - - _need_local_merge = request.__isset.parallel_instances; - - // 2. Build pipelines with operators in this fragment. - auto root_pipeline = add_pipeline(); - RETURN_IF_ERROR_OR_CATCH_EXCEPTION(_build_pipelines( - _runtime_state->obj_pool(), request, *_query_ctx->desc_tbl, &_root_op, root_pipeline)); - - // 3. 
Create sink operator - if (!request.fragment.__isset.output_sink) { - return Status::InternalError("No output sink in this fragment!"); - } - RETURN_IF_ERROR_OR_CATCH_EXCEPTION(_create_data_sink( - _runtime_state->obj_pool(), request.fragment.output_sink, request.fragment.output_exprs, - request, root_pipeline->output_row_desc(), _runtime_state.get(), *_desc_tbl, - root_pipeline->id())); - RETURN_IF_ERROR(_sink->init(request.fragment.output_sink)); - RETURN_IF_ERROR(root_pipeline->set_sink(_sink)); - - for (PipelinePtr& pipeline : _pipelines) { - DCHECK(pipeline->sink_x() != nullptr) << pipeline->operator_xs().size(); - RETURN_IF_ERROR(pipeline->sink_x()->set_child(pipeline->operator_xs().back())); - } - if (_enable_local_shuffle()) { - RETURN_IF_ERROR(_plan_local_exchange(request.num_buckets, - request.bucket_seq_to_instance_idx, - request.shuffle_idx_to_instance_idx)); - } - - // 4. Initialize global states in pipelines. - for (PipelinePtr& pipeline : _pipelines) { - pipeline->children().clear(); - RETURN_IF_ERROR(pipeline->prepare(_runtime_state.get())); - } - - // 5. Build pipeline tasks and initialize local state. - RETURN_IF_ERROR(_build_pipeline_tasks(request)); - - _init_next_report_time(); - - _prepared = true; - return Status::OK(); -} - -Status PipelineXFragmentContext::_plan_local_exchange( - int num_buckets, const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx) { - for (int pip_idx = _pipelines.size() - 1; pip_idx >= 0; pip_idx--) { - _pipelines[pip_idx]->init_data_distribution(); - // Set property if child pipeline is not join operator's child. - if (!_pipelines[pip_idx]->children().empty()) { - for (auto& child : _pipelines[pip_idx]->children()) { - if (child->sink_x()->node_id() == - _pipelines[pip_idx]->operator_xs().front()->node_id()) { - RETURN_IF_ERROR(_pipelines[pip_idx]->operator_xs().front()->set_child( - child->operator_xs().back())); - _pipelines[pip_idx]->set_data_distribution(child->data_distribution()); - } - } - } - - RETURN_IF_ERROR(_plan_local_exchange( - _pipelines[pip_idx]->operator_xs().front()->ignore_data_hash_distribution() - ? _num_instances - : num_buckets, - pip_idx, _pipelines[pip_idx], bucket_seq_to_instance_idx, - shuffle_idx_to_instance_idx, - _pipelines[pip_idx]->operator_xs().front()->ignore_data_hash_distribution())); - } - return Status::OK(); -} - -Status PipelineXFragmentContext::_plan_local_exchange( - int num_buckets, int pip_idx, PipelinePtr pip, - const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_hash_distribution) { - int idx = 1; - bool do_local_exchange = false; - do { - auto& ops = pip->operator_xs(); - do_local_exchange = false; - // Plan local exchange for each operator. - for (; idx < ops.size();) { - if (ops[idx]->required_data_distribution().need_local_exchange()) { - RETURN_IF_ERROR(_add_local_exchange( - pip_idx, idx, ops[idx]->node_id(), _runtime_state->obj_pool(), pip, - ops[idx]->required_data_distribution(), &do_local_exchange, num_buckets, - bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, - ignore_data_hash_distribution)); - } - if (do_local_exchange) { - // If local exchange is needed for current operator, we will split this pipeline to - // two pipelines by local exchange sink/source. And then we need to process remaining - // operators in this pipeline so we set idx to 2 (0 is local exchange source and 1 - // is current operator was already processed) and continue to plan local exchange. 
- idx = 2; - break; - } - idx++; - } - } while (do_local_exchange); - if (pip->sink_x()->required_data_distribution().need_local_exchange()) { - RETURN_IF_ERROR(_add_local_exchange( - pip_idx, idx, pip->sink_x()->node_id(), _runtime_state->obj_pool(), pip, - pip->sink_x()->required_data_distribution(), &do_local_exchange, num_buckets, - bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, - ignore_data_hash_distribution)); - } - return Status::OK(); -} - -Status PipelineXFragmentContext::_create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink, - const std::vector& output_exprs, - const TPipelineFragmentParams& params, - const RowDescriptor& row_desc, - RuntimeState* state, DescriptorTbl& desc_tbl, - PipelineId cur_pipeline_id) { - switch (thrift_sink.type) { - case TDataSinkType::DATA_STREAM_SINK: { - if (!thrift_sink.__isset.stream_sink) { - return Status::InternalError("Missing data stream sink."); - } - _sink.reset(new ExchangeSinkOperatorX(state, row_desc, next_sink_operator_id(), - thrift_sink.stream_sink, params.destinations)); - break; - } - case TDataSinkType::RESULT_SINK: { - if (!thrift_sink.__isset.result_sink) { - return Status::InternalError("Missing data buffer sink."); - } - - // TODO: figure out good buffer size based on size of output row - _sink.reset(new ResultSinkOperatorX(next_sink_operator_id(), row_desc, output_exprs, - thrift_sink.result_sink)); - break; - } - case TDataSinkType::GROUP_COMMIT_OLAP_TABLE_SINK: - case TDataSinkType::OLAP_TABLE_SINK: { - if (state->query_options().enable_memtable_on_sink_node && - !_has_inverted_index_or_partial_update(thrift_sink.olap_table_sink) && - !config::is_cloud_mode()) { - _sink.reset(new OlapTableSinkV2OperatorX(pool, next_sink_operator_id(), row_desc, - output_exprs)); - } else { - _sink.reset(new OlapTableSinkOperatorX(pool, next_sink_operator_id(), row_desc, - output_exprs)); - } - break; - } - case TDataSinkType::GROUP_COMMIT_BLOCK_SINK: { - DCHECK(thrift_sink.__isset.olap_table_sink); - _sink.reset(new GroupCommitBlockSinkOperatorX(next_sink_operator_id(), row_desc)); - break; - } - case TDataSinkType::HIVE_TABLE_SINK: { - if (!thrift_sink.__isset.hive_table_sink) { - return Status::InternalError("Missing hive table sink."); - } - _sink.reset( - new HiveTableSinkOperatorX(pool, next_sink_operator_id(), row_desc, output_exprs)); - break; - } - case TDataSinkType::JDBC_TABLE_SINK: { - if (!thrift_sink.__isset.jdbc_table_sink) { - return Status::InternalError("Missing data jdbc sink."); - } - if (config::enable_java_support) { - _sink.reset( - new JdbcTableSinkOperatorX(row_desc, next_sink_operator_id(), output_exprs)); - } else { - return Status::InternalError( - "Jdbc table sink is not enabled, you can change be config " - "enable_java_support to true and restart be."); - } - break; - } - case TDataSinkType::RESULT_FILE_SINK: { - if (!thrift_sink.__isset.result_file_sink) { - return Status::InternalError("Missing result file sink."); - } - - // TODO: figure out good buffer size based on size of output row - // Result file sink is not the top sink - if (params.__isset.destinations && !params.destinations.empty()) { - _sink.reset(new ResultFileSinkOperatorX(next_sink_operator_id(), row_desc, - thrift_sink.result_file_sink, - params.destinations, output_exprs, desc_tbl)); - } else { - _sink.reset( - new ResultFileSinkOperatorX(next_sink_operator_id(), row_desc, output_exprs)); - } - break; - } - case TDataSinkType::MULTI_CAST_DATA_STREAM_SINK: { - DCHECK(thrift_sink.__isset.multi_cast_stream_sink); - 
DCHECK_GT(thrift_sink.multi_cast_stream_sink.sinks.size(), 0); - // TODO: figure out good buffer size based on size of output row - auto sink_id = next_sink_operator_id(); - auto sender_size = thrift_sink.multi_cast_stream_sink.sinks.size(); - // one sink has multiple sources. - std::vector sources; - for (int i = 0; i < sender_size; ++i) { - auto source_id = next_operator_id(); - sources.push_back(source_id); - } - - _sink.reset(new MultiCastDataStreamSinkOperatorX( - sink_id, sources, thrift_sink.multi_cast_stream_sink.sinks.size(), pool, - thrift_sink.multi_cast_stream_sink, row_desc)); - for (int i = 0; i < sender_size; ++i) { - auto new_pipeline = add_pipeline(); - RowDescriptor* _row_desc = nullptr; - { - const auto& tmp_row_desc = - !thrift_sink.multi_cast_stream_sink.sinks[i].output_exprs.empty() - ? RowDescriptor(state->desc_tbl(), - {thrift_sink.multi_cast_stream_sink.sinks[i] - .output_tuple_id}, - {false}) - : _sink->row_desc(); - _row_desc = pool->add(new RowDescriptor(tmp_row_desc)); - } - auto source_id = sources[i]; - OperatorXPtr source_op; - // 1. create and set the source operator of multi_cast_data_stream_source for new pipeline - source_op.reset(new MultiCastDataStreamerSourceOperatorX( - i, pool, thrift_sink.multi_cast_stream_sink.sinks[i], row_desc, source_id)); - RETURN_IF_ERROR(new_pipeline->add_operator(source_op)); - // 2. create and set sink operator of data stream sender for new pipeline - - DataSinkOperatorXPtr sink_op; - sink_op.reset( - new ExchangeSinkOperatorX(state, *_row_desc, next_sink_operator_id(), - thrift_sink.multi_cast_stream_sink.sinks[i], - thrift_sink.multi_cast_stream_sink.destinations[i])); - - RETURN_IF_ERROR(new_pipeline->set_sink(sink_op)); - { - TDataSink* t = pool->add(new TDataSink()); - t->stream_sink = thrift_sink.multi_cast_stream_sink.sinks[i]; - RETURN_IF_ERROR(sink_op->init(*t)); - } - - // 3. 
set dependency dag - _dag[new_pipeline->id()].push_back(cur_pipeline_id); - } - if (sources.empty()) { - return Status::InternalError("size of sources must be greater than 0"); - } - break; - } - default: - return Status::InternalError("Unsuported sink type in pipeline: {}", thrift_sink.type); - } - return Status::OK(); -} - -Status PipelineXFragmentContext::_build_pipeline_tasks( - const doris::TPipelineFragmentParams& request) { - _total_tasks = 0; - int target_size = request.local_params.size(); - _tasks.resize(target_size); - auto& pipeline_id_to_profile = _runtime_state->pipeline_id_to_profile(); - DCHECK(pipeline_id_to_profile.empty()); - pipeline_id_to_profile.resize(_pipelines.size()); - { - size_t pip_idx = 0; - for (auto& pipeline_profile : pipeline_id_to_profile) { - pipeline_profile = - std::make_unique("Pipeline : " + std::to_string(pip_idx)); - pip_idx++; - } - } - - for (size_t i = 0; i < target_size; i++) { - const auto& local_params = request.local_params[i]; - auto fragment_instance_id = local_params.fragment_instance_id; - _fragment_instance_ids.push_back(fragment_instance_id); - std::unique_ptr runtime_filter_mgr; - auto init_runtime_state = [&](std::unique_ptr& runtime_state) { - runtime_state->set_query_mem_tracker(_query_ctx->query_mem_tracker); - - runtime_state->set_task_execution_context(shared_from_this()); - runtime_state->set_be_number(local_params.backend_num); - - if (request.__isset.backend_id) { - runtime_state->set_backend_id(request.backend_id); - } - if (request.__isset.import_label) { - runtime_state->set_import_label(request.import_label); - } - if (request.__isset.db_name) { - runtime_state->set_db_name(request.db_name); - } - if (request.__isset.load_job_id) { - runtime_state->set_load_job_id(request.load_job_id); - } - - runtime_state->set_desc_tbl(_desc_tbl); - runtime_state->set_per_fragment_instance_idx(local_params.sender_id); - runtime_state->set_num_per_fragment_instances(request.num_senders); - runtime_state->resize_op_id_to_local_state(max_operator_id()); - runtime_state->set_max_operator_id(max_operator_id()); - runtime_state->set_load_stream_per_node(request.load_stream_per_node); - runtime_state->set_total_load_streams(request.total_load_streams); - runtime_state->set_num_local_sink(request.num_local_sink); - DCHECK(runtime_filter_mgr); - runtime_state->set_pipeline_x_runtime_filter_mgr(runtime_filter_mgr.get()); - }; - - auto filterparams = std::make_unique(); - - { - filterparams->runtime_filter_wait_infinitely = - _runtime_state->runtime_filter_wait_infinitely(); - filterparams->runtime_filter_wait_time_ms = - _runtime_state->runtime_filter_wait_time_ms(); - filterparams->enable_pipeline_exec = _runtime_state->enable_pipeline_x_exec(); - filterparams->execution_timeout = _runtime_state->execution_timeout(); - - filterparams->exec_env = ExecEnv::GetInstance(); - filterparams->query_id.set_hi(_runtime_state->query_id().hi); - filterparams->query_id.set_lo(_runtime_state->query_id().lo); - - filterparams->be_exec_version = _runtime_state->be_exec_version(); - filterparams->query_ctx = _query_ctx.get(); - } - - // build local_runtime_filter_mgr for each instance - runtime_filter_mgr = std::make_unique( - request.query_id, filterparams.get(), _query_ctx->query_mem_tracker); - - filterparams->runtime_filter_mgr = runtime_filter_mgr.get(); - - _runtime_filter_states.push_back(std::move(filterparams)); - std::map pipeline_id_to_task; - auto get_local_exchange_state = [&](PipelinePtr pipeline) - -> std::map, - std::shared_ptr>> { - std::map, - 
std::shared_ptr>> - le_state_map; - auto source_id = pipeline->operator_xs().front()->operator_id(); - if (auto iter = _op_id_to_le_state.find(source_id); iter != _op_id_to_le_state.end()) { - le_state_map.insert({source_id, iter->second}); - } - for (auto sink_to_source_id : pipeline->sink_x()->dests_id()) { - if (auto iter = _op_id_to_le_state.find(sink_to_source_id); - iter != _op_id_to_le_state.end()) { - le_state_map.insert({sink_to_source_id, iter->second}); - } - } - return le_state_map; - }; - auto get_task_runtime_state = [&](int task_id) -> RuntimeState* { - DCHECK(_task_runtime_states[task_id]); - return _task_runtime_states[task_id].get(); - }; - for (size_t pip_idx = 0; pip_idx < _pipelines.size(); pip_idx++) { - auto& pipeline = _pipelines[pip_idx]; - if (pipeline->need_to_create_task()) { - // build task runtime state - _task_runtime_states.push_back(RuntimeState::create_unique( - this, local_params.fragment_instance_id, request.query_id, - request.fragment_id, request.query_options, _query_ctx->query_globals, - _exec_env, _query_ctx.get())); - auto& task_runtime_state = _task_runtime_states.back(); - init_runtime_state(task_runtime_state); - auto cur_task_id = _total_tasks++; - task_runtime_state->set_task_id(cur_task_id); - task_runtime_state->set_task_num(pipeline->num_tasks()); - auto task = std::make_unique( - pipeline, cur_task_id, get_task_runtime_state(cur_task_id), this, - pipeline_id_to_profile[pip_idx].get(), get_local_exchange_state(pipeline), - i); - pipeline_id_to_task.insert({pipeline->id(), task.get()}); - _tasks[i].emplace_back(std::move(task)); - } - } - - /** - * Build DAG for pipeline tasks. - * For example, we have - * - * ExchangeSink (Pipeline1) JoinBuildSink (Pipeline2) - * \ / - * JoinProbeOperator1 (Pipeline1) JoinBuildSink (Pipeline3) - * \ / - * JoinProbeOperator2 (Pipeline1) - * - * In this fragment, we have three pipelines and pipeline 1 depends on pipeline 2 and pipeline 3. - * To build this DAG, `_dag` manage dependencies between pipelines by pipeline ID and - * `pipeline_id_to_task` is used to find the task by a unique pipeline ID. - * - * Finally, we have two upstream dependencies in Pipeline1 corresponding to JoinProbeOperator1 - * and JoinProbeOperator2. - */ - - // First, set up the parent profile,task runtime state - - auto prepare_and_set_parent_profile = [&](PipelineXTask* task, size_t pip_idx) { - DCHECK(pipeline_id_to_profile[pip_idx]); - RETURN_IF_ERROR( - task->prepare(local_params, request.fragment.output_sink, _query_ctx.get())); - return Status::OK(); - }; - - for (auto& _pipeline : _pipelines) { - if (pipeline_id_to_task.contains(_pipeline->id())) { - auto* task = pipeline_id_to_task[_pipeline->id()]; - DCHECK(task != nullptr); - - // if this task has upstream dependency, then record them. 
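// A simplified sketch, using stand-in types rather than the real Doris classes,
// of the dependency wiring performed just below: `dag` maps a downstream pipeline
// id to the ids of the pipelines it depends on, and each dependency's sink shared
// state is handed to the downstream task so the probe side can see what the build
// side produced. The reverse branch (injecting the source state back into the
// dependency's task) is omitted here for brevity.
#include <map>
#include <memory>
#include <vector>

struct SharedState {};  // stands in for the sink/source shared state
struct Task {
    std::shared_ptr<SharedState> sink_state;              // produced by this task's sink
    std::vector<std::shared_ptr<SharedState>> injected;   // states received from upstream
};

static void wire_dependencies(const std::map<int, std::vector<int>>& dag,
                              std::map<int, Task>& pipeline_id_to_task) {
    for (auto& [pipeline_id, task] : pipeline_id_to_task) {
        auto it = dag.find(pipeline_id);
        if (it == dag.end()) continue;  // no upstream dependency recorded
        for (int dep_id : it->second) {
            auto dep = pipeline_id_to_task.find(dep_id);
            if (dep != pipeline_id_to_task.end() && dep->second.sink_state) {
                task.injected.push_back(dep->second.sink_state);
            }
        }
    }
}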
- if (_dag.find(_pipeline->id()) != _dag.end()) { - auto& deps = _dag[_pipeline->id()]; - for (auto& dep : deps) { - if (pipeline_id_to_task.contains(dep)) { - auto ss = pipeline_id_to_task[dep]->get_sink_shared_state(); - if (ss) { - task->inject_shared_state(ss); - } else { - pipeline_id_to_task[dep]->inject_shared_state( - task->get_source_shared_state()); - } - } - } - } - } - } - for (size_t pip_idx = 0; pip_idx < _pipelines.size(); pip_idx++) { - if (pipeline_id_to_task.contains(_pipelines[pip_idx]->id())) { - auto* task = pipeline_id_to_task[_pipelines[pip_idx]->id()]; - RETURN_IF_ERROR(prepare_and_set_parent_profile(task, pip_idx)); - } - } - { - std::lock_guard l(_state_map_lock); - _runtime_filter_mgr_map[fragment_instance_id] = std::move(runtime_filter_mgr); - } - } - _pipeline_parent_map.clear(); - _dag.clear(); - _op_id_to_le_state.clear(); - - return Status::OK(); -} - -Status PipelineXFragmentContext::_build_pipelines(ObjectPool* pool, - const doris::TPipelineFragmentParams& request, - const DescriptorTbl& descs, OperatorXPtr* root, - PipelinePtr cur_pipe) { - if (request.fragment.plan.nodes.empty()) { - throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid plan which has no plan node!"); - } - - int node_idx = 0; - - cur_pipe->_name.append(std::to_string(cur_pipe->id())); - - RETURN_IF_ERROR(_create_tree_helper(pool, request.fragment.plan.nodes, request, descs, nullptr, - &node_idx, root, cur_pipe, 0)); - - if (node_idx + 1 != request.fragment.plan.nodes.size()) { - // TODO: print thrift msg for diagnostic purposes. - return Status::InternalError( - "Plan tree only partially reconstructed. Not all thrift nodes were used."); - } - - return Status::OK(); -} - -Status PipelineXFragmentContext::_create_tree_helper(ObjectPool* pool, - const std::vector& tnodes, - const doris::TPipelineFragmentParams& request, - const DescriptorTbl& descs, - OperatorXPtr parent, int* node_idx, - OperatorXPtr* root, PipelinePtr& cur_pipe, - int child_idx) { - // propagate error case - if (*node_idx >= tnodes.size()) { - // TODO: print thrift msg - return Status::InternalError( - "Failed to reconstruct plan tree from thrift. Node id: {}, number of nodes: {}", - *node_idx, tnodes.size()); - } - const TPlanNode& tnode = tnodes[*node_idx]; - - int num_children = tnodes[*node_idx].num_children; - OperatorXPtr op = nullptr; - RETURN_IF_ERROR(_create_operator(pool, tnodes[*node_idx], request, descs, op, cur_pipe, - parent == nullptr ? -1 : parent->node_id(), child_idx)); - - // assert(parent != nullptr || (node_idx == 0 && root_expr != nullptr)); - if (parent != nullptr) { - // add to parent's child(s) - RETURN_IF_ERROR(parent->set_child(op)); - } else { - *root = op; - } - - cur_pipe->_name.push_back('-'); - cur_pipe->_name.append(std::to_string(op->id())); - cur_pipe->_name.append(op->get_name()); - - // rely on that tnodes is preorder of the plan - for (int i = 0; i < num_children; i++) { - ++*node_idx; - RETURN_IF_ERROR(_create_tree_helper(pool, tnodes, request, descs, op, node_idx, nullptr, - cur_pipe, i)); - - // we are expecting a child, but have used all nodes - // this means we have been given a bad tree and must fail - if (*node_idx >= tnodes.size()) { - // TODO: print thrift msg - return Status::InternalError( - "Failed to reconstruct plan tree from thrift. 
Node id: {}, number of nodes: {}", - *node_idx, tnodes.size()); - } - } - - RETURN_IF_ERROR(op->init(tnode, _runtime_state.get())); - - return Status::OK(); -} - -void PipelineXFragmentContext::_inherit_pipeline_properties( - const DataDistribution& data_distribution, PipelinePtr pipe_with_source, - PipelinePtr pipe_with_sink) { - pipe_with_sink->set_num_tasks(pipe_with_source->num_tasks()); - pipe_with_source->set_num_tasks(_num_instances); - pipe_with_source->set_data_distribution(data_distribution); -} - -Status PipelineXFragmentContext::_add_local_exchange_impl( - int idx, ObjectPool* pool, PipelinePtr cur_pipe, PipelinePtr new_pip, - DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, - const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_hash_distribution) { - auto& operator_xs = cur_pipe->operator_xs(); - const auto downstream_pipeline_id = cur_pipe->id(); - auto local_exchange_id = next_operator_id(); - // 1. Create a new pipeline with local exchange sink. - DataSinkOperatorXPtr sink; - auto sink_id = next_sink_operator_id(); - const bool is_shuffled_hash_join = operator_xs.size() > idx - ? operator_xs[idx]->is_shuffled_hash_join() - : cur_pipe->sink_x()->is_shuffled_hash_join(); - sink.reset(new LocalExchangeSinkOperatorX( - sink_id, local_exchange_id, is_shuffled_hash_join ? _total_instances : _num_instances, - data_distribution.partition_exprs, bucket_seq_to_instance_idx)); - RETURN_IF_ERROR(new_pip->set_sink(sink)); - RETURN_IF_ERROR(new_pip->sink_x()->init(data_distribution.distribution_type, num_buckets, - is_shuffled_hash_join, shuffle_idx_to_instance_idx)); - - // 2. Create and initialize LocalExchangeSharedState. - auto shared_state = LocalExchangeSharedState::create_shared(_num_instances); - switch (data_distribution.distribution_type) { - case ExchangeType::HASH_SHUFFLE: - shared_state->exchanger = ShuffleExchanger::create_unique( - std::max(cur_pipe->num_tasks(), _num_instances), - is_shuffled_hash_join ? _total_instances : _num_instances); - break; - case ExchangeType::BUCKET_HASH_SHUFFLE: - shared_state->exchanger = BucketShuffleExchanger::create_unique( - std::max(cur_pipe->num_tasks(), _num_instances), _num_instances, num_buckets, - ignore_data_hash_distribution); - break; - case ExchangeType::PASSTHROUGH: - shared_state->exchanger = - PassthroughExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); - break; - case ExchangeType::BROADCAST: - shared_state->exchanger = - BroadcastExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); - break; - case ExchangeType::PASS_TO_ONE: - shared_state->exchanger = - BroadcastExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); - break; - case ExchangeType::ADAPTIVE_PASSTHROUGH: - shared_state->exchanger = - AdaptivePassthroughExchanger::create_unique(cur_pipe->num_tasks(), _num_instances); - break; - default: - return Status::InternalError("Unsupported local exchange type : " + - std::to_string((int)data_distribution.distribution_type)); - } - auto sink_dep = std::make_shared(sink_id, local_exchange_id, - "LOCAL_EXCHANGE_SINK_DEPENDENCY", true, - _runtime_state->get_query_ctx()); - sink_dep->set_shared_state(shared_state.get()); - shared_state->sink_deps.push_back(sink_dep); - _op_id_to_le_state.insert({local_exchange_id, {shared_state, sink_dep}}); - - // 3. Set two pipelines' operator list. 
For example, split pipeline [Scan - AggSink] to - // pipeline1 [Scan - LocalExchangeSink] and pipeline2 [LocalExchangeSource - AggSink]. - - // 3.1 Initialize new pipeline's operator list. - std::copy(operator_xs.begin(), operator_xs.begin() + idx, - std::inserter(new_pip->operator_xs(), new_pip->operator_xs().end())); - - // 3.2 Erase unused operators in previous pipeline. - operator_xs.erase(operator_xs.begin(), operator_xs.begin() + idx); - - // 4. Initialize LocalExchangeSource and insert it into this pipeline. - OperatorXPtr source_op; - source_op.reset(new LocalExchangeSourceOperatorX(pool, local_exchange_id)); - RETURN_IF_ERROR(source_op->set_child(new_pip->operator_xs().back())); - RETURN_IF_ERROR(source_op->init(data_distribution.distribution_type)); - if (!operator_xs.empty()) { - RETURN_IF_ERROR(operator_xs.front()->set_child(source_op)); - } - operator_xs.insert(operator_xs.begin(), source_op); - - shared_state->create_source_dependencies(source_op->operator_id(), source_op->node_id(), - _query_ctx.get()); - - // 5. Set children for two pipelines separately. - std::vector> new_children; - std::vector edges_with_source; - for (auto child : cur_pipe->children()) { - bool found = false; - for (auto op : new_pip->operator_xs()) { - if (child->sink_x()->node_id() == op->node_id()) { - new_pip->set_children(child); - found = true; - }; - } - if (!found) { - new_children.push_back(child); - edges_with_source.push_back(child->id()); - } - } - new_children.push_back(new_pip); - edges_with_source.push_back(new_pip->id()); - - // 6. Set DAG for new pipelines. - if (!new_pip->children().empty()) { - std::vector edges_with_sink; - for (auto child : new_pip->children()) { - edges_with_sink.push_back(child->id()); - } - _dag.insert({new_pip->id(), edges_with_sink}); - } - cur_pipe->set_children(new_children); - _dag[downstream_pipeline_id] = edges_with_source; - RETURN_IF_ERROR(new_pip->sink_x()->set_child(new_pip->operator_xs().back())); - RETURN_IF_ERROR(cur_pipe->sink_x()->set_child(cur_pipe->operator_xs().back())); - - // 7. Inherit properties from current pipeline. 
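// A compact sketch, with plain strings standing in for the real OperatorXPtr
// machinery, of the split performed in steps 3 and 4 above: operators in
// [0, idx) move to the new upstream pipeline and a LocalExchangeSource is
// prepended to what remains, so [Scan - AggSink] becomes
// [Scan - LocalExchangeSink] plus [LocalExchangeSource - AggSink].
#include <string>
#include <vector>

using OperatorList = std::vector<std::string>;

static void split_at_local_exchange(OperatorList& cur_ops, OperatorList& new_ops, size_t idx) {
    // 1. the first `idx` operators become the new (upstream) pipeline's operators
    new_ops.assign(cur_ops.begin(), cur_ops.begin() + idx);
    // 2. erase them from the current pipeline
    cur_ops.erase(cur_ops.begin(), cur_ops.begin() + idx);
    // 3. the current pipeline now starts from a local exchange source
    cur_ops.insert(cur_ops.begin(), "LocalExchangeSource");
    // (the new pipeline additionally gets a LocalExchangeSink as its sink, omitted here)
}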
- _inherit_pipeline_properties(data_distribution, cur_pipe, new_pip); - return Status::OK(); -} - -Status PipelineXFragmentContext::_add_local_exchange( - int pip_idx, int idx, int node_id, ObjectPool* pool, PipelinePtr cur_pipe, - DataDistribution data_distribution, bool* do_local_exchange, int num_buckets, - const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_distribution) { - DCHECK(_enable_local_shuffle()); - if (_num_instances <= 1) { - return Status::OK(); - } - - if (!cur_pipe->need_to_local_exchange(data_distribution)) { - return Status::OK(); - } - *do_local_exchange = true; - - auto& operator_xs = cur_pipe->operator_xs(); - auto total_op_num = operator_xs.size(); - auto new_pip = add_pipeline(cur_pipe, pip_idx + 1); - RETURN_IF_ERROR(_add_local_exchange_impl( - idx, pool, cur_pipe, new_pip, data_distribution, do_local_exchange, num_buckets, - bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, ignore_data_distribution)); - - CHECK(total_op_num + 1 == cur_pipe->operator_xs().size() + new_pip->operator_xs().size()) - << "total_op_num: " << total_op_num - << " cur_pipe->operator_xs().size(): " << cur_pipe->operator_xs().size() - << " new_pip->operator_xs().size(): " << new_pip->operator_xs().size(); - - // Add passthrough local exchanger if necessary - if (cur_pipe->num_tasks() > 1 && new_pip->num_tasks() == 1 && - Pipeline::is_hash_exchange(data_distribution.distribution_type)) { - RETURN_IF_ERROR(_add_local_exchange_impl( - new_pip->operator_xs().size(), pool, new_pip, add_pipeline(new_pip, pip_idx + 2), - DataDistribution(ExchangeType::PASSTHROUGH), do_local_exchange, num_buckets, - bucket_seq_to_instance_idx, shuffle_idx_to_instance_idx, ignore_data_distribution)); - } - return Status::OK(); -} - -// NOLINTBEGIN(readability-function-size) -// NOLINTBEGIN(readability-function-cognitive-complexity) -Status PipelineXFragmentContext::_create_operator(ObjectPool* pool, const TPlanNode& tnode, - const doris::TPipelineFragmentParams& request, - const DescriptorTbl& descs, OperatorXPtr& op, - PipelinePtr& cur_pipe, int parent_idx, - int child_idx) { - // We directly construct the operator from Thrift because the given array is in the order of preorder traversal. - // Therefore, here we need to use a stack-like structure. 
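// A minimal sketch, using plain structs rather than the Thrift plan types, of the
// preorder reconstruction that _create_tree_helper above relies on: the flattened
// node list is consumed left to right, and each node's num_children tells the
// recursion how many of the following nodes belong to its subtree.
#include <memory>
#include <vector>

struct FlatNode { int id; int num_children; };
struct TreeNode {
    int id;
    std::vector<std::unique_ptr<TreeNode>> children;
};

static std::unique_ptr<TreeNode> build_preorder(const std::vector<FlatNode>& nodes, size_t& idx) {
    const FlatNode& flat = nodes.at(idx);
    auto node = std::make_unique<TreeNode>();
    node->id = flat.id;
    for (int i = 0; i < flat.num_children; ++i) {
        ++idx;  // advance to the child's position before recursing
        node->children.push_back(build_preorder(nodes, idx));
    }
    return node;
}
// Example: {A,2},{B,0},{C,1},{D,0} rebuilds A(B, C(D)); idx ends at the last node consumed.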
- _pipeline_parent_map.pop(cur_pipe, parent_idx, child_idx); - std::stringstream error_msg; - - switch (tnode.node_type) { - case TPlanNodeType::OLAP_SCAN_NODE: { - op.reset(new OlapScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } - break; - } - case doris::TPlanNodeType::JDBC_SCAN_NODE: { - if (config::enable_java_support) { - op.reset(new JDBCScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - } else { - return Status::InternalError( - "Jdbc scan node is disabled, you can change be config enable_java_support " - "to true and restart be."); - } - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } - break; - } - case doris::TPlanNodeType::FILE_SCAN_NODE: { - op.reset(new FileScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } - break; - } - case TPlanNodeType::ES_SCAN_NODE: - case TPlanNodeType::ES_HTTP_SCAN_NODE: { - op.reset(new EsScanOperatorX(pool, tnode, next_operator_id(), descs, _num_instances)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - if (request.__isset.parallel_instances) { - cur_pipe->set_num_tasks(request.parallel_instances); - op->set_ignore_data_distribution(); - } - break; - } - case TPlanNodeType::EXCHANGE_NODE: { - int num_senders = find_with_default(request.per_exch_num_senders, tnode.node_id, 0); - DCHECK_GT(num_senders, 0); - op.reset(new ExchangeSourceOperatorX(pool, tnode, next_operator_id(), descs, num_senders)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - if (request.__isset.parallel_instances) { - op->set_ignore_data_distribution(); - cur_pipe->set_num_tasks(request.parallel_instances); - } - break; - } - case TPlanNodeType::AGGREGATION_NODE: { - if (tnode.agg_node.grouping_exprs.empty() && - descs.get_tuple_descriptor(tnode.agg_node.output_tuple_id)->slots().empty()) { - return Status::InternalError("Illegal aggregate node " + std::to_string(tnode.node_id) + - ": group by and output is empty"); - } - if (tnode.agg_node.aggregate_functions.empty() && !_runtime_state->enable_agg_spill() && - request.query_options.__isset.enable_distinct_streaming_aggregation && - request.query_options.enable_distinct_streaming_aggregation && - !tnode.agg_node.grouping_exprs.empty()) { - op.reset(new DistinctStreamingAggOperatorX(pool, next_operator_id(), tnode, descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - } else if (tnode.agg_node.__isset.use_streaming_preaggregation && - tnode.agg_node.use_streaming_preaggregation && - !tnode.agg_node.grouping_exprs.empty()) { - op.reset(new StreamingAggOperatorX(pool, next_operator_id(), tnode, descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - } else { - if (_runtime_state->enable_agg_spill() && !tnode.agg_node.grouping_exprs.empty()) { - op.reset(new PartitionedAggSourceOperatorX(pool, tnode, next_operator_id(), descs)); - } else { - op.reset(new AggSourceOperatorX(pool, tnode, next_operator_id(), descs)); - } - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if 
(_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - cur_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(cur_pipe->id()); - - DataSinkOperatorXPtr sink; - if (_runtime_state->enable_agg_spill() && !tnode.agg_node.grouping_exprs.empty()) { - sink.reset(new PartitionedAggSinkOperatorX(pool, next_sink_operator_id(), tnode, - descs, _require_bucket_distribution)); - } else { - sink.reset(new AggSinkOperatorX(pool, next_sink_operator_id(), tnode, descs, - _require_bucket_distribution)); - } - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(cur_pipe->set_sink(sink)); - RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); - } - _require_bucket_distribution = true; - break; - } - case TPlanNodeType::HASH_JOIN_NODE: { - const auto is_broadcast_join = tnode.hash_join_node.__isset.is_broadcast_join && - tnode.hash_join_node.is_broadcast_join; - const auto enable_join_spill = _runtime_state->enable_join_spill(); - if (enable_join_spill && !is_broadcast_join) { - auto tnode_ = tnode; - /// TODO: support rf in partitioned hash join - tnode_.runtime_filters.clear(); - const uint32_t partition_count = 32; - auto inner_probe_operator = - std::make_shared(pool, tnode_, 0, descs); - auto inner_sink_operator = std::make_shared( - pool, 0, tnode_, descs, _need_local_merge); - - RETURN_IF_ERROR(inner_probe_operator->init(tnode_, _runtime_state.get())); - RETURN_IF_ERROR(inner_sink_operator->init(tnode_, _runtime_state.get())); - - auto probe_operator = std::make_shared( - pool, tnode_, next_operator_id(), descs, partition_count); - probe_operator->set_inner_operators(inner_sink_operator, inner_probe_operator); - op = std::move(probe_operator); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - PipelinePtr build_side_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); - - auto sink_operator = std::make_shared( - pool, next_sink_operator_id(), tnode_, descs, _need_local_merge, - partition_count); - sink_operator->set_inner_operators(inner_sink_operator, inner_probe_operator); - DataSinkOperatorXPtr sink = std::move(sink_operator); - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); - RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode_, _runtime_state.get())); - - _pipeline_parent_map.push(op->node_id(), cur_pipe); - _pipeline_parent_map.push(op->node_id(), build_side_pipe); - } else { - op.reset(new HashJoinProbeOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - PipelinePtr build_side_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); - - DataSinkOperatorXPtr sink; - sink.reset(new HashJoinBuildSinkOperatorX(pool, next_sink_operator_id(), tnode, descs, - _need_local_merge)); - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); - RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode, _runtime_state.get())); - - _pipeline_parent_map.push(op->node_id(), cur_pipe); - _pipeline_parent_map.push(op->node_id(), build_side_pipe); - } - _require_bucket_distribution = true; - break; 
- } - case TPlanNodeType::CROSS_JOIN_NODE: { - op.reset(new NestedLoopJoinProbeOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - PipelinePtr build_side_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); - - DataSinkOperatorXPtr sink; - sink.reset(new NestedLoopJoinBuildSinkOperatorX(pool, next_sink_operator_id(), tnode, descs, - _need_local_merge)); - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); - RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode, _runtime_state.get())); - _pipeline_parent_map.push(op->node_id(), cur_pipe); - _pipeline_parent_map.push(op->node_id(), build_side_pipe); - break; - } - case TPlanNodeType::UNION_NODE: { - int child_count = tnode.num_children; - op.reset(new UnionSourceOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - for (int i = 0; i < child_count; i++) { - PipelinePtr build_side_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(build_side_pipe->id()); - DataSinkOperatorXPtr sink; - sink.reset(new UnionSinkOperatorX(i, next_sink_operator_id(), pool, tnode, descs)); - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(build_side_pipe->set_sink(sink)); - RETURN_IF_ERROR(build_side_pipe->sink_x()->init(tnode, _runtime_state.get())); - // preset children pipelines. if any pipeline found this as its father, will use the prepared pipeline to build. 
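// A small sketch, with simplified containers instead of the real PipelinePtr type,
// of the _pipeline_parent_map protocol used for joins, unions and set operations:
// the parent node pushes one pre-built pipeline per child, and each child later
// pops the pipeline reserved for its child index instead of creating a new one.
#include <map>
#include <vector>

using PipelineHandle = int;

struct ParentMapSketch {
    std::map<int, std::vector<PipelineHandle>> build_side_pipelines;

    void push(int parent_node_id, PipelineHandle pipeline) {
        build_side_pipelines[parent_node_id].push_back(pipeline);
    }
    // If the parent pre-registered pipelines, the child at `child_idx` takes over
    // the corresponding one; otherwise cur_pipe is left untouched.
    void pop(PipelineHandle& cur_pipe, int parent_node_id, int child_idx) {
        auto it = build_side_pipelines.find(parent_node_id);
        if (it == build_side_pipelines.end()) return;
        cur_pipe = it->second.at(child_idx);
    }
};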
- _pipeline_parent_map.push(op->node_id(), build_side_pipe); - } - break; - } - case TPlanNodeType::SORT_NODE: { - if (_runtime_state->enable_sort_spill()) { - op.reset(new SpillSortSourceOperatorX(pool, tnode, next_operator_id(), descs)); - } else { - op.reset(new SortSourceOperatorX(pool, tnode, next_operator_id(), descs)); - } - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - cur_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(cur_pipe->id()); - - DataSinkOperatorXPtr sink; - if (_runtime_state->enable_sort_spill()) { - sink.reset(new SpillSortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); - } else { - sink.reset(new SortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); - } - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(cur_pipe->set_sink(sink)); - RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); - break; - } - case doris::TPlanNodeType::PARTITION_SORT_NODE: { - op.reset(new PartitionSortSourceOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - cur_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(cur_pipe->id()); - - DataSinkOperatorXPtr sink; - sink.reset(new PartitionSortSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(cur_pipe->set_sink(sink)); - RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); - break; - } - case TPlanNodeType::ANALYTIC_EVAL_NODE: { - op.reset(new AnalyticSourceOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - cur_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(cur_pipe->id()); - - DataSinkOperatorXPtr sink; - sink.reset(new AnalyticSinkOperatorX(pool, next_sink_operator_id(), tnode, descs)); - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(cur_pipe->set_sink(sink)); - RETURN_IF_ERROR(cur_pipe->sink_x()->init(tnode, _runtime_state.get())); - _require_bucket_distribution = true; - break; - } - case TPlanNodeType::INTERSECT_NODE: { - RETURN_IF_ERROR(_build_operators_for_set_operation_node( - pool, tnode, descs, op, cur_pipe, parent_idx, child_idx)); - break; - } - case TPlanNodeType::EXCEPT_NODE: { - RETURN_IF_ERROR(_build_operators_for_set_operation_node( - pool, tnode, descs, op, cur_pipe, parent_idx, child_idx)); - break; - } - case TPlanNodeType::REPEAT_NODE: { - op.reset(new RepeatOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - case TPlanNodeType::TABLE_FUNCTION_NODE: { - op.reset(new TableFunctionOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - case TPlanNodeType::ASSERT_NUM_ROWS_NODE: { - op.reset(new AssertNumRowsOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - case TPlanNodeType::EMPTY_SET_NODE: { - op.reset(new EmptySetSourceOperatorX(pool, tnode, 
next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - case TPlanNodeType::DATA_GEN_SCAN_NODE: { - op.reset(new DataGenSourceOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - case TPlanNodeType::SCHEMA_SCAN_NODE: { - op.reset(new SchemaScanOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - case TPlanNodeType::META_SCAN_NODE: { - op.reset(new MetaScanOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - case TPlanNodeType::SELECT_NODE: { - op.reset(new SelectOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - break; - } - default: - return Status::InternalError("Unsupported exec type in pipelineX: {}", - print_plan_node_type(tnode.node_type)); - } - - _require_bucket_distribution = true; - - return Status::OK(); -} -// NOLINTEND(readability-function-cognitive-complexity) -// NOLINTEND(readability-function-size) - -template -Status PipelineXFragmentContext::_build_operators_for_set_operation_node( - ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs, OperatorXPtr& op, - PipelinePtr& cur_pipe, int parent_idx, int child_idx) { - op.reset(new SetSourceOperatorX(pool, tnode, next_operator_id(), descs)); - RETURN_IF_ERROR(cur_pipe->add_operator(op)); - - const auto downstream_pipeline_id = cur_pipe->id(); - if (_dag.find(downstream_pipeline_id) == _dag.end()) { - _dag.insert({downstream_pipeline_id, {}}); - } - - for (int child_id = 0; child_id < tnode.num_children; child_id++) { - PipelinePtr probe_side_pipe = add_pipeline(cur_pipe); - _dag[downstream_pipeline_id].push_back(probe_side_pipe->id()); - - DataSinkOperatorXPtr sink; - if (child_id == 0) { - sink.reset(new SetSinkOperatorX(child_id, next_sink_operator_id(), pool, - tnode, descs)); - } else { - sink.reset(new SetProbeSinkOperatorX(child_id, next_sink_operator_id(), - pool, tnode, descs)); - } - sink->set_dests_id({op->operator_id()}); - RETURN_IF_ERROR(probe_side_pipe->set_sink(sink)); - RETURN_IF_ERROR(probe_side_pipe->sink_x()->init(tnode, _runtime_state.get())); - // prepare children pipelines. if any pipeline found this as its father, will use the prepared pipeline to build. - _pipeline_parent_map.push(op->node_id(), probe_side_pipe); - } - - return Status::OK(); -} - -Status PipelineXFragmentContext::submit() { - if (_submitted) { - return Status::InternalError("submitted"); - } - _submitted = true; - - int submit_tasks = 0; - Status st; - auto* scheduler = _query_ctx->get_pipe_exec_scheduler(); - for (auto& task : _tasks) { - for (auto& t : task) { - st = scheduler->schedule_task(t.get()); - if (!st) { - std::lock_guard l(_status_lock); - cancel(PPlanFragmentCancelReason::INTERNAL_ERROR, "submit context fail"); - _total_tasks = submit_tasks; - break; - } - submit_tasks++; - } - } - if (!st.ok()) { - std::lock_guard l(_task_mutex); - if (_closed_tasks == _total_tasks) { - _close_fragment_instance(); - } - return Status::InternalError("Submit pipeline failed. err = {}, BE: {}", st.to_string(), - BackendOptions::get_localhost()); - } else { - return st; - } -} - -void PipelineXFragmentContext::close_sink() { - for (auto& tasks : _tasks) { - auto& root_task = *tasks.begin(); - auto st = root_task->close_sink(_prepared ? 
Status::RuntimeError("prepare failed") - : Status::OK()); - if (!st.ok()) { - LOG_WARNING("PipelineXFragmentContext::close_sink() error").tag("msg", st.msg()); - } - } -} - -void PipelineXFragmentContext::close_if_prepare_failed(Status st) { - for (auto& task : _tasks) { - for (auto& t : task) { - DCHECK(!t->is_pending_finish()); - WARN_IF_ERROR(t->close(st), "close_if_prepare_failed failed: "); - close_a_pipeline(); - } - } - _query_ctx->cancel(st.to_string(), st, _fragment_id); -} - -void PipelineXFragmentContext::_close_fragment_instance() { - if (_is_fragment_instance_closed) { - return; - } - Defer defer_op {[&]() { _is_fragment_instance_closed = true; }}; - _runtime_profile->total_time_counter()->update(_fragment_watcher.elapsed_time()); - static_cast(send_report(true)); - if (_runtime_state->enable_profile()) { - std::stringstream ss; - // Compute the _local_time_percent before pretty_print the runtime_profile - // Before add this operation, the print out like that: - // UNION_NODE (id=0):(Active: 56.720us, non-child: 00.00%) - // After add the operation, the print out like that: - // UNION_NODE (id=0):(Active: 56.720us, non-child: 82.53%) - // We can easily know the exec node execute time without child time consumed. - _runtime_state->runtime_profile()->compute_time_in_profile(); - _runtime_state->runtime_profile()->pretty_print(&ss); - if (_runtime_state->load_channel_profile()) { - _runtime_state->load_channel_profile()->pretty_print(&ss); - } - - LOG_INFO("Query {} fragment {} profile:\n {}", print_id(this->_query_id), - this->_fragment_id, ss.str()); - } - - if (_query_ctx->enable_profile()) { - _query_ctx->add_fragment_profile_x(_fragment_id, collect_realtime_profile_x(), - collect_realtime_load_channel_profile_x()); - } - - // all submitted tasks done - _exec_env->fragment_mgr()->remove_pipeline_context( - std::dynamic_pointer_cast(shared_from_this())); -} - -Status PipelineXFragmentContext::send_report(bool done) { - Status exec_status = Status::OK(); - { - std::lock_guard l(_status_lock); - exec_status = _query_ctx->exec_status(); - } - - // If plan is done successfully, but _is_report_success is false, - // no need to send report. - if (!_is_report_success && done && exec_status.ok()) { - return Status::NeedSendAgain(""); - } - - // If both _is_report_success and _is_report_on_cancel are false, - // which means no matter query is success or failed, no report is needed. 
- // This may happen when the query limit reached and - // a internal cancellation being processed - if (!_is_report_success && !_is_report_on_cancel) { - return Status::NeedSendAgain(""); - } - - std::vector runtime_states; - - for (auto& task_state : _task_runtime_states) { - runtime_states.push_back(task_state.get()); - } - - ReportStatusRequest req {true, - exec_status, - runtime_states, - nullptr, - _runtime_state->load_channel_profile(), - done || !exec_status.ok(), - _query_ctx->coord_addr, - _query_id, - _fragment_id, - TUniqueId(), - _backend_num, - _runtime_state.get(), - [this](Status st) { return update_status(st); }, - [this](const PPlanFragmentCancelReason& reason, - const std::string& msg) { cancel(reason, msg); }}; - - return _report_status_cb( - req, std::dynamic_pointer_cast(shared_from_this())); -} - -std::vector> -PipelineXFragmentContext::collect_realtime_profile_x() const { - std::vector> res; - DCHECK(_query_ctx->enable_pipeline_x_exec() == true) - << fmt::format("Query {} calling a pipeline X function, but its pipeline X is disabled", - print_id(this->_query_id)); - - // we do not have mutex to protect pipeline_id_to_profile - // so we need to make sure this funciton is invoked after fragment context - // has already been prepared. - if (!this->_prepared) { - std::string msg = - "Query " + print_id(this->_query_id) + " collecting profile, but its not prepared"; - DCHECK(false) << msg; - LOG_ERROR(msg); - return res; - } - - // pipeline_id_to_profile is initialized in prepare stage - for (auto& pipeline_profile : _runtime_state->pipeline_id_to_profile()) { - auto profile_ptr = std::make_shared(); - pipeline_profile->to_thrift(profile_ptr.get()); - res.push_back(profile_ptr); - } - - return res; -} - -std::shared_ptr -PipelineXFragmentContext::collect_realtime_load_channel_profile_x() const { - // we do not have mutex to protect pipeline_id_to_profile - // so we need to make sure this funciton is invoked after fragment context - // has already been prepared. 
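// A condensed sketch, using plain bools instead of the real Status plumbing, of
// the reporting gate implemented in send_report() above: a report is only sent
// when profile reporting is enabled, or when the query failed or was cancelled
// and report-on-cancel is requested.
static bool should_send_report(bool is_report_success, bool is_report_on_cancel,
                               bool done, bool exec_ok) {
    if (!is_report_success && done && exec_ok) return false;      // finished cleanly, nothing to report
    if (!is_report_success && !is_report_on_cancel) return false; // reporting disabled entirely
    return true;
}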
- if (!this->_prepared) { - std::string msg = - "Query " + print_id(this->_query_id) + " collecting profile, but its not prepared"; - DCHECK(false) << msg; - LOG_ERROR(msg); - return nullptr; - } - - for (auto& runtime_state : _task_runtime_states) { - if (runtime_state->runtime_profile() == nullptr) { - continue; - } - - auto tmp_load_channel_profile = std::make_shared(); - - runtime_state->runtime_profile()->to_thrift(tmp_load_channel_profile.get()); - this->_runtime_state->load_channel_profile()->update(*tmp_load_channel_profile); - } - - auto load_channel_profile = std::make_shared(); - this->_runtime_state->load_channel_profile()->to_thrift(load_channel_profile.get()); - return load_channel_profile; -} - -std::string PipelineXFragmentContext::debug_string() { - fmt::memory_buffer debug_string_buffer; - fmt::format_to(debug_string_buffer, "PipelineXFragmentContext Info:\n"); - for (size_t j = 0; j < _tasks.size(); j++) { - fmt::format_to(debug_string_buffer, "Tasks in instance {}:\n", j); - for (size_t i = 0; i < _tasks[j].size(); i++) { - fmt::format_to(debug_string_buffer, "Task {}: {}\n", i, _tasks[j][i]->debug_string()); - } - } - - return fmt::to_string(debug_string_buffer); -} -} // namespace doris::pipeline diff --git a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h b/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h deleted file mode 100644 index c87f8f4f784051..00000000000000 --- a/be/src/pipeline/pipeline_x/pipeline_x_fragment_context.h +++ /dev/null @@ -1,247 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/status.h" -#include "pipeline/pipeline.h" -#include "pipeline/pipeline_fragment_context.h" -#include "pipeline/pipeline_task.h" -#include "pipeline/pipeline_x/local_exchange/local_exchanger.h" -#include "pipeline/pipeline_x/pipeline_x_task.h" -#include "runtime/query_context.h" -#include "runtime/runtime_state.h" -#include "util/runtime_profile.h" -#include "util/stopwatch.hpp" - -namespace doris { -class ExecNode; -class DataSink; -struct ReportStatusRequest; -class ExecEnv; -class RuntimeFilterMergeControllerEntity; -class TDataSink; -class TPipelineFragmentParams; - -namespace pipeline { -class Dependency; - -class PipelineXFragmentContext : public PipelineFragmentContext { -public: - // Callback to report execution status of plan fragment. - // 'profile' is the cumulative profile, 'done' indicates whether the execution - // is done or still continuing. 
- // Note: this does not take a const RuntimeProfile&, because it might need to call - // functions like PrettyPrint() or to_thrift(), neither of which is const - // because they take locks. - PipelineXFragmentContext(const TUniqueId& query_id, const int fragment_id, - std::shared_ptr query_ctx, ExecEnv* exec_env, - const std::function& call_back, - const report_status_callback& report_status_cb); - - ~PipelineXFragmentContext() override; - - void instance_ids(std::vector& ins_ids) const override { - ins_ids.resize(_fragment_instance_ids.size()); - for (size_t i = 0; i < _fragment_instance_ids.size(); i++) { - ins_ids[i] = _fragment_instance_ids[i]; - } - } - - void instance_ids(std::vector& ins_ids) const override { - ins_ids.resize(_fragment_instance_ids.size()); - for (size_t i = 0; i < _fragment_instance_ids.size(); i++) { - ins_ids[i] = print_id(_fragment_instance_ids[i]); - } - } - - void add_merge_controller_handler( - std::shared_ptr& handler) override { - _merge_controller_handlers.emplace_back(handler); - } - - // bool is_canceled() const { return _runtime_state->is_cancelled(); } - - // Prepare global information including global states and the unique operator tree shared by all pipeline tasks. - Status prepare(const doris::TPipelineFragmentParams& request) override; - - Status submit() override; - - void close_if_prepare_failed(Status st) override; - void close_sink() override; - - void cancel(const PPlanFragmentCancelReason& reason = PPlanFragmentCancelReason::INTERNAL_ERROR, - const std::string& msg = "") override; - - Status send_report(bool) override; - - [[nodiscard]] int next_operator_id() { return _operator_id--; } - - [[nodiscard]] int max_operator_id() const { return _operator_id; } - - [[nodiscard]] int next_sink_operator_id() { return _sink_operator_id--; } - - [[nodiscard]] int max_sink_operator_id() const { return _sink_operator_id; } - - std::vector> collect_realtime_profile_x() const; - std::shared_ptr collect_realtime_load_channel_profile_x() const; - - std::string debug_string() override; - -private: - void _close_fragment_instance() override; - Status _build_pipeline_tasks(const doris::TPipelineFragmentParams& request) override; - Status _add_local_exchange(int pip_idx, int idx, int node_id, ObjectPool* pool, - PipelinePtr cur_pipe, DataDistribution data_distribution, - bool* do_local_exchange, int num_buckets, - const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_distribution); - void _inherit_pipeline_properties(const DataDistribution& data_distribution, - PipelinePtr pipe_with_source, PipelinePtr pipe_with_sink); - Status _add_local_exchange_impl(int idx, ObjectPool* pool, PipelinePtr cur_pipe, - PipelinePtr new_pipe, DataDistribution data_distribution, - bool* do_local_exchange, int num_buckets, - const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_distribution); - - [[nodiscard]] Status _build_pipelines(ObjectPool* pool, - const doris::TPipelineFragmentParams& request, - const DescriptorTbl& descs, OperatorXPtr* root, - PipelinePtr cur_pipe); - Status _create_tree_helper(ObjectPool* pool, const std::vector& tnodes, - const doris::TPipelineFragmentParams& request, - const DescriptorTbl& descs, OperatorXPtr parent, int* node_idx, - OperatorXPtr* root, PipelinePtr& cur_pipe, int child_idx); - - Status _create_operator(ObjectPool* pool, const TPlanNode& tnode, - const doris::TPipelineFragmentParams& request, - const DescriptorTbl& 
descs, OperatorXPtr& op, PipelinePtr& cur_pipe, - int parent_idx, int child_idx); - template - Status _build_operators_for_set_operation_node(ObjectPool* pool, const TPlanNode& tnode, - const DescriptorTbl& descs, OperatorXPtr& op, - PipelinePtr& cur_pipe, int parent_idx, - int child_idx); - - Status _create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink, - const std::vector& output_exprs, - const TPipelineFragmentParams& params, const RowDescriptor& row_desc, - RuntimeState* state, DescriptorTbl& desc_tbl, - PipelineId cur_pipeline_id); - Status _plan_local_exchange(int num_buckets, - const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx); - Status _plan_local_exchange(int num_buckets, int pip_idx, PipelinePtr pip, - const std::map& bucket_seq_to_instance_idx, - const std::map& shuffle_idx_to_instance_idx, - const bool ignore_data_distribution); - - bool _enable_local_shuffle() const { return _runtime_state->enable_local_shuffle(); } - - OperatorXPtr _root_op = nullptr; - // this is a [n * m] matrix. n is parallelism of pipeline engine and m is the number of pipelines. - std::vector>> _tasks; - - bool _need_local_merge = false; - - // It is used to manage the lifecycle of RuntimeFilterMergeController - std::vector> _merge_controller_handlers; - - // TODO: remove the _sink and _multi_cast_stream_sink_senders to set both - // of it in pipeline task not the fragment_context -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wshadow-field" -#endif - DataSinkOperatorXPtr _sink = nullptr; -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - - // `_dag` manage dependencies between pipelines by pipeline ID. the indices will be blocked by members - std::map> _dag; - - // We use preorder traversal to create an operator tree. When we meet a join node, we should - // build probe operator and build operator in separate pipelines. To do this, we should build - // ProbeSide first, and use `_pipelines_to_build` to store which pipeline the build operator - // is in, so we can build BuildSide once we complete probe side. - struct pipeline_parent_map { - std::map> _build_side_pipelines; - void push(int parent_node_id, PipelinePtr pipeline) { - if (!_build_side_pipelines.contains(parent_node_id)) { - _build_side_pipelines.insert({parent_node_id, {pipeline}}); - } else { - _build_side_pipelines[parent_node_id].push_back(pipeline); - } - } - void pop(PipelinePtr& cur_pipe, int parent_node_id, int child_idx) { - if (!_build_side_pipelines.contains(parent_node_id)) { - return; - } - DCHECK(_build_side_pipelines.contains(parent_node_id)); - auto& child_pipeline = _build_side_pipelines[parent_node_id]; - DCHECK(child_idx < child_pipeline.size()); - cur_pipe = child_pipeline[child_idx]; - } - void clear() { _build_side_pipelines.clear(); } - } _pipeline_parent_map; - - std::mutex _state_map_lock; - - int _operator_id = 0; - int _sink_operator_id = 0; - std::map, std::shared_ptr>> - _op_id_to_le_state; - - // UniqueId -> runtime mgr - std::map> _runtime_filter_mgr_map; - - //Here are two types of runtime states: - // - _runtime state is at the Fragment level. - // - _task_runtime_states is at the task level, unique to each task. 
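// A rough sketch, with toy types, of the two-level state layout described in the
// comment above: one fragment-wide runtime state shared by all tasks plus one
// private runtime state per task; the fragment context owns both, tasks only
// borrow pointers. `RuntimeStateLite` is an illustrative stand-in, not a Doris type.
#include <memory>
#include <vector>

struct RuntimeStateLite {};

struct FragmentStates {
    std::unique_ptr<RuntimeStateLite> fragment_state;            // fragment level
    std::vector<std::unique_ptr<RuntimeStateLite>> task_states;  // one per task

    RuntimeStateLite* state_for_task(size_t task_id) { return task_states.at(task_id).get(); }
};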
- - std::vector _fragment_instance_ids; - // Local runtime states for each task - std::vector> _task_runtime_states; - - std::vector> _runtime_filter_states; - - // Total instance num running on all BEs - int _total_instances = -1; - - bool _require_bucket_distribution = false; -}; - -} // namespace pipeline -} // namespace doris diff --git a/be/src/pipeline/pipeline_x/pipeline_x_task.cpp b/be/src/pipeline/pipeline_x/pipeline_x_task.cpp index 6ffc5571b4e563..e8957318a6d66b 100644 --- a/be/src/pipeline/pipeline_x/pipeline_x_task.cpp +++ b/be/src/pipeline/pipeline_x/pipeline_x_task.cpp @@ -29,8 +29,8 @@ #include "pipeline/exec/operator.h" #include "pipeline/exec/scan_operator.h" #include "pipeline/pipeline.h" +#include "pipeline/pipeline_fragment_context.h" #include "pipeline/task_queue.h" -#include "pipeline_x_fragment_context.h" #include "runtime/descriptors.h" #include "runtime/query_context.h" #include "runtime/thread_context.h" diff --git a/be/src/pipeline/pipeline_x/pipeline_x_task.h b/be/src/pipeline/pipeline_x/pipeline_x_task.h index 1f3dd9c3b71f5f..c1a72836fd8720 100644 --- a/be/src/pipeline/pipeline_x/pipeline_x_task.h +++ b/be/src/pipeline/pipeline_x/pipeline_x_task.h @@ -126,8 +126,6 @@ class PipelineXTask : public PipelineTask { return _op_shared_states[id].get(); } - bool is_pipelineX() const override { return true; } - void wake_up(); DataSinkOperatorXPtr sink() const { return _sink; } diff --git a/be/src/pipeline/task_scheduler.cpp b/be/src/pipeline/task_scheduler.cpp index 8981a7e621c463..0461999d185260 100644 --- a/be/src/pipeline/task_scheduler.cpp +++ b/be/src/pipeline/task_scheduler.cpp @@ -48,156 +48,6 @@ namespace doris::pipeline { -BlockedTaskScheduler::BlockedTaskScheduler(std::string name) - : _name(std::move(name)), _started(false), _shutdown(false) {} - -Status BlockedTaskScheduler::start() { - LOG(INFO) << "BlockedTaskScheduler start"; - RETURN_IF_ERROR(Thread::create( - "BlockedTaskScheduler", _name, [this]() { this->_schedule(); }, &_thread)); - while (!this->_started.load()) { - std::this_thread::sleep_for(std::chrono::milliseconds(5)); - } - LOG(INFO) << "BlockedTaskScheduler started"; - return Status::OK(); -} - -void BlockedTaskScheduler::shutdown() { - LOG(INFO) << "Start shutdown BlockedTaskScheduler"; - if (!this->_shutdown) { - this->_shutdown = true; - if (_thread) { - _task_cond.notify_one(); - _thread->join(); - } - } -} - -Status BlockedTaskScheduler::add_blocked_task(PipelineTask* task) { - if (this->_shutdown) { - return Status::InternalError("BlockedTaskScheduler shutdown"); - } - std::unique_lock lock(_task_mutex); - if (task->is_pipelineX()) { - // put this task into current dependency's blocking queue and wait for event notification - // instead of using a separate BlockedTaskScheduler. 
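// A sketch, under simplified and hypothetical names, of the event-driven
// alternative the comment above refers to: a blocked task parks itself on the
// dependency it is waiting for, and the dependency re-queues it when data becomes
// ready, so no polling thread has to rescan blocked tasks the way the
// BlockedTaskScheduler loop below does.
#include <functional>
#include <mutex>
#include <vector>

struct SimpleDependency {
    std::mutex mu;
    bool ready = false;
    std::vector<std::function<void()>> waiters;  // wake-up callbacks of parked tasks

    void block_task(std::function<void()> wake) {
        std::lock_guard<std::mutex> l(mu);
        if (ready) { wake(); return; }       // became ready in the meantime, run immediately
        waiters.push_back(std::move(wake));  // park until set_ready() fires
    }
    void set_ready() {
        std::vector<std::function<void()>> to_wake;
        {
            std::lock_guard<std::mutex> l(mu);
            ready = true;
            to_wake.swap(waiters);
        }
        for (auto& wake : to_wake) wake();   // e.g. push the task back onto the run queue
    }
};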
- task->set_running(false); - return Status::OK(); - } - _blocked_tasks.push_back(task); - _task_cond.notify_one(); - task->set_running(false); - return Status::OK(); -} - -void BlockedTaskScheduler::_schedule() { - _started.store(true); - std::list local_blocked_tasks; - int empty_times = 0; - - while (!_shutdown) { - { - std::unique_lock lock(this->_task_mutex); - local_blocked_tasks.splice(local_blocked_tasks.end(), _blocked_tasks); - if (local_blocked_tasks.empty()) { - while (!_shutdown.load() && _blocked_tasks.empty()) { - _task_cond.wait_for(lock, std::chrono::milliseconds(10)); - } - - if (_shutdown.load()) { - break; - } - - DCHECK(!_blocked_tasks.empty()); - local_blocked_tasks.splice(local_blocked_tasks.end(), _blocked_tasks); - } - } - - auto origin_local_block_tasks_size = local_blocked_tasks.size(); - auto iter = local_blocked_tasks.begin(); - VecDateTimeValue now = VecDateTimeValue::local_time(); - while (iter != local_blocked_tasks.end()) { - auto* task = *iter; - auto state = task->get_state(); - task->log_detail_if_need(); - if (state == PipelineTaskState::PENDING_FINISH) { - // should cancel or should finish - if (task->is_pending_finish()) { - VLOG_DEBUG << "Task pending" << task->debug_string(); - iter++; - } else { - _make_task_run(local_blocked_tasks, iter, PipelineTaskState::PENDING_FINISH); - } - } else if (task->query_context()->is_cancelled()) { - _make_task_run(local_blocked_tasks, iter); - } else if (task->query_context()->is_timeout(now)) { - LOG(WARNING) << "Timeout, query_id=" << print_id(task->query_context()->query_id()) - << ", instance_id=" << print_id(task->instance_id()) - << ", task info: " << task->debug_string(); - - task->query_context()->cancel("", Status::Cancelled("")); - _make_task_run(local_blocked_tasks, iter); - } else if (state == PipelineTaskState::BLOCKED_FOR_DEPENDENCY) { - if (task->has_dependency()) { - iter++; - } else { - _make_task_run(local_blocked_tasks, iter); - } - } else if (state == PipelineTaskState::BLOCKED_FOR_SOURCE) { - if (task->source_can_read()) { - _make_task_run(local_blocked_tasks, iter); - } else { - iter++; - } - } else if (state == PipelineTaskState::BLOCKED_FOR_RF) { - if (task->runtime_filters_are_ready_or_timeout()) { - _make_task_run(local_blocked_tasks, iter); - } else { - iter++; - } - } else if (state == PipelineTaskState::BLOCKED_FOR_SINK) { - if (task->sink_can_write()) { - _make_task_run(local_blocked_tasks, iter); - } else { - iter++; - } - } else { - // TODO: DCHECK the state - _make_task_run(local_blocked_tasks, iter); - } - } - - if (origin_local_block_tasks_size == 0 || - local_blocked_tasks.size() == origin_local_block_tasks_size) { - empty_times += 1; - } else { - empty_times = 0; - } - - if (empty_times != 0 && (empty_times & (EMPTY_TIMES_TO_YIELD - 1)) == 0) { -#ifdef __x86_64__ - _mm_pause(); -#else - sched_yield(); -#endif - } - if (empty_times == EMPTY_TIMES_TO_YIELD * 10) { - empty_times = 0; - sched_yield(); - } - } - LOG(INFO) << "BlockedTaskScheduler schedule thread stop"; -} - -void BlockedTaskScheduler::_make_task_run(std::list& local_tasks, - std::list::iterator& task_itr, - PipelineTaskState t_state) { - auto* task = *task_itr; - task->set_state(t_state); - local_tasks.erase(task_itr++); - static_cast(task->get_task_queue()->push_back(task)); -} - TaskScheduler::~TaskScheduler() { stop(); LOG(INFO) << "Task scheduler " << _name << " shutdown"; @@ -241,13 +91,8 @@ void _close_task(PipelineTask* task, PipelineTaskState state, Status exec_status // for pending finish now. 
So that could call close directly. Status status = task->close(exec_status); if (!status.ok() && state != PipelineTaskState::CANCELED) { - if (task->is_pipelineX()) { //should call fragment context cancel, in it will call query context cancel - task->fragment_context()->cancel(PPlanFragmentCancelReason::INTERNAL_ERROR, - std::string(status.msg())); - } else { - task->query_context()->cancel(status.to_string(), - Status::Cancelled(status.to_string())); - } + task->fragment_context()->cancel(PPlanFragmentCancelReason::INTERNAL_ERROR, + std::string(status.msg())); state = PipelineTaskState::CANCELED; } task->set_state(state); @@ -264,7 +109,7 @@ void TaskScheduler::_do_work(size_t index) { if (!task) { continue; } - if (task->is_pipelineX() && task->is_running()) { + if (task->is_running()) { static_cast(_task_queue->push_back(task, index)); continue; } @@ -278,8 +123,6 @@ void TaskScheduler::_do_work(size_t index) { // If the state is PENDING_FINISH, then the task is come from blocked queue, its is_pending_finish // has to return false. The task is finished and need to close now. if (state == PipelineTaskState::PENDING_FINISH) { - DCHECK(task->is_pipelineX() || !task->is_pending_finish()) - << "must not pending close " << task->debug_string(); Status exec_status = fragment_ctx->get_query_ctx()->exec_status(); _close_task(task, canceled ? PipelineTaskState::CANCELED : PipelineTaskState::FINISHED, exec_status); @@ -301,13 +144,8 @@ void TaskScheduler::_do_work(size_t index) { continue; } - if (task->is_pipelineX()) { - task->set_state(PipelineTaskState::RUNNABLE); - } + task->set_state(PipelineTaskState::RUNNABLE); - DCHECK(task->is_pipelineX() || task->get_state() == PipelineTaskState::RUNNABLE) - << "state:" << get_state_name(task->get_state()) - << " task: " << task->debug_string(); // task exec bool eos = false; auto status = Status::OK(); @@ -354,11 +192,9 @@ void TaskScheduler::_do_work(size_t index) { continue; } else if (!status.ok()) { task->set_eos_time(); - LOG(WARNING) << fmt::format( - "Pipeline task failed. query_id: {} reason: {}", - PrintInstanceStandardInfo(task->query_context()->query_id(), - task->fragment_context()->get_fragment_instance_id()), - status.to_string()); + LOG(WARNING) << fmt::format("Pipeline task failed. query_id: {} reason: {}", + print_id(task->query_context()->query_id()), + status.to_string()); // Print detail informations below when you debugging here. // // LOG(WARNING)<< "task:\n"<debug_string(); @@ -375,35 +211,21 @@ void TaskScheduler::_do_work(size_t index) { task->set_eos_time(); // TODO: pipeline parallel need to wait the last task finish to call finalize // and find_p_dependency - VLOG_DEBUG << fmt::format( - "Try close task: {}, fragment_ctx->is_canceled(): {}", - PrintInstanceStandardInfo(task->query_context()->query_id(), - task->fragment_context()->get_fragment_instance_id()), - fragment_ctx->is_canceled()); - if (task->is_pipelineX()) { - // is pending finish will add the task to dependency's blocking queue, and then the task will be - // added to running queue when dependency is ready. - if (task->is_pending_finish()) { - // Only meet eos, should set task to PENDING_FINISH state - task->set_state(PipelineTaskState::PENDING_FINISH); - task->set_running(false); - } else { - // Close the task directly? - Status exec_status = fragment_ctx->get_query_ctx()->exec_status(); - _close_task( - task, - canceled ? 
PipelineTaskState::CANCELED : PipelineTaskState::FINISHED, - exec_status); - } - } else { + VLOG_DEBUG << fmt::format("Try close task: {}, fragment_ctx->is_canceled(): {}", + print_id(task->query_context()->query_id()), + fragment_ctx->is_canceled()); + // is pending finish will add the task to dependency's blocking queue, and then the task will be + // added to running queue when dependency is ready. + if (task->is_pending_finish()) { // Only meet eos, should set task to PENDING_FINISH state - // pipeline is ok, because it will check is pending finish, and if it is ready, it will be invoked. task->set_state(PipelineTaskState::PENDING_FINISH); task->set_running(false); - // After the task is added to the block queue, it maybe run by another thread - // and the task maybe released in the other thread. And will core at - // task set running. - static_cast(_blocked_task_scheduler->add_blocked_task(task)); + } else { + // Close the task directly? + Status exec_status = fragment_ctx->get_query_ctx()->exec_status(); + _close_task(task, + canceled ? PipelineTaskState::CANCELED : PipelineTaskState::FINISHED, + exec_status); } continue; } @@ -414,7 +236,7 @@ void TaskScheduler::_do_work(size_t index) { case PipelineTaskState::BLOCKED_FOR_SINK: case PipelineTaskState::BLOCKED_FOR_RF: case PipelineTaskState::BLOCKED_FOR_DEPENDENCY: - static_cast(_blocked_task_scheduler->add_blocked_task(task)); + task->set_running(false); break; case PipelineTaskState::RUNNABLE: task->set_running(false); diff --git a/be/src/pipeline/task_scheduler.h b/be/src/pipeline/task_scheduler.h index 5bbf85fad452fd..8e513748203e21 100644 --- a/be/src/pipeline/task_scheduler.h +++ b/be/src/pipeline/task_scheduler.h @@ -44,41 +44,11 @@ class TaskQueue; namespace doris::pipeline { -class BlockedTaskScheduler { -public: - explicit BlockedTaskScheduler(std::string name); - - ~BlockedTaskScheduler() = default; - - Status start(); - void shutdown(); - Status add_blocked_task(PipelineTask* task); - -private: - std::mutex _task_mutex; - std::string _name; - std::condition_variable _task_cond; - std::list _blocked_tasks; - - scoped_refptr _thread; - std::atomic _started; - std::atomic _shutdown; - - static constexpr auto EMPTY_TIMES_TO_YIELD = 64; - - void _schedule(); - void _make_task_run(std::list& local_tasks, - std::list::iterator& task_itr, - PipelineTaskState state = PipelineTaskState::RUNNABLE); -}; - class TaskScheduler { public: - TaskScheduler(ExecEnv* exec_env, std::shared_ptr b_scheduler, - std::shared_ptr task_queue, std::string name, + TaskScheduler(ExecEnv* exec_env, std::shared_ptr task_queue, std::string name, CgroupCpuCtl* cgroup_cpu_ctl) : _task_queue(std::move(task_queue)), - _blocked_task_scheduler(std::move(b_scheduler)), _shutdown(false), _name(name), _cgroup_cpu_ctl(cgroup_cpu_ctl) {} @@ -97,7 +67,6 @@ class TaskScheduler { std::unique_ptr _fix_thread_pool; std::shared_ptr _task_queue; std::vector>> _markers; - std::shared_ptr _blocked_task_scheduler; std::atomic _shutdown; std::string _name; CgroupCpuCtl* _cgroup_cpu_ctl = nullptr; diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index eb3e6f8a89c81b..1cf0aea8349d94 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -45,7 +45,6 @@ class DeltaWriterV2Pool; } // namespace vectorized namespace pipeline { class TaskScheduler; -class BlockedTaskScheduler; struct RuntimeFilterTimerQueue; } // namespace pipeline class WorkloadGroupMgr; @@ -305,10 +304,6 @@ class ExecEnv { } std::shared_ptr get_dummy_lru_cache() { return 
_dummy_lru_cache; } - std::shared_ptr get_global_block_scheduler() { - return _global_block_scheduler; - } - pipeline::RuntimeFilterTimerQueue* runtime_filter_timer_queue() { return _runtime_filter_timer_queue; } @@ -453,11 +448,6 @@ class ExecEnv { TabletHotspot* _tablet_hotspot; CloudWarmUpManager* _cloud_warm_up_manager; - // used for query with group cpu hard limit - std::shared_ptr _global_block_scheduler; - // used for query without workload group - std::shared_ptr _without_group_block_scheduler; - pipeline::RuntimeFilterTimerQueue* _runtime_filter_timer_queue = nullptr; WorkloadSchedPolicyMgr* _workload_sched_mgr = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 5a7e39cf158c41..ae36757b10a37c 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -364,15 +364,10 @@ Status ExecEnv::init_pipeline_task_scheduler() { LOG_INFO("pipeline executors_size set ").tag("size", executors_size); // TODO pipeline workload group combie two blocked schedulers. auto t_queue = std::make_shared(executors_size); - _without_group_block_scheduler = - std::make_shared("PipeNoGSchePool"); - _without_group_task_scheduler = new pipeline::TaskScheduler( - this, _without_group_block_scheduler, t_queue, "PipeNoGSchePool", nullptr); + _without_group_task_scheduler = + new pipeline::TaskScheduler(this, t_queue, "PipeNoGSchePool", nullptr); RETURN_IF_ERROR(_without_group_task_scheduler->start()); - RETURN_IF_ERROR(_without_group_block_scheduler->start()); - _global_block_scheduler = std::make_shared("PipeGBlockSche"); - RETURN_IF_ERROR(_global_block_scheduler->start()); _runtime_filter_timer_queue = new doris::pipeline::RuntimeFilterTimerQueue(); _runtime_filter_timer_queue->run(); return Status::OK(); @@ -627,10 +622,8 @@ void ExecEnv::destroy() { // stop workload scheduler SAFE_STOP(_workload_sched_mgr); // stop pipline step 1, non-cgroup execution - SAFE_SHUTDOWN(_without_group_block_scheduler.get()); SAFE_STOP(_without_group_task_scheduler); // stop pipline step 2, cgroup execution - SAFE_SHUTDOWN(_global_block_scheduler.get()); SAFE_STOP(_workload_group_manager); SAFE_STOP(_external_scan_context_mgr); diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index c4605f1de54590..7dbf1571e50737 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -42,7 +42,6 @@ #include #include "common/status.h" -#include "pipeline/pipeline_x/pipeline_x_fragment_context.h" // IWYU pragma: no_include #include // IWYU pragma: keep #include @@ -845,7 +844,7 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, _setup_shared_hashtable_for_broadcast_join(params, query_ctx.get()); int64_t duration_ns = 0; std::shared_ptr context = - std::make_shared( + std::make_shared( query_ctx->query_id(), params.fragment_id, query_ctx, _exec_env, cb, std::bind(std::mem_fn(&FragmentMgr::trigger_pipeline_context_report), this, std::placeholders::_1, std::placeholders::_2)); @@ -892,7 +891,7 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, g_fragment_last_active_time.set_value(now); std::lock_guard lock(_lock); std::vector ins_ids; - reinterpret_cast(context.get())->instance_ids(ins_ids); + context->instance_ids(ins_ids); // TODO: simplify this mapping for (const auto& ins_id : ins_ids) { _pipeline_map.insert({ins_id, context}); @@ -1029,8 +1028,7 @@ void FragmentMgr::cancel_worker() { for (auto& pipeline_itr : _pipeline_map) { if 
(pipeline_itr.second->is_timeout(now)) { std::vector ins_ids; - reinterpret_cast(pipeline_itr.second.get()) - ->instance_ids(ins_ids); + pipeline_itr.second->instance_ids(ins_ids); for (auto& ins_id : ins_ids) { to_cancel.push_back(ins_id); } diff --git a/be/src/runtime/fragment_mgr.h b/be/src/runtime/fragment_mgr.h index 25b555f4fe8d60..5b70be6d8a5c41 100644 --- a/be/src/runtime/fragment_mgr.h +++ b/be/src/runtime/fragment_mgr.h @@ -51,7 +51,6 @@ extern bvar::Status g_fragment_last_active_time; namespace pipeline { class PipelineFragmentContext; -class PipelineXFragmentContext; } // namespace pipeline class QueryContext; class ExecEnv; @@ -104,7 +103,7 @@ class FragmentMgr : public RestMonitorIface { void cancel_instance(const TUniqueId& instance_id, const PPlanFragmentCancelReason& reason, const std::string& msg = ""); // Cancel fragment (only pipelineX). - // {query id fragment} -> PipelineXFragmentContext + // {query id fragment} -> PipelineFragmentContext void cancel_fragment(const TUniqueId& query_id, int32_t fragment_id, const PPlanFragmentCancelReason& reason, const std::string& msg = ""); diff --git a/be/src/runtime/query_context.cpp b/be/src/runtime/query_context.cpp index 081d8ca1f590ed..c521c855515952 100644 --- a/be/src/runtime/query_context.cpp +++ b/be/src/runtime/query_context.cpp @@ -33,7 +33,6 @@ #include "olap/olap_common.h" #include "pipeline/pipeline_fragment_context.h" #include "pipeline/pipeline_x/dependency.h" -#include "pipeline/pipeline_x/pipeline_x_fragment_context.h" #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" #include "runtime/runtime_query_statistics_mgr.h" @@ -430,20 +429,16 @@ QueryContext::_collect_realtime_query_profile_x() const { for (auto& [fragment_id, fragment_ctx_wptr] : _fragment_id_to_pipeline_ctx) { if (auto fragment_ctx = fragment_ctx_wptr.lock()) { - // In theory, cast result can not be nullptr since we have checked the pipeline X engine above - std::shared_ptr fragment_ctx_x = - std::dynamic_pointer_cast(fragment_ctx); - - if (fragment_ctx_x == nullptr) { + if (fragment_ctx == nullptr) { std::string msg = - fmt::format("PipelineXFragmentContext is nullptr, query {} fragment_id: {}", + fmt::format("PipelineFragmentContext is nullptr, query {} fragment_id: {}", print_id(_query_id), fragment_id); LOG_ERROR(msg); DCHECK(false) << msg; continue; } - auto profile = fragment_ctx_x->collect_realtime_profile_x(); + auto profile = fragment_ctx->collect_realtime_profile_x(); if (profile.empty()) { std::string err_msg = fmt::format( diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 2713ee441dd0df..4df90fd798af24 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -141,7 +141,7 @@ RuntimeState::RuntimeState(const TUniqueId& instance_id, const TUniqueId& query_ query_id, RuntimeFilterParamsContext::create(this), _query_mem_tracker)); } -RuntimeState::RuntimeState(pipeline::PipelineXFragmentContext*, const TUniqueId& instance_id, +RuntimeState::RuntimeState(pipeline::PipelineFragmentContext*, const TUniqueId& instance_id, const TUniqueId& query_id, int32_t fragment_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx) diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index b266b76778f672..025d9517c28ef4 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -49,7 +49,7 @@ class IRuntimeFilter; namespace pipeline { class PipelineXLocalStateBase; class 
PipelineXSinkLocalStateBase;
-class PipelineXFragmentContext;
+class PipelineFragmentContext;
class PipelineXTask;
} // namespace pipeline
@@ -76,7 +76,7 @@ class RuntimeState {
ExecEnv* exec_env, QueryContext* ctx);
// for only use in pipelineX
- RuntimeState(pipeline::PipelineXFragmentContext*, const TUniqueId& instance_id,
+ RuntimeState(pipeline::PipelineFragmentContext*, const TUniqueId& instance_id,
const TUniqueId& query_id, int32 fragment_id, const TQueryOptions& query_options,
const TQueryGlobals& query_globals, ExecEnv* exec_env, QueryContext* ctx);
@@ -662,7 +662,7 @@ class RuntimeState {
// runtime filter
std::unique_ptr _runtime_filter_mgr;
- // owned by PipelineXFragmentContext
+ // owned by PipelineFragmentContext
RuntimeFilterMgr* _pipeline_x_runtime_filter_mgr = nullptr;
// Data stream receivers created by a plan fragment are gathered here to make sure
diff --git a/be/src/runtime/workload_group/workload_group.cpp b/be/src/runtime/workload_group/workload_group.cpp
index c82346f040ec82..05e38b973c6057 100644
--- a/be/src/runtime/workload_group/workload_group.cpp
+++ b/be/src/runtime/workload_group/workload_group.cpp
@@ -361,9 +361,8 @@ void WorkloadGroup::upsert_task_scheduler(WorkloadGroupInfo* tg_info, ExecEnv* e
}
auto task_queue = std::make_shared(executors_size);
std::unique_ptr pipeline_task_scheduler =
- std::make_unique(
- exec_env, exec_env->get_global_block_scheduler(), std::move(task_queue),
- "Pipe_" + tg_name, cg_cpu_ctl_ptr);
+ std::make_unique(exec_env, std::move(task_queue),
+ "Pipe_" + tg_name, cg_cpu_ctl_ptr);
Status ret = pipeline_task_scheduler->start();
if (ret.ok()) {
_task_sched = std::move(pipeline_task_scheduler);

From 677158931ba9c0ba647291c6b7c4f45fd98175df Mon Sep 17 00:00:00 2001
From: xzj7019 <131111794+xzj7019@users.noreply.github.com>
Date: Fri, 26 Apr 2024 18:27:51 +0800
Subject: [PATCH 055/163] [opt](Nereids) bucket shuffle downgrade expansion (#34088)

Expand bucket shuffle downgrade condition, which originally required a single
partition after pruning, a base table, and a bucket number smaller than the
parallel instance number. Currently, we expect this option can be used to
disable bucket shuffle more efficiently, without the above restrictions.
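Illustrative sketch (not part of the original patch): assuming only what the diff below shows, the
relaxed check reduces to a session-variable gate plus a shuffle-type test. The types in this
standalone snippet are simplified stand-ins for Doris's DistributionSpecHash/ShuffleType; only the
decision logic mirrors the new isBucketShuffleDownGrade().

    // Simplified stand-ins for illustration only; the real classes live in the Nereids
    // properties package and carry more members than shown here.
    enum ShuffleType { NATURAL, STORAGE_BUCKETED, EXECUTION_BUCKETED }

    final class BucketShuffleDowngradeSketch {
        // Downgrade whenever the session switch is on and the source side would need an
        // EXECUTION_BUCKETED shuffle; the old base-table / partition / bucket-count checks are gone.
        static boolean shouldDowngrade(boolean enableBucketShuffleDownGrade, ShuffleType srcSideShuffleType) {
            if (!enableBucketShuffleDownGrade) {
                return false;
            }
            return srcSideShuffleType == ShuffleType.EXECUTION_BUCKETED;
        }

        public static void main(String[] args) {
            System.out.println(shouldDowngrade(true, ShuffleType.EXECUTION_BUCKETED));  // true
            System.out.println(shouldDowngrade(false, ShuffleType.EXECUTION_BUCKETED)); // false
        }
    }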
Co-authored-by: zhongjian.xzj --- .../ChildrenPropertiesRegulator.java | 39 +-- .../bs_downgrade_shape/query13.out | 40 +++ .../bs_downgrade_shape/query19.out | 41 +++ .../bs_downgrade_shape/query44.out | 75 ++++++ .../bs_downgrade_shape/query45.out | 38 +++ .../bs_downgrade_shape/query54.out | 84 ++++++ .../bs_downgrade_shape/query56.out | 95 +++++++ .../bs_downgrade_shape/query6.out | 54 ++++ .../bs_downgrade_shape/query61.out | 83 ++++++ .../bs_downgrade_shape/query68.out | 47 ++++ .../bs_downgrade_shape/query8.out | 47 ++++ .../bs_downgrade_shape/query91.out | 46 ++++ .../bs_downgrade_shape/query95.out | 55 ++++ .../bs_downgrade_shape/query13.groovy | 136 ++++++++++ .../bs_downgrade_shape/query19.groovy | 82 ++++++ .../bs_downgrade_shape/query44.groovy | 102 +++++++ .../bs_downgrade_shape/query45.groovy | 72 +++++ .../bs_downgrade_shape/query54.groovy | 144 ++++++++++ .../bs_downgrade_shape/query56.groovy | 170 ++++++++++++ .../bs_downgrade_shape/query6.groovy | 84 ++++++ .../bs_downgrade_shape/query61.groovy | 120 +++++++++ .../bs_downgrade_shape/query68.groovy | 116 ++++++++ .../bs_downgrade_shape/query8.groovy | 248 ++++++++++++++++++ .../bs_downgrade_shape/query91.groovy | 94 +++++++ .../bs_downgrade_shape/query95.groovy | 96 +++++++ 25 files changed, 2173 insertions(+), 35 deletions(-) create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query13.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.out create mode 100644 regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.out create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query13.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.groovy create mode 100644 
regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.groovy create mode 100644 regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java index 366730f7dc521e..8cae2c8775cafd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/ChildrenPropertiesRegulator.java @@ -30,7 +30,6 @@ import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinction; import org.apache.doris.nereids.trees.plans.AggMode; -import org.apache.doris.nereids.trees.plans.GroupPlan; import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.SortPhase; @@ -40,7 +39,6 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalHashAggregate; import org.apache.doris.nereids.trees.plans.physical.PhysicalHashJoin; import org.apache.doris.nereids.trees.plans.physical.PhysicalNestedLoopJoin; -import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalSetOperation; @@ -209,38 +207,12 @@ public Boolean visitPhysicalFilter(PhysicalFilter filter, Void c return true; } - private boolean isBucketShuffleDownGrade(Plan oneSidePlan, DistributionSpecHash otherSideSpec) { - // improper to do bucket shuffle join: - // oneSide: - // 1. base table - // 2. single partition after pruning - // 3. 
tablets' number is small enough (< paraInstanceNum) - // otherSide: ShuffleType.EXECUTION_BUCKETED + private boolean isBucketShuffleDownGrade(DistributionSpecHash srcSideSpec) { boolean isBucketShuffleDownGrade = ConnectContext.get().getSessionVariable().isEnableBucketShuffleDownGrade(); if (!isBucketShuffleDownGrade) { return false; - } else if (otherSideSpec.getShuffleType() != ShuffleType.EXECUTION_BUCKETED) { - return false; } else { - int paraNum = Math.max(1, ConnectContext.get().getSessionVariable().getParallelExecInstanceNum()); - if (((GroupPlan) oneSidePlan).getGroup().getPhysicalExpressions().isEmpty()) { - return false; - } else { - Plan plan = ((GroupPlan) oneSidePlan).getGroup().getPhysicalExpressions().get(0).getPlan(); - while ((plan instanceof PhysicalProject || plan instanceof PhysicalFilter) - && !((GroupPlan) plan.child(0)).getGroup().getPhysicalExpressions().isEmpty()) { - plan = ((GroupPlan) plan.child(0)).getGroup().getPhysicalExpressions().get(0).getPlan(); - } - if (plan != null && plan instanceof PhysicalOlapScan - && ((PhysicalOlapScan) plan).getSelectedPartitionIds().size() <= 1 - && ((PhysicalOlapScan) plan).getTable() != null - && ((PhysicalOlapScan) plan).getTable().getDefaultDistributionInfo() != null - && ((PhysicalOlapScan) plan).getTable().getDefaultDistributionInfo().getBucketNum() < paraNum) { - return true; - } else { - return false; - } - } + return srcSideSpec.getShuffleType() == ShuffleType.EXECUTION_BUCKETED; } } @@ -262,9 +234,6 @@ public Boolean visitPhysicalHashJoin(PhysicalHashJoin[s_store_sk] +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalProject +----------------PhysicalOlapScan[store] apply RFs: RF4 +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=((((household_demographics.hd_dep_count = 1) AND ((((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00))) OR (((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00))))) OR ((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) AND (household_demographics.hd_dep_count = 3)))) build RFs:RF3 ss_cdemo_sk->[cd_demo_sk] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------PhysicalProject +----------------------filter(((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary'))) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree')))) +------------------------PhysicalOlapScan[customer_demographics] apply RFs: RF3 +------------------PhysicalDistribute[DistributionSpecHash] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +------------------------hashJoin[INNER_JOIN] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=((((ca_state IN ('IL', 'TN', 'TX') AND ((store_sales.ss_net_profit >= 100.00) AND (store_sales.ss_net_profit <= 200.00))) OR (ca_state IN ('ID', 'OH', 'WY') AND ((store_sales.ss_net_profit >= 150.00) AND (store_sales.ss_net_profit <= 300.00)))) OR (ca_state IN ('IA', 'MS', 'SC') AND ((store_sales.ss_net_profit >= 50.00) AND (store_sales.ss_net_profit <= 250.00))))) build RFs:RF0 ca_address_sk->[ss_addr_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------filter((store_sales.ss_net_profit <= 300.00) and (store_sales.ss_net_profit >= 50.00) and (store_sales.ss_sales_price <= 200.00) and (store_sales.ss_sales_price >= 50.00)) +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('IA', 'ID', 'IL', 'MS', 'OH', 'SC', 'TN', 'TX', 'WY')) +----------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------PhysicalProject +------------------------------filter((date_dim.d_year = 2001)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------PhysicalProject +----------------------------filter(hd_dep_count IN (1, 3)) +------------------------------PhysicalOlapScan[household_demographics] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.out new file mode 100644 index 00000000000000..6b7d023e3be42f --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.out @@ -0,0 +1,41 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_19 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=(( not (substring(ca_zip, 1, 5) = substring(s_zip, 1, 5)))) build RFs:RF4 s_store_sk->[ss_store_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ss_customer_sk->[c_customer_sk] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[customer] apply RFs: RF2 +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_date_sk = store_sales.ss_sold_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF4 +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------PhysicalProject +------------------------------------------filter((item.i_manager_id = 14)) +--------------------------------------------PhysicalOlapScan[item] +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 2002)) +------------------------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.out new file mode 100644 index 00000000000000..ea4ea67293e48b --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.out @@ -0,0 +1,75 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_44 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN] hashCondition=((asceding.rnk = descending.rnk)) otherCondition=() +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((i1.i_item_sk = asceding.item_sk)) otherCondition=() build RFs:RF1 item_sk->[i_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[item] apply RFs: RF1 +------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN +----------------------------------PhysicalProject +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) +--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------PhysicalProject +------------------------------------------PhysicalAssertNumRows +--------------------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------------------PhysicalProject +------------------------------------------------hashAgg[GLOBAL] +--------------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------------hashAgg[LOCAL] +------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +----------------------------------------------------------PhysicalOlapScan[store_sales] +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((i2.i_item_sk = descending.item_sk)) otherCondition=() build RFs:RF0 item_sk->[i_item_sk] +------------------PhysicalProject +--------------------PhysicalOlapScan[item] apply RFs: RF0 +------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------PhysicalProject +----------------------filter((rnk < 11)) +------------------------PhysicalWindow +--------------------------PhysicalQuickSort[MERGE_SORT] +----------------------------PhysicalDistribute[DistributionSpecGather] +------------------------------PhysicalQuickSort[LOCAL_SORT] +--------------------------------PhysicalPartitionTopN +----------------------------------PhysicalProject +------------------------------------NestedLoopJoin[INNER_JOIN](cast(rank_col as DOUBLE) > cast((0.9 * rank_col) as DOUBLE)) 
+--------------------------------------PhysicalProject +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------filter((ss1.ss_store_sk = 4)) +--------------------------------------------------PhysicalOlapScan[store_sales] +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------PhysicalProject +------------------------------------------PhysicalAssertNumRows +--------------------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------------------PhysicalProject +------------------------------------------------hashAgg[GLOBAL] +--------------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------------hashAgg[LOCAL] +------------------------------------------------------PhysicalProject +--------------------------------------------------------filter((store_sales.ss_store_sk = 4) and ss_hdemo_sk IS NULL) +----------------------------------------------------------PhysicalOlapScan[store_sales] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.out new file mode 100644 index 00000000000000..091bda5f39c326 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.out @@ -0,0 +1,38 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_45 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------filter((substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274') OR $c$1)) +------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk] +--------------------PhysicalProject +----------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ws_bill_customer_sk->[c_customer_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[customer] apply RFs: RF2 +--------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ws_item_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +--------------------------------PhysicalDistribute[DistributionSpecReplicated] 
+----------------------------------PhysicalProject +------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[item] +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------PhysicalProject +------------------------------------------filter(i_item_sk IN (11, 13, 17, 19, 2, 23, 29, 3, 5, 7)) +--------------------------------------------PhysicalOlapScan[item] +------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_qoy = 1) and (date_dim.d_year = 2000)) +------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.out new file mode 100644 index 00000000000000..be67d64e1d3f4e --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.out @@ -0,0 +1,84 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_54 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------NestedLoopJoin[INNER_JOIN](cast(d_month_seq as BIGINT) <= (d_month_seq + 3)) +----------------------------PhysicalProject +------------------------------NestedLoopJoin[INNER_JOIN](cast(d_month_seq as BIGINT) >= (d_month_seq + 1)) +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF7 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN] hashCondition=((my_customers.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF6 c_customer_sk->[ss_customer_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 +----------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------PhysicalProject +--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer_address.ca_county = store.s_county) and (customer_address.ca_state = store.s_state)) otherCondition=() build RFs:RF4 s_county->[ca_county];RF5 s_state->[ca_state] +----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((my_customers.c_current_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 RF4 RF5 
+------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------PhysicalProject +----------------------------------------------------hashAgg[GLOBAL] +------------------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------------------hashAgg[LOCAL] +----------------------------------------------------------PhysicalProject +------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer.c_customer_sk = cs_or_ws_sales.customer_sk)) otherCondition=() build RFs:RF2 customer_sk->[c_customer_sk] +--------------------------------------------------------------PhysicalProject +----------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF2 +--------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------------------PhysicalProject +------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs_or_ws_sales.sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk,ws_sold_date_sk] +--------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------hashJoin[INNER_JOIN] hashCondition=((cs_or_ws_sales.item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk,ws_item_sk] +------------------------------------------------------------------------PhysicalUnion +--------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 +--------------------------------------------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------------------------------------PhysicalProject +----------------------------------------------------------------------------filter((item.i_category = 'Music') and (item.i_class = 'country')) +------------------------------------------------------------------------------PhysicalOlapScan[item] +--------------------------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------------------------------PhysicalProject +------------------------------------------------------------------------filter((date_dim.d_moy = 1) and (date_dim.d_year = 1999)) +--------------------------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------------PhysicalProject +--------------------------------------------------PhysicalOlapScan[store] 
+------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------PhysicalAssertNumRows +------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------hashAgg[GLOBAL] +----------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------hashAgg[LOCAL] +--------------------------------------------PhysicalProject +----------------------------------------------filter((date_dim.d_moy = 1) and (date_dim.d_year = 1999)) +------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------PhysicalAssertNumRows +--------------------------------PhysicalDistribute[DistributionSpecGather] +----------------------------------hashAgg[GLOBAL] +------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------hashAgg[LOCAL] +----------------------------------------PhysicalProject +------------------------------------------filter((date_dim.d_moy = 1) and (date_dim.d_year = 1999)) +--------------------------------------------PhysicalOlapScan[date_dim] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.out new file mode 100644 index 00000000000000..b74545fe8068f4 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.out @@ -0,0 +1,95 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_56 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalUnion +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[ss_addr_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2000)) +------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF0 i_item_id->[i_item_id] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[item] apply RFs: RF0 +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter(i_color IN ('orchid', 'pink', 'powder')) +------------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_gmt_offset = -6.00)) +----------------------------------PhysicalOlapScan[customer_address] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF7 ca_address_sk->[cs_bill_addr_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[cs_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk] +------------------------------------PhysicalProject 
+--------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5 RF6 RF7 +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2000)) +------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF4 i_item_id->[i_item_id] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[item] apply RFs: RF4 +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter(i_color IN ('orchid', 'pink', 'powder')) +------------------------------------------PhysicalOlapScan[item] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_gmt_offset = -6.00)) +----------------------------------PhysicalOlapScan[customer_address] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_bill_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF11 ws_bill_addr_sk->[ca_address_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_gmt_offset = -6.00)) +----------------------------------PhysicalOlapScan[customer_address] apply RFs: RF11 +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 i_item_sk->[ws_item_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF9 d_date_sk->[ws_sold_date_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF9 RF10 +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2000)) +------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((item.i_item_id = item.i_item_id)) otherCondition=() build RFs:RF8 i_item_id->[i_item_id] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[item] apply RFs: RF8 +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter(i_color IN ('orchid', 'pink', 'powder')) 
+------------------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.out new file mode 100644 index 00000000000000..b2169d25149efb --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.out @@ -0,0 +1,54 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_6 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------filter((cnt >= 10)) +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((a.ca_address_sk = c.c_current_addr_sk)) otherCondition=() build RFs:RF5 c_current_addr_sk->[ca_address_sk] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[customer_address] apply RFs: RF5 +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((c.c_customer_sk = s.ss_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer] apply RFs: RF4 +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN] hashCondition=((s.ss_item_sk = i.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +----------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN] hashCondition=((s.ss_sold_date_sk = d.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +----------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------hashJoin[INNER_JOIN] hashCondition=((d.d_month_seq = date_dim.d_month_seq)) otherCondition=() build RFs:RF1 d_month_seq->[d_month_seq] +--------------------------------------------PhysicalProject +----------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +--------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------PhysicalAssertNumRows +------------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------------hashAgg[GLOBAL] +----------------------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------------------hashAgg[LOCAL] +--------------------------------------------------------PhysicalProject +----------------------------------------------------------filter((date_dim.d_moy = 3) and (date_dim.d_year = 2002)) 
+------------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------hashJoin[INNER_JOIN] hashCondition=((j.i_category = i.i_category)) otherCondition=((cast(i_current_price as DECIMALV3(38, 5)) > (1.2 * avg(cast(i_current_price as DECIMALV3(9, 4)))))) build RFs:RF0 i_category->[i_category] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[item] apply RFs: RF0 +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------hashAgg[GLOBAL] +------------------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------------------hashAgg[LOCAL] +----------------------------------------------PhysicalProject +------------------------------------------------PhysicalOlapScan[item] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.out new file mode 100644 index 00000000000000..982462b51cff28 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.out @@ -0,0 +1,83 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_61 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalTopN[LOCAL_SORT] +------PhysicalProject +--------NestedLoopJoin[CROSS_JOIN] +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecGather] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------PhysicalProject +------------------------filter((customer_address.ca_gmt_offset = -7.00)) +--------------------------PhysicalOlapScan[customer_address] apply RFs: RF10 +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[customer] apply RFs: RF9 +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF8 s_store_sk->[ss_store_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk] +------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF6 i_item_sk->[ss_item_sk] +--------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[ss_sold_date_sk] 
+----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7 RF8 +----------------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------------PhysicalProject +--------------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 2000)) +----------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------PhysicalProject +------------------------------------------filter((item.i_category = 'Home')) +--------------------------------------------PhysicalOlapScan[item] +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) +------------------------------------------PhysicalOlapScan[promotion] +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------PhysicalProject +------------------------------------filter((store.s_gmt_offset = -7.00)) +--------------------------------------PhysicalOlapScan[store] +----------PhysicalDistribute[DistributionSpecReplicated] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecGather] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF4 s_store_sk->[ss_store_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ss_customer_sk] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 RF3 RF4 +----------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------PhysicalProject +--------------------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 2000)) +----------------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------PhysicalProject +------------------------------------filter((item.i_category = 'Home')) +--------------------------------------PhysicalOlapScan[item] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashJoin[INNER_JOIN] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[c_current_addr_sk] +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject 
+----------------------------------PhysicalOlapScan[customer] apply RFs: RF0 +------------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------PhysicalProject +----------------------------------filter((customer_address.ca_gmt_offset = -7.00)) +------------------------------------PhysicalOlapScan[customer_address] +----------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------PhysicalProject +--------------------------filter((store.s_gmt_offset = -7.00)) +----------------------------PhysicalOlapScan[store] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.out new file mode 100644 index 00000000000000..dc8b5303dfd5d5 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_68 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------PhysicalProject +----------hashJoin[INNER_JOIN] hashCondition=((customer.c_current_addr_sk = current_addr.ca_address_sk)) otherCondition=(( not (ca_city = bought_city))) build RFs:RF5 c_current_addr_sk->[ca_address_sk] +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalProject +----------------PhysicalOlapScan[customer_address] apply RFs: RF5 +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((dn.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF4 ss_customer_sk->[c_customer_sk] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------PhysicalProject +----------------------PhysicalOlapScan[customer] apply RFs: RF4 +------------------PhysicalDistribute[DistributionSpecHash] +--------------------PhysicalProject +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF3 ss_addr_sk->[ca_address_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk] +----------------------------------PhysicalProject +------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 
+------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------PhysicalProject +----------------------------------------------filter((date_dim.d_dom <= 2) and (date_dim.d_dom >= 1) and d_year IN (1998, 1999, 2000)) +------------------------------------------------PhysicalOlapScan[date_dim] +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------PhysicalProject +------------------------------------------filter(s_city IN ('Fairview', 'Midway')) +--------------------------------------------PhysicalOlapScan[store] +----------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------PhysicalProject +--------------------------------------filter(((household_demographics.hd_dep_count = 3) OR (household_demographics.hd_vehicle_count = 4))) +----------------------------------------PhysicalOlapScan[household_demographics] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.out new file mode 100644 index 00000000000000..aa750d8681807f --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.out @@ -0,0 +1,47 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_8 -- +PhysicalResultSink +--PhysicalTopN[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN] hashCondition=((expr_substring(s_zip, 1, 2) = expr_substring(ca_zip, 1, 2))) otherCondition=() +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2 +--------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------PhysicalProject +------------------------------filter((date_dim.d_qoy = 2) and (date_dim.d_year = 1998)) +--------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store] +------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------PhysicalProject +----------------------PhysicalIntersect +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------filter(substring(ca_zip, 1, 5) IN ('10298', '10374', '10425', '11340', '11489', '11618', '11652', '11686', '11855', '11912', '12197', '12318', '12320', '12350', '13086', '13123', '13261', '13338', '13376', '13378', '13443', '13844', '13869', '13918', '14073', '14155', '14196', '14242', '14312', '14440', '14530', '14851', '15371', '15475', '15543', '15734', '15751', '15782', '15794', '16005', '16226', '16364', '16515', 
'16704', '16791', '16891', '17167', '17193', '17291', '17672', '17819', '17879', '17895', '18218', '18360', '18367', '18410', '18421', '18434', '18569', '18700', '18767', '18829', '18884', '19326', '19444', '19489', '19753', '19833', '19988', '20244', '20317', '20534', '20601', '20712', '21060', '21094', '21204', '21231', '21343', '21727', '21800', '21814', '22728', '22815', '22911', '23065', '23952', '24227', '24255', '24286', '24594', '24660', '24891', '24987', '25115', '25178', '25214', '25264', '25333', '25494', '25717', '25973', '26217', '26689', '27052', '27116', '27156', '27287', '27369', '27385', '27413', '27642', '27700', '28055', '28239', '28571', '28577', '28810', '29086', '29392', '29450', '29752', '29818', '30106', '30415', '30621', '31013', '31016', '31655', '31830', '32489', '32669', '32754', '32919', '32958', '32961', '33113', '33122', '33159', '33467', '33562', '33773', '33869', '34306', '34473', '34594', '34948', '34972', '35076', '35390', '35834', '35863', '35926', '36201', '36335', '36430', '36479', '37119', '37788', '37914', '38353', '38607', '38919', '39214', '39459', '39500', '39503', '40146', '40936', '40979', '41162', '41232', '41255', '41331', '41351', '41352', '41419', '41807', '41836', '41967', '42361', '43432', '43639', '43830', '43933', '44529', '45266', '45484', '45533', '45645', '45676', '45859', '46081', '46131', '46507', '47289', '47369', '47529', '47602', '47770', '48017', '48162', '48333', '48530', '48567', '49101', '49130', '49140', '49211', '49230', '49254', '49472', '50412', '50632', '50636', '50679', '50788', '51089', '51184', '51195', '51634', '51717', '51766', '51782', '51793', '51933', '52094', '52301', '52389', '52868', '53163', '53535', '53565', '54010', '54207', '54364', '54558', '54585', '55233', '55349', '56224', '56355', '56436', '56455', '56600', '56877', '57025', '57553', '57631', '57649', '57839', '58032', '58058', '58062', '58117', '58218', '58412', '58454', '58581', '59004', '59080', '59130', '59226', '59345', '59386', '59494', '59852', '60083', '60298', '60560', '60624', '60736', '61527', '61794', '61860', '61997', '62361', '62585', '62878', '63073', '63180', '63193', '63294', '63792', '63991', '64592', '65148', '65177', '65501', '66057', '66943', '67881', '67975', '67998', '68101', '68293', '68341', '68605', '68730', '68770', '68843', '68852', '68908', '69280', '69952', '69998', '70041', '70070', '70073', '70450', '71144', '71256', '71286', '71836', '71948', '71954', '71997', '72592', '72991', '73021', '73108', '73134', '73146', '73219', '73873', '74686', '75660', '75675', '75742', '75752', '77454', '77817', '78093', '78366', '79077', '79658', '80332', '80846', '81003', '81070', '81084', '81335', '81504', '81755', '81963', '82080', '82602', '82620', '83041', '83086', '83583', '83647', '83833', '83910', '83986', '84247', '84680', '84844', '84919', '85066', '85761', '86057', '86379', '86709', '88086', '88137', '88217', '89193', '89338', '90209', '90229', '90669', '91110', '91894', '92292', '92380', '92645', '92696', '93498', '94791', '94835', '94898', '95042', '95430', '95464', '95694', '96435', '96560', '97173', '97462', '98069', '98072', '98338', '98533', '98569', '98584', '98862', '99060', '99132')) +------------------------------PhysicalOlapScan[customer_address] +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------filter((cnt > 10)) +------------------------------hashAgg[GLOBAL] 
+--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashAgg[LOCAL] +------------------------------------PhysicalProject +--------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF0 c_current_addr_sk->[ca_address_sk] +----------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------PhysicalProject +--------------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF0 +----------------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------PhysicalProject +--------------------------------------------filter((customer.c_preferred_cust_flag = 'Y')) +----------------------------------------------PhysicalOlapScan[customer] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.out new file mode 100644 index 00000000000000..42598f25208596 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.out @@ -0,0 +1,46 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !ds_shape_91 -- +PhysicalResultSink +--PhysicalQuickSort[MERGE_SORT] +----PhysicalDistribute[DistributionSpecGather] +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalProject +----------hashAgg[GLOBAL] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[INNER_JOIN] hashCondition=((catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF5 cc_call_center_sk->[cr_call_center_sk] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN] hashCondition=((catalog_returns.cr_returned_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF4 d_date_sk->[cr_returned_date_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_returns.cr_returning_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[cr_returning_customer_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3 RF4 RF5 +----------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------hashJoin[INNER_JOIN] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF2 c_current_addr_sk->[ca_address_sk] +--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------PhysicalProject +------------------------------------filter((customer_address.ca_gmt_offset = -7.00)) +--------------------------------------PhysicalOlapScan[customer_address] apply RFs: RF2 +--------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------hashJoin[INNER_JOIN] hashCondition=((household_demographics.hd_demo_sk = customer.c_current_hdemo_sk)) otherCondition=() build RFs:RF1 hd_demo_sk->[c_current_hdemo_sk] +------------------------------------hashJoin[INNER_JOIN] hashCondition=((customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk)) otherCondition=() build RFs:RF0 
cd_demo_sk->[c_current_cdemo_sk] +--------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------PhysicalProject +------------------------------------------PhysicalOlapScan[customer] apply RFs: RF0 RF1 +--------------------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------------PhysicalProject +------------------------------------------filter((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'Unknown')) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = 'Advanced Degree')))) +--------------------------------------------PhysicalOlapScan[customer_demographics] +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------filter((hd_buy_potential like 'Unknown%')) +------------------------------------------PhysicalOlapScan[household_demographics] +------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------PhysicalProject +----------------------------filter((date_dim.d_moy = 12) and (date_dim.d_year = 2000)) +------------------------------PhysicalOlapScan[date_dim] +--------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------PhysicalProject +------------------------PhysicalOlapScan[call_center] + diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.out new file mode 100644 index 00000000000000..476a65baed6102 --- /dev/null +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.out @@ -0,0 +1,55 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !ds_shape_95 -- +PhysicalCteAnchor ( cteId=CTEId#0 ) +--PhysicalCteProducer ( cteId=CTEId#0 ) +----PhysicalProject +------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF0 ws_order_number->[ws_order_number] +--------PhysicalDistribute[DistributionSpecHash] +----------PhysicalProject +------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF7 +--------PhysicalDistribute[DistributionSpecHash] +----------PhysicalProject +------------PhysicalOlapScan[web_sales] apply RFs: RF7 +--PhysicalResultSink +----PhysicalTopN[MERGE_SORT] +------PhysicalTopN[LOCAL_SORT] +--------hashAgg[DISTINCT_GLOBAL] +----------PhysicalDistribute[DistributionSpecGather] +------------hashAgg[DISTINCT_LOCAL] +--------------hashAgg[GLOBAL] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = web_returns.wr_order_number)) otherCondition=() build RFs:RF6 ws_order_number->[wr_order_number,ws_order_number] +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_returns.wr_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF5 wr_order_number->[ws_order_number] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF5 RF6 +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_returns] apply RFs: RF6 +----------------------PhysicalProject +------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws_wh.ws_order_number)) otherCondition=() build RFs:RF7 ws_order_number->[ws_order_number,ws_order_number] +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------PhysicalProject +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_ship_date_sk] +--------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1 RF2 RF3 +----------------------------------PhysicalDistribute[DistributionSpecReplicated] +------------------------------------PhysicalProject +--------------------------------------filter((customer_address.ca_state = 'VA')) +----------------------------------------PhysicalOlapScan[customer_address] +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_date <= '2001-05-31') and (date_dim.d_date >= '2001-04-01')) 
+--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------PhysicalProject +----------------------------------filter((web_site.web_company_name = 'pri')) +------------------------------------PhysicalOlapScan[web_site] + diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query13.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query13.groovy new file mode 100644 index 00000000000000..5718fe49c2774a --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query13.groovy @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query13") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = 'College' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('IL', 'TN', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WY', 'OH', 'ID') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MS', 'SC', 'IA') + and ss_net_profit between 50 and 250 + )) +""" + qt_ds_shape_13 ''' 
+ explain shape plan + select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = 'College' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('IL', 'TN', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WY', 'OH', 'ID') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MS', 'SC', 'IA') + and ss_net_profit between 50 and 250 + )) + + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.groovy new file mode 100644 index 00000000000000..322c84b782b6b4 --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query19.groovy @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query19") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=14 + and d_moy=11 + and d_year=2002 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 """ + qt_ds_shape_19 ''' + explain shape plan + select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=14 + and d_moy=11 + and d_year=2002 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.groovy new file mode 100644 index 00000000000000..309c650df16f85 --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query44.groovy @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query44") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100""" + qt_ds_shape_44 ''' + explain shape plan + select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk +limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.groovy new file mode 100644 index 00000000000000..90b08d167e9a14 --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query45.groovy @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query45") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + limit 100""" + qt_ds_shape_45 ''' + explain shape plan + select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.groovy new file mode 100644 index 00000000000000..67cc77943d67b4 --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query54.groovy @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query54") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Music' + and i_class = 'country' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 1 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 1) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 1) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100""" + qt_ds_shape_54 ''' + explain shape plan + with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Music' + and i_class = 'country' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 1 + and d_year = 1999 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1999 and d_moy = 1) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1999 and d_moy = 1) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100 + ''' +} diff --git 
a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.groovy new file mode 100644 index 00000000000000..8eb5ff81c8b9d8 --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query56.groovy @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query56") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100""" + qt_ds_shape_56 ''' + explain shape plan + with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ss_item_sk = 
i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','orchid','pink')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 3 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.groovy new file mode 100644 index 00000000000000..8a4e9fc33be442 --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query6.groovy @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query6") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100""" + qt_ds_shape_6 ''' + explain shape plan + select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2002 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.groovy new file mode 100644 index 00000000000000..afc92b3a6d688b --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query61.groovy @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +suite("query61") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) all_sales +order by promotions, total +limit 100""" + qt_ds_shape_61 ''' + explain shape plan + select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Home' + and s_gmt_offset = -7 + and d_year = 2000 + and d_moy = 12) all_sales +order by promotions, total +limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.groovy new file mode 100644 index 00000000000000..a9d1967d203c9a --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query68.groovy @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query68") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= 4) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Fairview','Midway') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100""" + qt_ds_shape_68 ''' + explain shape plan + select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= 4) + and date_dim.d_year in (1998,1998+1,1998+2) + and store.s_city in ('Fairview','Midway') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> 
bought_city + order by c_last_name + ,ss_ticket_number + limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.groovy new file mode 100644 index 00000000000000..98a6e694c196ff --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query8.groovy @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query8") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + '83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + 
'47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100""" + qt_ds_shape_8 ''' + explain shape plan + select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '47602','16704','35863','28577','83910','36201', + '58412','48162','28055','41419','80332', + '38607','77817','24891','16226','18410', + '21231','59345','13918','51089','20317', + '17167','54585','67881','78366','47770', + '18360','51717','73108','14440','21800', + '89338','45859','65501','34948','25973', + '73219','25333','17291','10374','18829', + '60736','82620','41351','52094','19326', + '25214','54207','40936','21814','79077', + '25178','75742','77454','30621','89193', + '27369','41232','48567','83041','71948', + '37119','68341','14073','16891','62878', + '49130','19833','24286','27700','40979', + '50412','81504','94835','84844','71954', + '39503','57649','18434','24987','12350', + '86379','27413','44529','98569','16515', + '27287','24255','21094','16005','56436', + '91110','68293','56455','54558','10298', + 
'83647','32754','27052','51766','19444', + '13869','45645','94791','57631','20712', + '37788','41807','46507','21727','71836', + '81070','50632','88086','63991','20244', + '31655','51782','29818','63792','68605', + '94898','36430','57025','20601','82080', + '33869','22728','35834','29086','92645', + '98584','98072','11652','78093','57553', + '43830','71144','53565','18700','90209', + '71256','38353','54364','28571','96560', + '57839','56355','50679','45266','84680', + '34306','34972','48530','30106','15371', + '92380','84247','92292','68852','13338', + '34594','82602','70073','98069','85066', + '47289','11686','98862','26217','47529', + '63294','51793','35926','24227','14196', + '24594','32489','99060','49472','43432', + '49211','14312','88137','47369','56877', + '20534','81755','15794','12318','21060', + '73134','41255','63073','81003','73873', + '66057','51184','51195','45676','92696', + '70450','90669','98338','25264','38919', + '59226','58581','60298','17895','19489', + '52301','80846','95464','68770','51634', + '19988','18367','18421','11618','67975', + '25494','41352','95430','15734','62585', + '97173','33773','10425','75675','53535', + '17879','41967','12197','67998','79658', + '59130','72592','14851','43933','68101', + '50636','25717','71286','24660','58058', + '72991','95042','15543','33122','69280', + '11912','59386','27642','65177','17672', + '33467','64592','36335','54010','18767', + '63193','42361','49254','33113','33159', + '36479','59080','11855','81963','31016', + '49140','29392','41836','32958','53163', + '13844','73146','23952','65148','93498', + '14530','46131','58454','13376','13378', + '83986','12320','17193','59852','46081', + '98533','52389','13086','68843','31013', + '13261','60560','13443','45533','83583', + '11489','58218','19753','22911','25115', + '86709','27156','32669','13123','51933', + '39214','41331','66943','14155','69998', + '49101','70070','35076','14242','73021', + '59494','15782','29752','37914','74686', + '83086','34473','15751','81084','49230', + '91894','60624','17819','28810','63180', + '56224','39459','55233','75752','43639', + '55349','86057','62361','50788','31830', + '58062','18218','85761','60083','45484', + '21204','90229','70041','41162','35390', + '16364','39500','68908','26689','52868', + '81335','40146','11340','61527','61794', + '71997','30415','59004','29450','58117', + '69952','33562','83833','27385','61860', + '96435','48333','23065','32961','84919', + '61997','99132','22815','56600','68730', + '48017','95694','32919','88217','27116', + '28239','58032','18884','16791','21343', + '97462','18569','75660','15475') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100 + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.groovy new file mode 100644 index 00000000000000..9fd084f69bebbc --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query91.groovy @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query91") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2000 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc""" + qt_ds_shape_91 ''' + explain shape plan + select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2000 +and d_moy = 12 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -7 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc + ''' +} diff --git a/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.groovy b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.groovy new file mode 100644 index 00000000000000..2837fea540febf --- /dev/null +++ b/regression-test/suites/nereids_tpcds_shape_sf1000_p0/bs_downgrade_shape/query95.groovy @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("query95") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql "use ${db}" + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql 'set exec_mem_limit=21G' + sql 'set be_number_for_test=3' + sql 'set parallel_fragment_exec_instance_num=8; ' + sql 'set parallel_pipeline_task_num=8; ' + sql 'set forbid_unknown_col_stats=true' + sql 'set enable_nereids_timeout = false' + sql 'set enable_runtime_filter_prune=false' + sql 'set runtime_filter_type=8' + sql 'set dump_nereids_memo=false' + sql 'set enable_bucket_shuffle_downgrade=true' + def ds = """with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as "order count" + ,sum(ws_ext_ship_cost) as "total shipping cost" + ,sum(ws_net_profit) as "total net profit" +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + interval 60 day) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'VA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100""" + qt_ds_shape_95 ''' + explain shape plan + with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as "order count" + ,sum(ws_ext_ship_cost) as "total shipping cost" + ,sum(ws_net_profit) as "total net profit" +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + interval 60 day) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'VA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100 + ''' +} From ee4196d9d23c252fc35a287c04adf9d705e7637f Mon Sep 17 00:00:00 2001 From: Gabriel Date: Fri, 26 Apr 2024 18:31:11 +0800 Subject: [PATCH 056/163] [Improvement](agg) Improve count distinct distribute keys 
(#33167) --- .../aggregate_function_simple_factory.cpp | 2 + .../aggregate_function_uniq.h | 2 +- ...aggregate_function_uniq_distribute_key.cpp | 73 +++++ .../aggregate_function_uniq_distribute_key.h | 253 ++++++++++++++++++ 4 files changed, 329 insertions(+), 1 deletion(-) create mode 100644 be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.cpp create mode 100644 be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp index 00597b212befd0..d95d0ce6ccb90d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp @@ -40,6 +40,7 @@ void register_aggregate_function_count(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_HLL_union_agg(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_uniq(AggregateFunctionSimpleFactory& factory); +void register_aggregate_function_uniq_distribute_key(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_bit(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_bitmap(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_quantile_state(AggregateFunctionSimpleFactory& factory); @@ -80,6 +81,7 @@ AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { register_aggregate_function_count(instance); register_aggregate_function_count_by_enum(instance); register_aggregate_function_uniq(instance); + register_aggregate_function_uniq_distribute_key(instance); register_aggregate_function_bit(instance); register_aggregate_function_bitmap(instance); register_aggregate_function_group_array_intersect(instance); diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h b/be/src/vec/aggregate_functions/aggregate_function_uniq.h index 2e8855134ebd31..58abd3842c21b2 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h @@ -75,7 +75,7 @@ struct AggregateFunctionUniqExactData { Set set; - static String get_name() { return "uniqExact"; } + static String get_name() { return "multi_distinct"; } }; namespace detail { diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.cpp b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.cpp new file mode 100644 index 00000000000000..3bf979483b527c --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.cpp @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/aggregate_functions/aggregate_function_uniq_distribute_key.h" + +#include + +#include "vec/aggregate_functions/aggregate_function_simple_factory.h" +#include "vec/aggregate_functions/factory_helpers.h" +#include "vec/aggregate_functions/helpers.h" + +namespace doris::vectorized { + +template