diff --git a/Cargo.lock b/Cargo.lock index a7a68203b2a5..d69c13da4644 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2435,6 +2435,7 @@ dependencies = [ "criterion", "databend-common-base", "databend-common-exception", + "databend-common-functions", "databend-common-io", "databend-common-meta-app", "enum-as-inner 0.5.1", @@ -2751,7 +2752,6 @@ dependencies = [ "borsh 1.2.1", "bstr 1.6.2", "bumpalo", - "bytes", "chrono", "chrono-tz", "comfy-table 6.1.4", @@ -2793,6 +2793,7 @@ dependencies = [ "siphasher", "streaming_algorithms", "strength_reduce", + "stringslice", "twox-hash", ] @@ -3195,6 +3196,7 @@ dependencies = [ "databend-common-expression", "futures", "minitrace", + "once_cell", "petgraph", "serde", "serde_json", @@ -3261,7 +3263,6 @@ dependencies = [ "databend-common-exception", "databend-common-expression", "databend-common-pipeline-core", - "databend-common-profile", "itertools 0.10.5", "jsonb 0.3.0 (git+https://github.com/datafuselabs/jsonb?rev=a0669bf)", "match-template", @@ -3270,14 +3271,6 @@ dependencies = [ "typetag", ] -[[package]] -name = "databend-common-profile" -version = "0.1.0" -dependencies = [ - "dashmap", - "databend-common-base", -] - [[package]] name = "databend-common-proto-conv" version = "0.1.0" @@ -3388,7 +3381,6 @@ dependencies = [ "databend-common-pipeline-core", "databend-common-pipeline-sources", "databend-common-pipeline-transforms", - "databend-common-profile", "databend-common-settings", "databend-common-storage", "databend-common-storages-delta", @@ -3856,7 +3848,6 @@ dependencies = [ "databend-common-metrics", "databend-common-pipeline-core", "databend-common-pipeline-sources", - "databend-common-profile", "databend-common-sql", "databend-common-storage", "databend-common-storages-fuse", @@ -4233,7 +4224,6 @@ dependencies = [ "databend-common-pipeline-sinks", "databend-common-pipeline-sources", "databend-common-pipeline-transforms", - "databend-common-profile", "databend-common-settings", "databend-common-sharing", "databend-common-sql", @@ -12164,6 +12154,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" +[[package]] +name = "stringslice" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dad614e9c8a0773603982e0c1d3192f2bce1c0ee193caf0526b8ae3c6c3e38c" + [[package]] name = "strsim" version = "0.10.0" diff --git a/Cargo.toml b/Cargo.toml index b7a7404a2d93..8771d38de23a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,6 @@ members = [ "src/query/pipeline/sinks", "src/query/pipeline/sources", "src/query/pipeline/transforms", - "src/query/profile", "src/query/settings", "src/query/sql", "src/query/storages/common/blocks", diff --git a/src/common/cloud_control/proto/task.proto b/src/common/cloud_control/proto/task.proto index eb19965a06b6..dda4f3776405 100644 --- a/src/common/cloud_control/proto/task.proto +++ b/src/common/cloud_control/proto/task.proto @@ -33,6 +33,7 @@ message CreateTaskRequest { // DAG repeated string after = 11; // optional string when_condition = 12; + map session_parameters = 13; } message TaskError { @@ -90,6 +91,7 @@ message Task { optional string last_suspended_at = 16; repeated string after = 17; optional string when_condition = 18; + map session_parameters = 19; } @@ -134,6 +136,8 @@ message AlterTaskRequest { optional string when_condition = 11; repeated string add_after = 12; repeated string remove_after = 13; + bool set_session_parameters = 14; + map 
session_parameters = 15; } message AlterTaskResponse { @@ -179,6 +183,7 @@ message TaskRun { string query_id = 17; string condition_text = 18; string root_task_id = 19; + map session_parameters = 20; } message ShowTaskRunsResponse { diff --git a/src/common/cloud_control/src/task_utils.rs b/src/common/cloud_control/src/task_utils.rs index 1615ec0753e9..be8b17cfcad5 100644 --- a/src/common/cloud_control/src/task_utils.rs +++ b/src/common/cloud_control/src/task_utils.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::BTreeMap; use std::fmt::Display; use std::fmt::Formatter; @@ -77,6 +78,7 @@ pub struct Task { pub created_at: DateTime, pub updated_at: DateTime, pub last_suspended_at: Option>, + pub session_params: BTreeMap, } pub fn format_schedule_options(s: &ScheduleOptions) -> Result { @@ -203,6 +205,7 @@ impl TryFrom for Task { status, created_at, updated_at, + session_params: value.session_parameters, }; Ok(t) } @@ -227,6 +230,7 @@ pub struct TaskRun { pub error_code: i64, pub error_message: Option, pub root_task_id: String, + pub session_params: BTreeMap, } // convert from crate::pb::taskRun to struct taskRun @@ -305,6 +309,7 @@ impl TryFrom for TaskRun { scheduled_at, completed_at, root_task_id: value.root_task_id, + session_params: value.session_parameters, }; Ok(tr) } diff --git a/src/common/cloud_control/tests/it/task_client.rs b/src/common/cloud_control/tests/it/task_client.rs index e8b09118fd20..68fa51f248a1 100644 --- a/src/common/cloud_control/tests/it/task_client.rs +++ b/src/common/cloud_control/tests/it/task_client.rs @@ -82,6 +82,7 @@ impl TaskService for MockTaskService { last_suspended_at: None, after: vec![], when_condition: None, + session_parameters: Default::default(), }), error: None, })) @@ -196,6 +197,7 @@ async fn test_task_client_success_cases() -> Result<()> { if_not_exist: false, after: vec![], when_condition: None, + session_parameters: Default::default(), }); let response = client.create_task(request).await?; diff --git a/src/common/hashtable/src/hashjoin_hashtable.rs b/src/common/hashtable/src/hashjoin_hashtable.rs index 7c126159fa14..235200e83f7f 100644 --- a/src/common/hashtable/src/hashjoin_hashtable.rs +++ b/src/common/hashtable/src/hashjoin_hashtable.rs @@ -209,56 +209,80 @@ where count } - // Using hashes to probe hash table and converting them in-place to pointers for memory reuse. - fn early_filtering_probe(&self, hashes: &mut [u64], bitmap: Option) -> usize { + // Perform early filtering probe, store matched indexes in `matched_selection` and store unmatched indexes + // in `unmatched_selection`, return the number of matched and unmatched indexes. 
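    // A hedged caller-side sketch of this selection-based contract (the caller name and
    // buffer sizing below are illustrative, not part of this patch):
    //     let mut matched = vec![0u32; hashes.len()];
    //     let mut unmatched = vec![0u32; hashes.len()];
    //     let (m, u) = table.early_filtering_probe(hashes, None, &mut matched, &mut unmatched);
    //     debug_assert_eq!(m + u, hashes.len());
    // Rows listed in `matched[..m]` keep a bucket pointer in `hashes[idx]` for the key
    // comparison that follows; rows in `unmatched[..u]` can be routed straight to the
    // unmatched path without touching the hash table again.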
+ fn early_filtering_probe( + &self, + hashes: &mut [u64], + bitmap: Option, + matched_selection: &mut [u32], + unmatched_selection: &mut [u32], + ) -> (usize, usize) { let mut valids = None; if let Some(bitmap) = bitmap { if bitmap.unset_bits() == bitmap.len() { - hashes.iter_mut().for_each(|hash| { - *hash = 0; - }); - return 0; + unmatched_selection + .iter_mut() + .enumerate() + .for_each(|(idx, val)| { + *val = idx as u32; + }); + return (0, hashes.len()); } else if bitmap.unset_bits() > 0 { valids = Some(bitmap); } } - let mut count = 0; + let mut matched_idx = 0; + let mut unmatched_idx = 0; match valids { Some(valids) => { - valids - .iter() - .zip(hashes.iter_mut()) - .for_each(|(valid, hash)| { + valids.iter().zip(hashes.iter_mut().enumerate()).for_each( + |(valid, (idx, hash))| { if valid { let header = self.pointers[(*hash >> self.hash_shift) as usize]; if header != 0 && early_filtering(header, *hash) { *hash = remove_header_tag(header); - count += 1; + unsafe { + *matched_selection.get_unchecked_mut(matched_idx) = idx as u32 + }; + matched_idx += 1; } else { - *hash = 0; + unsafe { + *unmatched_selection.get_unchecked_mut(unmatched_idx) = + idx as u32 + }; + unmatched_idx += 1; } } else { - *hash = 0; + unsafe { + *unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32 + }; + unmatched_idx += 1; } - }); + }, + ); } None => { - hashes.iter_mut().for_each(|hash| { + hashes.iter_mut().enumerate().for_each(|(idx, hash)| { let header = self.pointers[(*hash >> self.hash_shift) as usize]; if header != 0 && early_filtering(header, *hash) { *hash = remove_header_tag(header); - count += 1; + unsafe { *matched_selection.get_unchecked_mut(matched_idx) = idx as u32 }; + matched_idx += 1; } else { - *hash = 0; + unsafe { + *unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32 + }; + unmatched_idx += 1; } }); } } - count + (matched_idx, unmatched_idx) } - // Using hashes to probe hash table and converting them in-place to pointers for memory reuse. - fn early_filtering_probe_with_selection( + // Perform early filtering probe and store matched indexes in `selection`, return the number of matched indexes. + fn early_filtering_matched_probe( &self, hashes: &mut [u64], bitmap: Option, diff --git a/src/common/hashtable/src/hashjoin_string_hashtable.rs b/src/common/hashtable/src/hashjoin_string_hashtable.rs index c7ca141e7f03..7112d2ee68fb 100644 --- a/src/common/hashtable/src/hashjoin_string_hashtable.rs +++ b/src/common/hashtable/src/hashjoin_string_hashtable.rs @@ -138,20 +138,31 @@ where A: Allocator + Clone + 'static count } - // Using hashes to probe hash table and converting them in-place to pointers for memory reuse. - fn early_filtering_probe(&self, hashes: &mut [u64], bitmap: Option) -> usize { + // Perform early filtering probe, store matched indexes in `matched_selection` and store unmatched indexes + // in `unmatched_selection`, return the number of matched and unmatched indexes. 
+ fn early_filtering_probe( + &self, + hashes: &mut [u64], + bitmap: Option, + matched_selection: &mut [u32], + unmatched_selection: &mut [u32], + ) -> (usize, usize) { let mut valids = None; if let Some(bitmap) = bitmap { if bitmap.unset_bits() == bitmap.len() { - hashes.iter_mut().for_each(|hash| { - *hash = 0; - }); - return 0; + unmatched_selection + .iter_mut() + .enumerate() + .for_each(|(idx, val)| { + *val = idx as u32; + }); + return (0, hashes.len()); } else if bitmap.unset_bits() > 0 { valids = Some(bitmap); } } - let mut count = 0; + let mut matched_idx = 0; + let mut unmatched_idx = 0; match valids { Some(valids) => { hashes.iter_mut().enumerate().for_each(|(idx, hash)| { @@ -159,32 +170,45 @@ where A: Allocator + Clone + 'static let header = self.pointers[(*hash >> self.hash_shift) as usize]; if header != 0 && early_filtering(header, *hash) { *hash = remove_header_tag(header); - count += 1; + unsafe { + *matched_selection.get_unchecked_mut(matched_idx) = idx as u32 + }; + matched_idx += 1; } else { - *hash = 0; + unsafe { + *unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32 + }; + unmatched_idx += 1; } } else { - *hash = 0; - }; + unsafe { + *unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32 + }; + unmatched_idx += 1; + } }); } None => { - hashes.iter_mut().for_each(|hash| { + hashes.iter_mut().enumerate().for_each(|(idx, hash)| { let header = self.pointers[(*hash >> self.hash_shift) as usize]; if header != 0 && early_filtering(header, *hash) { *hash = remove_header_tag(header); - count += 1; + unsafe { *matched_selection.get_unchecked_mut(matched_idx) = idx as u32 }; + matched_idx += 1; } else { - *hash = 0; + unsafe { + *unmatched_selection.get_unchecked_mut(unmatched_idx) = idx as u32 + }; + unmatched_idx += 1; } }); } } - count + (matched_idx, unmatched_idx) } - // Using hashes to probe hash table and converting them in-place to pointers for memory reuse. - fn early_filtering_probe_with_selection( + // Perform early filtering probe and store matched indexes in `selection`, return the number of matched indexes. + fn early_filtering_matched_probe( &self, hashes: &mut [u64], bitmap: Option, diff --git a/src/common/hashtable/src/traits.rs b/src/common/hashtable/src/traits.rs index f49ad9472b8b..14ff77790d80 100644 --- a/src/common/hashtable/src/traits.rs +++ b/src/common/hashtable/src/traits.rs @@ -351,6 +351,13 @@ impl FastHash for [u8] { } } +impl FastHash for str { + #[inline(always)] + fn fast_hash(&self) -> u64 { + self.as_bytes().fast_hash() + } +} + // trick for unsized_hashtable impl FastHash for ([u64; N], NonZeroU64) { #[inline(always)] @@ -506,24 +513,21 @@ pub trait HashtableLike { pub trait HashJoinHashtableLike { type Key: ?Sized; - // Using hashes to probe hash table and converting them in-place to pointers for memory reuse. - // same with `early_filtering_probe`, but we don't use early_filter + // Probe hash table, use `hashes` to probe hash table and convert it in-place to pointers for memory reuse. fn probe(&self, hashes: &mut [u64], bitmap: Option) -> usize; - // Using hashes to probe hash table and converting them in-place to pointers for memory reuse. - // 1. same with `early_filtering_probe_with_selection`, but we don't use selection to preserve the - // unfiltered indexes, we just set the filtered hashes as zero. - // 2. return the unfiltered counts. 
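    // Note on the new `FastHash for str` impl above: a `&str` key hashes exactly like its
    // underlying bytes (`self.as_bytes().fast_hash()`), so string join keys coming from the
    // `&str`-based StringColumn introduced later in this patch land in the same buckets the
    // old `&[u8]` keys did.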
- fn early_filtering_probe(&self, hashes: &mut [u64], bitmap: Option) -> usize; - - // Using hashes to probe hash table and converting them in-place to pointers for memory reuse. - // we use `early_filtering_probe_with_selection` to do the first round probe. - // 1. `hashes` is the hash value of probe block's rows. we will use this one to - // do early filtering. if we can't early filter one row(at idx), we will assign pointer in - // the bucket to hashes[idx] to reuse the memory. - // 2. `selection` is used to preserved the indexes which can't be early_filtered. - // 3. return the count of preserved the indexes in `selection` - fn early_filtering_probe_with_selection( + // Perform early filtering probe, store matched indexes in `matched_selection` and store unmatched indexes + // in `unmatched_selection`, return the number of matched and unmatched indexes. + fn early_filtering_probe( + &self, + hashes: &mut [u64], + valids: Option, + matched_selection: &mut [u32], + unmatched_selection: &mut [u32], + ) -> (usize, usize); + + // Perform early filtering probe and store matched indexes in `selection`, return the number of matched indexes. + fn early_filtering_matched_probe( &self, hashes: &mut [u64], valids: Option, @@ -534,7 +538,7 @@ pub trait HashJoinHashtableLike { // the ptr is the link header. fn next_contains(&self, key: &Self::Key, ptr: u64) -> bool; - /// 1. `key` is the serialize build key from one row + /// 1. `key` is the serialize probe key from one row /// 2. `ptr` pointers to the *RawEntry for of the bucket correlated to key.So before this method, /// we will do a round probe firstly. If the ptr is zero, it means there is no correlated bucket /// for key diff --git a/src/common/storage/src/statistics.rs b/src/common/storage/src/statistics.rs index 0debe1ed8930..5907a5a58a04 100644 --- a/src/common/storage/src/statistics.rs +++ b/src/common/storage/src/statistics.rs @@ -50,7 +50,7 @@ impl Datum { } Scalar::Number(NumberScalar::Float64(v)) => Some(Datum::Float(v)), Scalar::Binary(v) => Some(Datum::Bytes(v)), - Scalar::String(v) => Some(Datum::Bytes(v)), + Scalar::String(v) => Some(Datum::Bytes(v.as_bytes().to_vec())), _ => None, } } diff --git a/src/query/ast/Cargo.toml b/src/query/ast/Cargo.toml index 4121907b9166..183004b44f70 100644 --- a/src/query/ast/Cargo.toml +++ b/src/query/ast/Cargo.toml @@ -13,6 +13,7 @@ doctest = false # Workspace dependencies databend-common-base = { path = "../../common/base" } databend-common-exception = { path = "../../common/exception" } +databend-common-functions = { path = "../functions" } databend-common-io = { path = "../../common/io" } databend-common-meta-app = { path = "../../meta/app" } diff --git a/src/query/ast/src/ast/expr.rs b/src/query/ast/src/ast/expr.rs index a0b19450086a..3522ba10ab29 100644 --- a/src/query/ast/src/ast/expr.rs +++ b/src/query/ast/src/ast/expr.rs @@ -18,6 +18,7 @@ use std::fmt::Formatter; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_exception::Span; +use databend_common_functions::aggregates::AggregateFunctionFactory; use databend_common_io::display_decimal_256; use databend_common_io::escape_string_with_quote; use enum_as_inner::EnumAsInner; @@ -1340,3 +1341,90 @@ pub fn split_equivalent_predicate_expr(expr: &Expr) -> Option<(Expr, Expr)> { _ => None, } } + +// If contain agg function in Expr +pub fn contain_agg_func(expr: &Expr) -> bool { + match expr { + Expr::ColumnRef { .. } => false, + Expr::IsNull { expr, .. 
} => contain_agg_func(expr), + Expr::IsDistinctFrom { left, right, .. } => { + contain_agg_func(left) || contain_agg_func(right) + } + Expr::InList { expr, list, .. } => { + contain_agg_func(expr) || list.iter().any(contain_agg_func) + } + Expr::InSubquery { expr, .. } => contain_agg_func(expr), + Expr::Between { + expr, low, high, .. + } => contain_agg_func(expr) || contain_agg_func(low) || contain_agg_func(high), + Expr::BinaryOp { left, right, .. } => contain_agg_func(left) || contain_agg_func(right), + Expr::JsonOp { left, right, .. } => contain_agg_func(left) || contain_agg_func(right), + Expr::UnaryOp { expr, .. } => contain_agg_func(expr), + Expr::Cast { expr, .. } => contain_agg_func(expr), + Expr::TryCast { expr, .. } => contain_agg_func(expr), + Expr::Extract { expr, .. } => contain_agg_func(expr), + Expr::DatePart { expr, .. } => contain_agg_func(expr), + Expr::Position { + substr_expr, + str_expr, + .. + } => contain_agg_func(substr_expr) || contain_agg_func(str_expr), + Expr::Substring { + expr, + substring_for, + substring_from, + .. + } => { + if let Some(substring_for) = substring_for { + contain_agg_func(expr) || contain_agg_func(substring_for) + } else { + contain_agg_func(expr) || contain_agg_func(substring_from) + } + } + Expr::Trim { expr, .. } => contain_agg_func(expr), + Expr::Literal { .. } => false, + Expr::CountAll { .. } => false, + Expr::Tuple { exprs, .. } => exprs.iter().any(contain_agg_func), + Expr::FunctionCall { name, .. } => { + AggregateFunctionFactory::instance().contains(name.to_string()) + } + Expr::Case { + operand, + conditions, + results, + else_result, + .. + } => { + if let Some(operand) = operand { + if contain_agg_func(operand) { + return true; + } + } + if conditions.iter().any(contain_agg_func) { + return true; + } + if results.iter().any(contain_agg_func) { + return true; + } + if let Some(else_result) = else_result { + if contain_agg_func(else_result) { + return true; + } + } + false + } + Expr::Exists { .. } => false, + Expr::Subquery { .. } => false, + Expr::MapAccess { expr, .. } => contain_agg_func(expr), + Expr::Array { exprs, .. } => exprs.iter().any(contain_agg_func), + Expr::Map { kvs, .. } => kvs.iter().any(|(_, v)| contain_agg_func(v)), + Expr::Interval { expr, .. } => contain_agg_func(expr), + Expr::DateAdd { interval, date, .. } => { + contain_agg_func(interval) || contain_agg_func(date) + } + Expr::DateSub { interval, date, .. } => { + contain_agg_func(interval) || contain_agg_func(date) + } + Expr::DateTrunc { date, .. } => contain_agg_func(date), + } +} diff --git a/src/query/ast/src/ast/statements/task.rs b/src/query/ast/src/ast/statements/task.rs index b101c4b31925..9a164eec4919 100644 --- a/src/query/ast/src/ast/statements/task.rs +++ b/src/query/ast/src/ast/statements/task.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
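// Hedged usage sketch for `contain_agg_func` above; `tokenize_sql`, `parse_expr`, and
// `Dialect` are assumed to be this crate's existing parser entry points:
//     let tokens = tokenize_sql("sum(a) + 1")?;
//     let expr = parse_expr(&tokens, Dialect::PostgreSQL)?;
//     assert!(contain_agg_func(&expr));   // `sum` is registered in AggregateFunctionFactory
//     // "a + 1" yields false. Note the FunctionCall arm only inspects the call's name,
//     // so nested cases like `abs(sum(a))` are not reported as aggregates.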
+use std::collections::BTreeMap; use std::fmt::Display; use std::fmt::Formatter; @@ -23,6 +24,7 @@ pub struct CreateTaskStmt { pub name: String, pub warehouse_opts: WarehouseOptions, pub schedule_opts: Option, + pub session_parameters: BTreeMap, pub suspend_task_after_num_failures: Option, pub comments: String, pub after: Vec, @@ -43,8 +45,14 @@ impl Display for CreateTaskStmt { write!(f, "{}", schedule_opt)?; } + if !self.session_parameters.is_empty() { + for (key, value) in &self.session_parameters { + write!(f, " {} = '{}'", key, value)?; + } + } + if let Some(num) = self.suspend_task_after_num_failures { - write!(f, " SUSPEND TASK AFTER {} FAILURES", num)?; + write!(f, " SUSPEND_TASK_AFTER {} FAILURES", num)?; } if !self.comments.is_empty() { @@ -117,6 +125,7 @@ pub enum AlterTaskOptions { schedule: Option, suspend_task_after_num_failures: Option, comments: Option, + session_parameters: Option>, }, Unset { warehouse: bool, @@ -137,6 +146,7 @@ impl Display for AlterTaskOptions { warehouse, schedule, suspend_task_after_num_failures, + session_parameters, comments, } => { if let Some(wh) = warehouse { @@ -151,6 +161,11 @@ impl Display for AlterTaskOptions { if let Some(comments) = comments { write!(f, " COMMENTS = '{}'", comments)?; } + if let Some(session) = session_parameters { + for (key, value) in session { + write!(f, " {} = '{}'", key, value)?; + } + } Ok(()) } AlterTaskOptions::Unset { warehouse } => { diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 5cbd99193bd7..175b92c0785b 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1574,7 +1574,7 @@ pub fn type_name(i: Input) -> IResult { ); let ty_binary = value( TypeName::Binary, - rule! { ( BINARY | VARBINARY ) ~ ( "(" ~ ^#literal_u64 ~ ^")" )? }, + rule! { ( BINARY | VARBINARY | LONGBLOB | MEDIUMBLOB | TINYBLOB| BLOB ) ~ ( "(" ~ ^#literal_u64 ~ ^")" )? }, ); let ty_string = value( TypeName::String, diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index 4d3186e6dd0f..9b1ca0a9bdae 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -107,12 +107,14 @@ pub fn statement(i: Input) -> IResult { let create_task = map( rule! { CREATE ~ TASK ~ ( IF ~ ^NOT ~ ^EXISTS )? - ~ #ident ~ #task_warehouse_option + ~ #ident + ~ #task_warehouse_option ~ (SCHEDULE ~ "=" ~ #task_schedule_option)? ~ (AFTER ~ #comma_separated_list0(literal_string))? ~ (WHEN ~ #expr )? ~ (SUSPEND_TASK_AFTER_NUM_FAILURES ~ "=" ~ #literal_u64)? ~ ( (COMMENT | COMMENTS) ~ ^"=" ~ ^#literal_string )? + ~ (#set_table_option)? ~ AS ~ #statement }, |( @@ -126,11 +128,12 @@ pub fn statement(i: Input) -> IResult { when_conditions, suspend_opt, comment_opt, + session_opts, _, sql, )| { let sql = format!("{}", sql.stmt); - + let session_opts = session_opts.unwrap_or_default(); Statement::CreateTask(CreateTaskStmt { if_not_exists: opt_if_not_exists.is_some(), name: task.to_string(), @@ -144,6 +147,7 @@ pub fn statement(i: Input) -> IResult { }, when_condition: when_conditions.map(|(_, cond)| cond.to_string()), sql, + session_parameters: session_opts, }) }, ); @@ -2956,12 +2960,16 @@ pub fn alter_task_option(i: Input) -> IResult { ~ ( SCHEDULE ~ "=" ~ #task_schedule_option )? ~ ( SUSPEND_TASK_AFTER_NUM_FAILURES ~ "=" ~ #literal_u64 )? ~ ( COMMENT ~ "=" ~ #literal_string )? 
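    // Hedged note: both CREATE TASK and ALTER TASK ... SET now reuse `#set_table_option`
    // to accept trailing `key = 'value'` pairs, which the parser collects into
    // `session_parameters`, e.g. (as exercised by the test data below):
    //     ALTER TASK MyTask1 SET DATABASE='newDB', TIMEZONE='America/Los_Angeles'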
- }, - |(_, warehouse_opts, schedule_opts, suspend_opts, comment)| AlterTaskOptions::Set { - warehouse: warehouse_opts.map(|(_, _, warehouse)| warehouse), - schedule: schedule_opts.map(|(_, _, schedule)| schedule), - suspend_task_after_num_failures: suspend_opts.map(|(_, _, num)| num), - comments: comment.map(|(_, _, comment)| comment), + ~ (#set_table_option)? + }, + |(_, warehouse_opts, schedule_opts, suspend_opts, comment, session_opts)| { + AlterTaskOptions::Set { + warehouse: warehouse_opts.map(|(_, _, warehouse)| warehouse), + schedule: schedule_opts.map(|(_, _, schedule)| schedule), + suspend_task_after_num_failures: suspend_opts.map(|(_, _, num)| num), + comments: comment.map(|(_, _, comment)| comment), + session_parameters: session_opts, + } }, ); let unset = map( diff --git a/src/query/ast/src/parser/token.rs b/src/query/ast/src/parser/token.rs index 4ee536190e24..945f7b343ef8 100644 --- a/src/query/ast/src/parser/token.rs +++ b/src/query/ast/src/parser/token.rs @@ -364,6 +364,14 @@ pub enum TokenKind { BIGINT, #[token("BINARY", ignore(ascii_case))] BINARY, + #[token("LONGBLOB", ignore(ascii_case))] + LONGBLOB, + #[token("MEDIUMBLOB", ignore(ascii_case))] + MEDIUMBLOB, + #[token("TINYBLOB", ignore(ascii_case))] + TINYBLOB, + #[token("BLOB", ignore(ascii_case))] + BLOB, #[token("BINARY_FORMAT", ignore(ascii_case))] BINARY_FORMAT, #[token("BITMAP", ignore(ascii_case))] @@ -971,6 +979,12 @@ pub enum TokenKind { TABLES, #[token("TEXT", ignore(ascii_case))] TEXT, + #[token("LONGTEXT", ignore(ascii_case))] + LONGTEXT, + #[token("MEDIUMTEXT", ignore(ascii_case))] + MEDIUMTEXT, + #[token("TINYTEXT", ignore(ascii_case))] + TINYTEXT, #[token("TENANTSETTING", ignore(ascii_case))] TENANTSETTING, #[token("TENANTS", ignore(ascii_case))] diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index 2b0b837796af..d9636a64dc54 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -507,13 +507,14 @@ fn test_statement() { r#"CREATE NETWORK POLICY mypolicy ALLOWED_IP_LIST=('192.168.10.0/24') BLOCKED_IP_LIST=('192.168.10.99') COMMENT='test'"#, r#"ALTER NETWORK POLICY mypolicy SET ALLOWED_IP_LIST=('192.168.10.0/24','192.168.255.1') BLOCKED_IP_LIST=('192.168.1.99') COMMENT='test'"#, // tasks - r#"CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 15 MINUTE SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' AS SELECT * FROM MyTable1"#, + r#"CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 15 MINUTE SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' DATABASE = 'target', TIMEZONE = 'America/Los Angeles' AS SELECT * FROM MyTable1"#, r#"CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 15 SECOND SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' AS SELECT * FROM MyTable1"#, r#"CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 1215 SECOND SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' AS SELECT * FROM MyTable1"#, r#"CREATE TASK IF NOT EXISTS MyTask1 SCHEDULE = USING CRON '0 6 * * *' 'America/Los_Angeles' COMMENT = 'serverless + cron' AS insert into t (c1, c2) values (1, 2), (3, 4)"#, r#"CREATE TASK IF NOT EXISTS MyTask1 SCHEDULE = USING CRON '0 12 * * *' AS copy into streams_test.paper_table from @stream_stage FILE_FORMAT = (TYPE = PARQUET) PURGE=true"#, r#"CREATE TASK IF NOT EXISTS MyTask1 SCHEDULE = USING CRON '0 13 * * *' AS COPY INTO @my_internal_stage FROM canadian_city_population FILE_FORMAT 
= (TYPE = PARQUET)"#, r#"CREATE TASK IF NOT EXISTS MyTask1 AFTER 'task2', 'task3' WHEN SYSTEM$GET_PREDECESSOR_RETURN_VALUE('task_name') != 'VALIDATION' AS VACUUM TABLE t"#, + r#"CREATE TASK IF NOT EXISTS MyTask1 DATABASE = 'target', TIMEZONE = 'America/Los Angeles' AS VACUUM TABLE t"#, r#"ALTER TASK MyTask1 RESUME"#, r#"ALTER TASK MyTask1 SUSPEND"#, r#"ALTER TASK MyTask1 ADD AFTER 'task2', 'task3'"#, @@ -521,6 +522,7 @@ fn test_statement() { r#"ALTER TASK MyTask1 SET WAREHOUSE= 'MyWarehouse' SCHEDULE = USING CRON '0 6 * * *' 'America/Los_Angeles' COMMENT = 'serverless + cron'"#, r#"ALTER TASK MyTask1 SET WAREHOUSE= 'MyWarehouse' SCHEDULE = 13 MINUTE SUSPEND_TASK_AFTER_NUM_FAILURES = 10 COMMENT = 'serverless + cron'"#, r#"ALTER TASK MyTask1 SET WAREHOUSE= 'MyWarehouse' SCHEDULE = 5 SECOND SUSPEND_TASK_AFTER_NUM_FAILURES = 10 COMMENT = 'serverless + cron'"#, + r#"ALTER TASK MyTask1 SET DATABASE='newDB', TIMEZONE='America/Los_Angeles'"#, r#"ALTER TASK MyTask2 MODIFY AS SELECT CURRENT_VERSION()"#, r#"ALTER TASK MyTask1 MODIFY WHEN SYSTEM$GET_PREDECESSOR_RETURN_VALUE('task_name') != 'VALIDATION'"#, r#"DROP TASK MyTask1"#, diff --git a/src/query/ast/tests/it/testdata/expr-error.txt b/src/query/ast/tests/it/testdata/expr-error.txt index 0cdfbf5f9d5b..dda8730e2edd 100644 --- a/src/query/ast/tests/it/testdata/expr-error.txt +++ b/src/query/ast/tests/it/testdata/expr-error.txt @@ -30,7 +30,7 @@ error: --> SQL:1:14 | 1 | CAST(col1 AS foo) - | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `JSON`, `DOUBLE`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` + | ---- ^^^ unexpected `foo`, expecting `BOOL`, `FLOAT`, `BOOLEAN`, `FLOAT32`, `FLOAT64`, `BLOB`, `JSON`, `DOUBLE`, `LONGBLOB`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `MEDIUMBLOB`, `TINYBLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, or `NULLABLE` | | | while parsing `CAST(... 
AS ...)` | while parsing expression diff --git a/src/query/ast/tests/it/testdata/statement-error.txt b/src/query/ast/tests/it/testdata/statement-error.txt index e7292e72492c..cde27fda1b6b 100644 --- a/src/query/ast/tests/it/testdata/statement-error.txt +++ b/src/query/ast/tests/it/testdata/statement-error.txt @@ -29,7 +29,7 @@ error: --> SQL:1:19 | 1 | create table a (c varch) - | ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `TEXT`, or `JSON` + | ------ - ^^^^^ unexpected `varch`, expecting `VARCHAR`, `CHAR`, `VARIANT`, `CHARACTER`, `VARBINARY`, `ARRAY`, `BINARY`, `MAP`, `DATE`, `STRING`, `FLOAT32`, `FLOAT64`, `DECIMAL`, `SMALLINT`, `DATETIME`, `NULLABLE`, `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT`, `DOUBLE`, `BITMAP`, `TUPLE`, `TIMESTAMP`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `TEXT`, or `JSON` | | | | | while parsing ` [DEFAULT ] [AS () VIRTUAL] [AS () STORED] [COMMENT '']` | while parsing `CREATE TABLE [IF NOT EXISTS] [.] [] []` @@ -42,7 +42,7 @@ error: --> SQL:1:25 | 1 | create table a (c tuple()) - | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `NULLABLE`, , or + | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, `NULLABLE`, , or | | | | | | | while parsing type name | | while parsing ` [DEFAULT ] [AS () VIRTUAL] [AS () STORED] [COMMENT '']` @@ -70,7 +70,7 @@ error: --> SQL:1:38 | 1 | create table a (b tuple(c int, uint64)); - | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, `JSON`, or `NULLABLE` + | ------ - ----- ^ unexpected `)`, expecting `BOOLEAN`, `BOOL`, `UINT8`, `TINYINT`, `UINT16`, `SMALLINT`, `UINT32`, `INT`, `INTEGER`, `UINT64`, `UNSIGNED`, `BIGINT`, `INT8`, `INT16`, `INT32`, `INT64`, `SIGNED`, `FLOAT32`, `FLOAT`, `FLOAT64`, `DOUBLE`, `DECIMAL`, `ARRAY`, `MAP`, `BITMAP`, `TUPLE`, `DATE`, `DATETIME`, `TIMESTAMP`, `BINARY`, `VARBINARY`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `STRING`, `VARCHAR`, `CHAR`, `CHARACTER`, `TEXT`, `VARIANT`, 
`JSON`, or `NULLABLE` | | | | | | | while parsing TUPLE( , ...) | | | while parsing type name diff --git a/src/query/ast/tests/it/testdata/statement.txt b/src/query/ast/tests/it/testdata/statement.txt index 966fd97396b8..53f13cd4fbd9 100644 --- a/src/query/ast/tests/it/testdata/statement.txt +++ b/src/query/ast/tests/it/testdata/statement.txt @@ -13856,9 +13856,9 @@ AlterNetworkPolicy( ---------- Input ---------- -CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 15 MINUTE SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' AS SELECT * FROM MyTable1 +CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 15 MINUTE SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' DATABASE = 'target', TIMEZONE = 'America/Los Angeles' AS SELECT * FROM MyTable1 ---------- Output --------- -CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = MyWarehouse SCHEDULE 900 SECOND SUSPEND TASK AFTER 3 FAILURES COMMENTS = 'This is test task 1' AS SELECT * FROM MyTable1 +CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = MyWarehouse SCHEDULE 900 SECOND database = 'target' timezone = 'America/Los Angeles' SUSPEND_TASK_AFTER 3 FAILURES COMMENTS = 'This is test task 1' AS SELECT * FROM MyTable1 ---------- AST ------------ CreateTask( CreateTaskStmt { @@ -13874,6 +13874,10 @@ CreateTask( 900, ), ), + session_parameters: { + "database": "target", + "timezone": "America/Los Angeles", + }, suspend_task_after_num_failures: Some( 3, ), @@ -13888,7 +13892,7 @@ CreateTask( ---------- Input ---------- CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 15 SECOND SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' AS SELECT * FROM MyTable1 ---------- Output --------- -CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = MyWarehouse SCHEDULE 15 SECOND SUSPEND TASK AFTER 3 FAILURES COMMENTS = 'This is test task 1' AS SELECT * FROM MyTable1 +CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = MyWarehouse SCHEDULE 15 SECOND SUSPEND_TASK_AFTER 3 FAILURES COMMENTS = 'This is test task 1' AS SELECT * FROM MyTable1 ---------- AST ------------ CreateTask( CreateTaskStmt { @@ -13904,6 +13908,7 @@ CreateTask( 15, ), ), + session_parameters: {}, suspend_task_after_num_failures: Some( 3, ), @@ -13918,7 +13923,7 @@ CreateTask( ---------- Input ---------- CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = 'MyWarehouse' SCHEDULE = 1215 SECOND SUSPEND_TASK_AFTER_NUM_FAILURES = 3 COMMENT = 'This is test task 1' AS SELECT * FROM MyTable1 ---------- Output --------- -CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = MyWarehouse SCHEDULE 1215 SECOND SUSPEND TASK AFTER 3 FAILURES COMMENTS = 'This is test task 1' AS SELECT * FROM MyTable1 +CREATE TASK IF NOT EXISTS MyTask1 WAREHOUSE = MyWarehouse SCHEDULE 1215 SECOND SUSPEND_TASK_AFTER 3 FAILURES COMMENTS = 'This is test task 1' AS SELECT * FROM MyTable1 ---------- AST ------------ CreateTask( CreateTaskStmt { @@ -13934,6 +13939,7 @@ CreateTask( 1215, ), ), + session_parameters: {}, suspend_task_after_num_failures: Some( 3, ), @@ -13965,6 +13971,7 @@ CreateTask( ), ), ), + session_parameters: {}, suspend_task_after_num_failures: None, comments: "serverless + cron", after: [], @@ -13992,6 +13999,7 @@ CreateTask( None, ), ), + session_parameters: {}, suspend_task_after_num_failures: None, comments: "", after: [], @@ -14019,6 +14027,7 @@ CreateTask( None, ), ), + session_parameters: {}, suspend_task_after_num_failures: None, comments: "", after: [], @@ -14041,6 +14050,7 @@ CreateTask( warehouse: None, }, schedule_opts: None, + 
session_parameters: {}, suspend_task_after_num_failures: None, comments: "", after: [ @@ -14055,6 +14065,32 @@ CreateTask( ) +---------- Input ---------- +CREATE TASK IF NOT EXISTS MyTask1 DATABASE = 'target', TIMEZONE = 'America/Los Angeles' AS VACUUM TABLE t +---------- Output --------- +CREATE TASK IF NOT EXISTS MyTask1 database = 'target' timezone = 'America/Los Angeles' AS VACUUM TABLE t +---------- AST ------------ +CreateTask( + CreateTaskStmt { + if_not_exists: true, + name: "MyTask1", + warehouse_opts: WarehouseOptions { + warehouse: None, + }, + schedule_opts: None, + session_parameters: { + "database": "target", + "timezone": "America/Los Angeles", + }, + suspend_task_after_num_failures: None, + comments: "", + after: [], + when_condition: None, + sql: "VACUUM TABLE t ", + }, +) + + ---------- Input ---------- ALTER TASK MyTask1 RESUME ---------- Output --------- @@ -14145,6 +14181,7 @@ AlterTask( comments: Some( "serverless + cron", ), + session_parameters: None, }, }, ) @@ -14174,6 +14211,7 @@ AlterTask( comments: Some( "serverless + cron", ), + session_parameters: None, }, }, ) @@ -14203,6 +14241,32 @@ AlterTask( comments: Some( "serverless + cron", ), + session_parameters: None, + }, + }, +) + + +---------- Input ---------- +ALTER TASK MyTask1 SET DATABASE='newDB', TIMEZONE='America/Los_Angeles' +---------- Output --------- +ALTER TASK MyTask1 database = 'newDB' timezone = 'America/Los_Angeles' +---------- AST ------------ +AlterTask( + AlterTaskStmt { + if_exists: false, + name: "MyTask1", + options: Set { + warehouse: None, + schedule: None, + suspend_task_after_num_failures: None, + comments: None, + session_parameters: Some( + { + "database": "newDB", + "timezone": "America/Los_Angeles", + }, + ), }, }, ) diff --git a/src/query/catalog/src/plan/internal_column.rs b/src/query/catalog/src/plan/internal_column.rs index 480c20b896d8..c473990ece51 100644 --- a/src/query/catalog/src/plan/internal_column.rs +++ b/src/query/catalog/src/plan/internal_column.rs @@ -254,12 +254,12 @@ impl InternalColumn { let mut row_ids = Vec::with_capacity(num_rows); if let Some(offsets) = &meta.offsets { for i in offsets { - let row_id = format!("{}{:06x}", uuid, *i).as_bytes().to_vec(); + let row_id = format!("{}{:06x}", uuid, *i); row_ids.push(row_id); } } else { for i in 0..num_rows { - let row_id = format!("{}{:06x}", uuid, i).as_bytes().to_vec(); + let row_id = format!("{}{:06x}", uuid, i); row_ids.push(row_id); } } diff --git a/src/query/catalog/src/query_kind.rs b/src/query/catalog/src/query_kind.rs index b2e27cd799d5..74e2aca44e84 100644 --- a/src/query/catalog/src/query_kind.rs +++ b/src/query/catalog/src/query_kind.rs @@ -24,6 +24,7 @@ pub enum QueryKind { Query, Explain, CopyIntoTable, + CopyIntoLocation, Update, Insert, Other, diff --git a/src/query/catalog/src/table_args.rs b/src/query/catalog/src/table_args.rs index 0397e6f65827..6878605e80f6 100644 --- a/src/query/catalog/src/table_args.rs +++ b/src/query/catalog/src/table_args.rs @@ -79,7 +79,7 @@ impl TableArgs { .into_string() .map_err(|_| ErrorCode::BadArguments("Expected string argument"))?; - Ok(String::from_utf8(arg)?) 
+ Ok(arg) }) .collect::>>() } diff --git a/src/query/expression/src/aggregate/payload_row.rs b/src/query/expression/src/aggregate/payload_row.rs index 54187c34a369..97ded71a4d58 100644 --- a/src/query/expression/src/aggregate/payload_row.rs +++ b/src/query/expression/src/aggregate/payload_row.rs @@ -108,7 +108,7 @@ pub unsafe fn serialize_column_to_rowformat( Column::String(v) => { for i in 0..rows { let index = select_index.get_index(i); - let data = arena.alloc_slice_copy(v.index_unchecked(index)); + let data = arena.alloc_slice_copy(v.index_unchecked(index).as_bytes()); store(&(data.len() as u32), address[index].add(offset) as *mut u8); diff --git a/src/query/expression/src/converts/arrow2/from.rs b/src/query/expression/src/converts/arrow2/from.rs index c51fe0bf4036..03f4e818a910 100644 --- a/src/query/expression/src/converts/arrow2/from.rs +++ b/src/query/expression/src/converts/arrow2/from.rs @@ -29,7 +29,6 @@ use crate::types::array::ArrayColumn; use crate::types::binary::BinaryColumn; use crate::types::decimal::DecimalColumn; use crate::types::nullable::NullableColumn; -use crate::types::string::CheckUTF8; use crate::types::string::StringColumn; use crate::types::DataType; use crate::types::DecimalDataType; @@ -423,7 +422,6 @@ impl Column { .map(|x| *x as u64) .collect::>(); let column = StringColumn::new(arrow_col.values().clone(), offsets.into()); - column.check_utf8()?; Column::String(column) } (DataType::String, ArrowDataType::LargeBinary) => { @@ -437,7 +435,6 @@ impl Column { let offsets = unsafe { std::mem::transmute::, Buffer>(offsets) }; let column = StringColumn::new(arrow_col.values().clone(), offsets); - column.check_utf8()?; Column::String(column) } (DataType::String, ArrowDataType::FixedSizeBinary(size)) => { @@ -451,7 +448,6 @@ impl Column { .map(|x| x * (*size) as u64) .collect::>(); let column = StringColumn::new(arrow_col.values().clone(), offsets.into()); - column.check_utf8()?; Column::String(column) } (DataType::String, ArrowDataType::Utf8) => { @@ -467,10 +463,12 @@ impl Column { .iter() .map(|x| *x as u64) .collect::>(); - Column::String(StringColumn::new( - arrow_col.values().clone(), - offsets.into(), - )) + unsafe { + Column::String(StringColumn::new_unchecked( + arrow_col.values().clone(), + offsets.into(), + )) + } } (DataType::String, ArrowDataType::LargeUtf8) => { let arrow_col = arrow_col @@ -482,7 +480,12 @@ impl Column { let offsets = arrow_col.offsets().clone().into_inner(); let offsets = unsafe { std::mem::transmute::, Buffer>(offsets) }; - Column::String(StringColumn::new(arrow_col.values().clone(), offsets)) + unsafe { + Column::String(StringColumn::new_unchecked( + arrow_col.values().clone(), + offsets, + )) + } } (DataType::Timestamp, ArrowDataType::Timestamp(uint, _)) => { let values = arrow_col diff --git a/src/query/expression/src/converts/arrow2/to.rs b/src/query/expression/src/converts/arrow2/to.rs index 1b77f9d738be..c331d9911718 100644 --- a/src/query/expression/src/converts/arrow2/to.rs +++ b/src/query/expression/src/converts/arrow2/to.rs @@ -25,7 +25,6 @@ use super::ARROW_EXT_TYPE_EMPTY_ARRAY; use super::ARROW_EXT_TYPE_EMPTY_MAP; use super::ARROW_EXT_TYPE_VARIANT; use crate::types::decimal::DecimalColumn; -use crate::types::string::CheckUTF8; use crate::types::DecimalDataType; use crate::types::NumberColumn; use crate::types::NumberDataType; @@ -301,11 +300,6 @@ impl Column { ) } Column::String(col) => { - // todo!("new string") - // always check utf8 until we can guarantee the correctness of data in string column - // 
#[cfg(debug_assertions)] - col.check_utf8().unwrap(); - let offsets: Buffer = col.offsets().iter().map(|offset| *offset as i64).collect(); diff --git a/src/query/expression/src/converts/datavalues/from.rs b/src/query/expression/src/converts/datavalues/from.rs index 93a6b13508b8..c04a12af6105 100644 --- a/src/query/expression/src/converts/datavalues/from.rs +++ b/src/query/expression/src/converts/datavalues/from.rs @@ -126,7 +126,9 @@ pub fn from_scalar(datavalue: &DataValue, datatype: &DataType) -> Scalar { )), DataType::Timestamp => Scalar::Timestamp(*datavalue.as_int64().unwrap()), DataType::Date => Scalar::Date(*datavalue.as_int64().unwrap() as i32), - DataType::String => Scalar::String(datavalue.as_string().unwrap().to_vec()), + DataType::String => { + Scalar::String(String::from_utf8_lossy(datavalue.as_string().unwrap()).into_owned()) + } DataType::Variant => match datavalue { DataValue::String(x) => Scalar::Variant(x.clone()), DataValue::Variant(x) => { diff --git a/src/query/expression/src/converts/datavalues/to.rs b/src/query/expression/src/converts/datavalues/to.rs index f9809ab201f4..50fd5af94916 100644 --- a/src/query/expression/src/converts/datavalues/to.rs +++ b/src/query/expression/src/converts/datavalues/to.rs @@ -39,7 +39,8 @@ pub fn scalar_to_datavalue(scalar: &Scalar) -> DataValue { Scalar::Timestamp(x) => DataValue::Int64(*x), Scalar::Date(x) => DataValue::Int64(*x as i64), Scalar::Boolean(x) => DataValue::Boolean(*x), - Scalar::String(x) | Scalar::Variant(x) => DataValue::String(x.clone()), + Scalar::Variant(x) => DataValue::String(x.clone()), + Scalar::String(x) => DataValue::String(x.as_bytes().to_vec()), Scalar::Array(x) => { let values = (0..x.len()) .map(|idx| scalar_to_datavalue(&x.index(idx).unwrap().to_owned())) diff --git a/src/query/expression/src/converts/meta/bincode.rs b/src/query/expression/src/converts/meta/bincode.rs index 76414f527e8c..491c98763424 100644 --- a/src/query/expression/src/converts/meta/bincode.rs +++ b/src/query/expression/src/converts/meta/bincode.rs @@ -95,7 +95,7 @@ impl From for Scalar { LegacyScalar::Timestamp(ts) => Scalar::Timestamp(ts), LegacyScalar::Date(date) => Scalar::Date(date), LegacyScalar::Boolean(b) => Scalar::Boolean(b), - LegacyScalar::String(s) => Scalar::String(s), + LegacyScalar::String(s) => Scalar::String(String::from_utf8_lossy(&s).into_owned()), LegacyScalar::Array(col) => Scalar::Array(col.into()), LegacyScalar::Map(col) => Scalar::Map(col.into()), LegacyScalar::Bitmap(bmp) => Scalar::Bitmap(bmp), @@ -152,7 +152,7 @@ impl From for LegacyScalar { Scalar::Date(date) => LegacyScalar::Date(date), Scalar::Boolean(b) => LegacyScalar::Boolean(b), Scalar::Binary(_) => unreachable!(), - Scalar::String(string) => LegacyScalar::String(string), + Scalar::String(string) => LegacyScalar::String(string.as_bytes().to_vec()), Scalar::Array(column) => LegacyScalar::Array(column.into()), Scalar::Map(column) => LegacyScalar::Map(column.into()), Scalar::Bitmap(bitmap) => LegacyScalar::Bitmap(bitmap), diff --git a/src/query/expression/src/converts/meta/index_scalar.rs b/src/query/expression/src/converts/meta/index_scalar.rs index c54ddcfe77d2..c4419955d97f 100644 --- a/src/query/expression/src/converts/meta/index_scalar.rs +++ b/src/query/expression/src/converts/meta/index_scalar.rs @@ -49,7 +49,7 @@ pub enum IndexScalar { Array(IndexColumn), Map(IndexColumn), Bitmap(Vec), - Tuple(Vec), + Tuple(Vec), Variant(Vec), } @@ -91,11 +91,13 @@ impl From for Scalar { IndexScalar::Timestamp(ts) => Scalar::Timestamp(ts), IndexScalar::Date(date) 
=> Scalar::Date(date), IndexScalar::Boolean(b) => Scalar::Boolean(b), - IndexScalar::String(s) => Scalar::String(s), + IndexScalar::String(s) => Scalar::String(unsafe { String::from_utf8_unchecked(s) }), IndexScalar::Array(col) => Scalar::Array(col.into()), IndexScalar::Map(col) => Scalar::Map(col.into()), IndexScalar::Bitmap(bmp) => Scalar::Bitmap(bmp), - IndexScalar::Tuple(tuple) => Scalar::Tuple(tuple), + IndexScalar::Tuple(tuple) => { + Scalar::Tuple(tuple.into_iter().map(|c| c.into()).collect()) + } IndexScalar::Variant(variant) => Scalar::Variant(variant), } } @@ -143,12 +145,14 @@ impl From for IndexScalar { Scalar::Timestamp(ts) => IndexScalar::Timestamp(ts), Scalar::Date(date) => IndexScalar::Date(date), Scalar::Boolean(b) => IndexScalar::Boolean(b), - Scalar::String(string) => IndexScalar::String(string), + Scalar::String(string) => IndexScalar::String(string.as_bytes().to_vec()), Scalar::Binary(s) => IndexScalar::String(s), Scalar::Array(column) => IndexScalar::Array(column.into()), Scalar::Map(column) => IndexScalar::Map(column.into()), Scalar::Bitmap(bitmap) => IndexScalar::Bitmap(bitmap), - Scalar::Tuple(tuple) => IndexScalar::Tuple(tuple), + Scalar::Tuple(tuple) => { + IndexScalar::Tuple(tuple.into_iter().map(|c| c.into()).collect()) + } Scalar::Variant(variant) => IndexScalar::Variant(variant), Scalar::EmptyArray | Scalar::EmptyMap => unreachable!(), } diff --git a/src/query/expression/src/kernels/group_by_hash/method_single_string.rs b/src/query/expression/src/kernels/group_by_hash/method_single_string.rs index 16f7bdc0e186..3d5770dee7a3 100644 --- a/src/query/expression/src/kernels/group_by_hash/method_single_string.rs +++ b/src/query/expression/src/kernels/group_by_hash/method_single_string.rs @@ -93,6 +93,8 @@ impl KeyAccessor for BinaryKeyAccessor { /// # Safety /// Calling this method with an out-of-bounds index is *[undefined behavior]*. 
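    // Note: a `debug_assert!(index + 1 < self.offsets.len())` guard is added below, so
    // out-of-bounds callers are caught in debug/test builds while release builds keep
    // the unchecked fast path.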
unsafe fn key_unchecked(&self, index: usize) -> &Self::Key { + debug_assert!(index + 1 < self.offsets.len()); + &self.data[*self.offsets.get_unchecked(index) as usize ..*self.offsets.get_unchecked(index + 1) as usize] } diff --git a/src/query/expression/src/property.rs b/src/query/expression/src/property.rs index 4df8f598f8af..89fce25a59a1 100644 --- a/src/query/expression/src/property.rs +++ b/src/query/expression/src/property.rs @@ -237,12 +237,12 @@ impl Domain { has_true: this.has_true || other.has_true, }), (Domain::String(this), Domain::String(other)) => Domain::String(StringDomain { - min: this.min.as_slice().min(&other.min).to_vec(), + min: this.min.as_str().min(&other.min).to_string(), max: this .max .as_ref() .zip(other.max.as_ref()) - .map(|(self_max, other_max)| self_max.max(other_max).to_vec()), + .map(|(self_max, other_max)| self_max.max(other_max).to_string()), }), (Domain::Timestamp(this), Domain::Timestamp(other)) => { Domain::Timestamp(SimpleDomain { @@ -421,6 +421,7 @@ pub trait SimpleDomainCmp { fn domain_gte(&self, other: &Self) -> FunctionDomain; fn domain_lt(&self, other: &Self) -> FunctionDomain; fn domain_lte(&self, other: &Self) -> FunctionDomain; + fn domain_contains(&self, other: &Self) -> FunctionDomain; } const ALL_TRUE_DOMAIN: BooleanDomain = BooleanDomain { @@ -489,36 +490,76 @@ impl SimpleDomainCmp for SimpleDomain { FunctionDomain::Full } } + + fn domain_contains(&self, other: &Self) -> FunctionDomain { + if self.min > other.max || self.max < other.min { + FunctionDomain::Domain(ALL_FALSE_DOMAIN) + } else { + FunctionDomain::Full + } + } } impl SimpleDomainCmp for StringDomain { fn domain_eq(&self, other: &Self) -> FunctionDomain { - let (d1, d2) = self.unify(other); + let (d1, d2) = unify_string(self, other); d1.domain_eq(&d2) } fn domain_noteq(&self, other: &Self) -> FunctionDomain { - let (d1, d2) = self.unify(other); + let (d1, d2) = unify_string(self, other); d1.domain_noteq(&d2) } fn domain_gt(&self, other: &Self) -> FunctionDomain { - let (d1, d2) = self.unify(other); + let (d1, d2) = unify_string(self, other); d1.domain_gt(&d2) } fn domain_gte(&self, other: &Self) -> FunctionDomain { - let (d1, d2) = self.unify(other); + let (d1, d2) = unify_string(self, other); d1.domain_gte(&d2) } fn domain_lt(&self, other: &Self) -> FunctionDomain { - let (d1, d2) = self.unify(other); + let (d1, d2) = unify_string(self, other); d1.domain_lt(&d2) } fn domain_lte(&self, other: &Self) -> FunctionDomain { - let (d1, d2) = self.unify(other); + let (d1, d2) = unify_string(self, other); d1.domain_lte(&d2) } + + fn domain_contains(&self, other: &Self) -> FunctionDomain { + let (d1, d2) = unify_string(self, other); + d1.domain_contains(&d2) + } +} + +fn unify_string( + lhs: &StringDomain, + rhs: &StringDomain, +) -> (SimpleDomain, SimpleDomain) { + let mut max = lhs.min.as_str().max(&rhs.min); + if let Some(lhs_max) = &lhs.max { + max = max.max(lhs_max); + } + if let Some(rhs_max) = &rhs.max { + max = max.max(rhs_max); + } + + let mut max = max.to_string(); + max.push('\0'); + + ( + SimpleDomain { + min: lhs.min.clone(), + max: lhs.max.clone().unwrap_or_else(|| max.clone()), + }, + SimpleDomain { + min: rhs.min.clone(), + max: rhs.max.clone().unwrap_or_else(|| max.clone()), + }, + ) } diff --git a/src/query/expression/src/row/row_converter.rs b/src/query/expression/src/row/row_converter.rs index 3c26bcd93c2b..3c4d967ae5c0 100644 --- a/src/query/expression/src/row/row_converter.rs +++ b/src/query/expression/src/row/row_converter.rs @@ -156,16 +156,16 @@ impl RowConverter 
{ .iter() .zip(validity.iter()) .zip(lengths.iter_mut()) - .for_each(|((bytes, v), length)| { - *length += variable::encoded_len(bytes, !v) as u64 + .for_each(|((str, v), length)| { + *length += variable::encoded_len(str.as_bytes(), !v) as u64 }) } else { col.as_string() .unwrap() .iter() .zip(lengths.iter_mut()) - .for_each(|(bytes, length)| { - *length += variable::encoded_len(bytes, false) as u64 + .for_each(|(str, length)| { + *length += variable::encoded_len(str.as_bytes(), false) as u64 }) } } @@ -255,7 +255,13 @@ fn encode_column(out: &mut BinaryColumnBuilder, column: &Column, asc: bool, null Column::Timestamp(col) => fixed::encode(out, col, validity, asc, nulls_first), Column::Date(col) => fixed::encode(out, col, validity, asc, nulls_first), Column::Binary(col) => variable::encode(out, col.iter(), validity, asc, nulls_first), - Column::String(col) => variable::encode(out, col.iter(), validity, asc, nulls_first), + Column::String(col) => variable::encode( + out, + col.iter().map(|s| s.as_bytes()), + validity, + asc, + nulls_first, + ), Column::Variant(col) => variable::encode(out, col.iter(), validity, asc, nulls_first), _ => unimplemented!(), } diff --git a/src/query/expression/src/types/binary.rs b/src/query/expression/src/types/binary.rs index f09aaa9de38a..c78459d1b75f 100644 --- a/src/query/expression/src/types/binary.rs +++ b/src/query/expression/src/types/binary.rs @@ -182,6 +182,7 @@ pub struct BinaryColumn { impl BinaryColumn { pub fn new(data: Buffer, offsets: Buffer) -> Self { debug_assert!({ offsets.windows(2).all(|w| w[0] <= w[1]) }); + BinaryColumn { data, offsets } } @@ -216,7 +217,9 @@ impl BinaryColumn { /// Calling this method with an out-of-bounds index is *[undefined behavior]* #[inline] pub unsafe fn index_unchecked(&self, index: usize) -> &[u8] { - &self.data[(self.offsets[index] as usize)..(self.offsets[index + 1] as usize)] + let start = *self.offsets.get_unchecked(index) as usize; + let end = *self.offsets.get_unchecked(index + 1) as usize; + self.data.get_unchecked(start..end) } pub fn slice(&self, range: Range) -> Self { @@ -314,6 +317,8 @@ impl BinaryColumnBuilder { } pub fn from_data(data: Vec, offsets: Vec) -> Self { + debug_assert!({ offsets.windows(2).all(|w| w[0] <= w[1]) }); + BinaryColumnBuilder { need_estimated: false, data, @@ -419,6 +424,7 @@ impl BinaryColumnBuilder { pub fn build_scalar(self) -> Vec { assert_eq!(self.offsets.len(), 2); + self.data[(self.offsets[0] as usize)..(self.offsets[1] as usize)].to_vec() } @@ -431,9 +437,10 @@ impl BinaryColumnBuilder { /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* pub unsafe fn index_unchecked(&self, row: usize) -> &[u8] { + debug_assert!(row + 1 < self.offsets.len()); + let start = *self.offsets.get_unchecked(row) as usize; let end = *self.offsets.get_unchecked(row + 1) as usize; - // soundness: the invariant of the struct self.data.get_unchecked(start..end) } diff --git a/src/query/expression/src/types/boolean.rs b/src/query/expression/src/types/boolean.rs index 78785968ecb7..c3c8ed751e46 100644 --- a/src/query/expression/src/types/boolean.rs +++ b/src/query/expression/src/types/boolean.rs @@ -110,6 +110,8 @@ impl ValueType for BooleanType { #[inline(always)] unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { + debug_assert!(index < col.len()); + col.get_bit_unchecked(index) } diff --git a/src/query/expression/src/types/date.rs b/src/query/expression/src/types/date.rs index 92ff4e3f3ddc..12951630afa3 100644 --- 
a/src/query/expression/src/types/date.rs +++ b/src/query/expression/src/types/date.rs @@ -134,6 +134,8 @@ impl ValueType for DateType { #[inline(always)] unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { + debug_assert!(index < col.len()); + *col.get_unchecked(index) } diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index 8219ba3cba79..814d6300288a 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -120,6 +120,8 @@ impl ValueType for DecimalType { #[inline(always)] unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { + debug_assert!(index < col.len()); + *col.get_unchecked(index) } @@ -944,6 +946,7 @@ impl DecimalColumn { /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* pub unsafe fn index_unchecked(&self, index: usize) -> DecimalScalar { + debug_assert!(index < self.len()); crate::with_decimal_type!(|DECIMAL_TYPE| match self { DecimalColumn::DECIMAL_TYPE(col, size) => DecimalScalar::DECIMAL_TYPE(*col.get_unchecked(index), *size), diff --git a/src/query/expression/src/types/nullable.rs b/src/query/expression/src/types/nullable.rs index 2503a6dee784..c6eca90939cd 100755 --- a/src/query/expression/src/types/nullable.rs +++ b/src/query/expression/src/types/nullable.rs @@ -253,6 +253,8 @@ impl NullableColumn { /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* pub unsafe fn index_unchecked(&self, index: usize) -> Option> { + debug_assert!(index < self.validity.len()); + match self.validity.get_bit_unchecked(index) { true => Some(T::index_column(&self.column, index).unwrap()), false => None, diff --git a/src/query/expression/src/types/number.rs b/src/query/expression/src/types/number.rs index d10c57fdfe15..dbd794e15849 100644 --- a/src/query/expression/src/types/number.rs +++ b/src/query/expression/src/types/number.rs @@ -168,6 +168,8 @@ impl ValueType for NumberType { #[inline(always)] unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { + debug_assert!(index < col.len()); + *col.get_unchecked(index) } @@ -580,6 +582,8 @@ impl NumberColumn { /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* pub unsafe fn index_unchecked(&self, index: usize) -> NumberScalar { + debug_assert!(index < self.len()); + crate::with_number_type!(|NUM_TYPE| match self { NumberColumn::NUM_TYPE(col) => NumberScalar::NUM_TYPE(*col.get_unchecked(index)), }) diff --git a/src/query/expression/src/types/string.rs b/src/query/expression/src/types/string.rs index 861e5b362e8c..2b3063bd5bf2 100644 --- a/src/query/expression/src/types/string.rs +++ b/src/query/expression/src/types/string.rs @@ -25,7 +25,6 @@ use serde::Serialize; use super::binary::BinaryColumn; use super::binary::BinaryColumnBuilder; use super::binary::BinaryIterator; -use super::SimpleDomain; use crate::property::Domain; use crate::types::ArgType; use crate::types::DataType; @@ -41,20 +40,20 @@ use crate::ScalarRef; pub struct StringType; impl ValueType for StringType { - type Scalar = Vec; - type ScalarRef<'a> = &'a [u8]; + type Scalar = String; + type ScalarRef<'a> = &'a str; type Column = StringColumn; type Domain = StringDomain; type ColumnIterator<'a> = StringIterator<'a>; type ColumnBuilder = StringColumnBuilder; #[inline] - fn upcast_gat<'short, 'long: 'short>(long: &'long [u8]) -> &'short [u8] { + fn upcast_gat<'short, 'long: 
'short>(long: &'long str) -> &'short str { long } fn to_owned_scalar(scalar: Self::ScalarRef<'_>) -> Self::Scalar { - scalar.to_vec() + scalar.to_string() } fn to_scalar_ref(scalar: &Self::Scalar) -> Self::ScalarRef<'_> { @@ -108,22 +107,12 @@ impl ValueType for StringType { } fn index_column(col: &Self::Column, index: usize) -> Option> { - let x = col.index(index)?; - - #[cfg(debug_assertions)] - x.check_utf8().unwrap(); - - Some(x) + col.index(index) } #[inline(always)] unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { - let x = col.index_unchecked(index); - - #[cfg(debug_assertions)] - x.check_utf8().unwrap(); - - x + col.index_unchecked(index) } fn slice_column(col: &Self::Column, range: Range) -> Self::Column { @@ -143,7 +132,7 @@ impl ValueType for StringType { } fn push_item(builder: &mut Self::ColumnBuilder, item: Self::ScalarRef<'_>) { - builder.put_slice(item); + builder.put_str(item); builder.commit_row(); } @@ -209,7 +198,7 @@ impl ArgType for StringType { fn full_domain() -> Self::Domain { StringDomain { - min: vec![], + min: "".to_string(), max: None, } } @@ -227,27 +216,34 @@ pub struct StringColumn { impl StringColumn { pub fn new(data: Buffer, offsets: Buffer) -> Self { - debug_assert!({ offsets.windows(2).all(|w| w[0] <= w[1]) }); - let col = StringColumn { data, offsets }; - // todo!("new string") + let col = BinaryColumn::new(data, offsets); + col.check_utf8().unwrap(); - col + + unsafe { Self::from_binary_unchecked(col) } } /// # Safety /// This function is unsound iff: /// * the offsets are not monotonically increasing - /// * The `values` between two consecutive `offsets` are not valid utf8 + /// * The `data` between two consecutive `offsets` are not valid utf8 pub unsafe fn new_unchecked(data: Buffer, offsets: Buffer) -> Self { - debug_assert!({ offsets.windows(2).all(|w| w[0] <= w[1]) }); - StringColumn { data, offsets } + let col = BinaryColumn::new(data, offsets); + + #[cfg(debug_assertions)] + col.check_utf8().unwrap(); + + unsafe { Self::from_binary_unchecked(col) } } /// # Safety /// This function is unsound iff: /// * the offsets are not monotonically increasing - /// * The `values` between two consecutive `offsets` are not valid utf8 + /// * The `data` between two consecutive `offsets` are not valid utf8 pub unsafe fn from_binary_unchecked(col: BinaryColumn) -> Self { + #[cfg(debug_assertions)] + col.check_utf8().unwrap(); + StringColumn { data: col.data, offsets: col.offsets, @@ -272,20 +268,34 @@ impl StringColumn { len * 8 + (offsets[len - 1] - offsets[0]) as usize } - pub fn index(&self, index: usize) -> Option<&[u8]> { - if index + 1 < self.offsets.len() { - Some(&self.data[(self.offsets[index] as usize)..(self.offsets[index + 1] as usize)]) - } else { - None + pub fn index(&self, index: usize) -> Option<&str> { + if index + 1 >= self.offsets.len() { + return None; } + + let bytes = &self.data[(self.offsets[index] as usize)..(self.offsets[index + 1] as usize)]; + + #[cfg(debug_assertions)] + bytes.check_utf8().unwrap(); + + unsafe { Some(std::str::from_utf8_unchecked(bytes)) } } /// # Safety /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* #[inline] - pub unsafe fn index_unchecked(&self, index: usize) -> &[u8] { - &self.data[(self.offsets[index] as usize)..(self.offsets[index + 1] as usize)] + pub unsafe fn index_unchecked(&self, index: usize) -> &str { + debug_assert!(index + 1 < self.offsets.len()); + + let start = *self.offsets.get_unchecked(index) as usize; + let end = 
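// The StringColumn constructors above validate UTF-8 once on the checked path
// and re-validate only under debug_assertions on the unsafe paths. A hedged
// sketch of that shape, using std::str::from_utf8 per row in place of the
// simdutf8-backed check_utf8 in the real code:
struct Utf8Column {
    data: Vec<u8>,
    offsets: Vec<u64>,
}

impl Utf8Column {
    fn new(data: Vec<u8>, offsets: Vec<u64>) -> Self {
        // checked path: validate every row before wrapping
        for w in offsets.windows(2) {
            std::str::from_utf8(&data[w[0] as usize..w[1] as usize]).expect("invalid utf8");
        }
        unsafe { Self::new_unchecked(data, offsets) }
    }

    /// # Safety
    /// Every row delimited by `offsets` must be valid UTF-8.
    unsafe fn new_unchecked(data: Vec<u8>, offsets: Vec<u64>) -> Self {
        // unsafe path: the caller promises validity; re-check only in debug builds
        #[cfg(debug_assertions)]
        for w in offsets.windows(2) {
            std::str::from_utf8(&data[w[0] as usize..w[1] as usize]).expect("invalid utf8");
        }
        Utf8Column { data, offsets }
    }
}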
*self.offsets.get_unchecked(index + 1) as usize; + let bytes = &self.data.get_unchecked(start..end); + + #[cfg(debug_assertions)] + bytes.check_utf8().unwrap(); + + std::str::from_utf8_unchecked(bytes) } pub fn slice(&self, range: Range) -> Self { @@ -366,12 +376,18 @@ pub struct StringIterator<'a> { } impl<'a> Iterator for StringIterator<'a> { - type Item = &'a [u8]; + type Item = &'a str; fn next(&mut self) -> Option { - self.offsets + let bytes = self + .offsets .next() - .map(|range| &self.data[(range[0] as usize)..(range[1] as usize)]) + .map(|range| &self.data[(range[0] as usize)..(range[1] as usize)])?; + + #[cfg(debug_assertions)] + bytes.check_utf8().unwrap(); + + unsafe { Some(std::str::from_utf8_unchecked(bytes)) } } fn size_hint(&self) -> (usize, Option) { @@ -411,18 +427,32 @@ impl StringColumnBuilder { } pub fn from_data(data: Vec, offsets: Vec) -> Self { + let builder = BinaryColumnBuilder::from_data(data, offsets); + builder.check_utf8().unwrap(); + unsafe { StringColumnBuilder::from_binary_unchecked(builder) } + } + + /// # Safety + /// This function is unsound iff: + /// * the offsets are not monotonically increasing + /// * The `data` between two consecutive `offsets` are not valid utf8 + pub unsafe fn from_binary_unchecked(col: BinaryColumnBuilder) -> Self { + #[cfg(debug_assertions)] + col.check_utf8().unwrap(); + StringColumnBuilder { - need_estimated: false, - data, - offsets, + need_estimated: col.need_estimated, + data: col.data, + offsets: col.offsets, } } - pub fn repeat(scalar: &[u8], n: usize) -> Self { - let len = scalar.len(); + pub fn repeat(scalar: &str, n: usize) -> Self { + let bytes = scalar.as_bytes(); + let len = bytes.len(); let mut data = Vec::with_capacity(len * n); for _ in 0..n { - data.extend_from_slice(scalar); + data.extend_from_slice(bytes); } let offsets = once(0) .chain((0..n).map(|i| (len * (i + 1)) as u64)) @@ -442,23 +472,23 @@ impl StringColumnBuilder { self.offsets.len() * 8 + self.data.len() } - pub fn put_u8(&mut self, item: u8) { - self.data.push(item); - } - pub fn put_char(&mut self, item: char) { self.data .extend_from_slice(item.encode_utf8(&mut [0; 4]).as_bytes()); } #[inline] - pub fn put_str(&mut self, item: &str) { - self.data.extend_from_slice(item.as_bytes()); + #[deprecated] + pub fn put_slice(&mut self, item: &[u8]) { + #[cfg(debug_assertions)] + item.check_utf8().unwrap(); + + self.data.extend_from_slice(item); } #[inline] - pub fn put_slice(&mut self, item: &[u8]) { - self.data.extend_from_slice(item); + pub fn put_str(&mut self, item: &str) { + self.data.extend_from_slice(item.as_bytes()); } pub fn put_char_iter(&mut self, iter: impl Iterator) { @@ -469,10 +499,6 @@ impl StringColumnBuilder { } } - pub fn put(&mut self, item: &[u8]) { - self.data.extend_from_slice(item); - } - #[inline] pub fn commit_row(&mut self) { self.offsets.push(self.data.len() as u64); @@ -511,12 +537,18 @@ impl StringColumnBuilder { } pub fn build(self) -> StringColumn { - StringColumn::new(self.data.into(), self.offsets.into()) + unsafe { StringColumn::new_unchecked(self.data.into(), self.offsets.into()) } } - pub fn build_scalar(self) -> Vec { + pub fn build_scalar(self) -> String { assert_eq!(self.offsets.len(), 2); - self.data[(self.offsets[0] as usize)..(self.offsets[1] as usize)].to_vec() + + let bytes = self.data[(self.offsets[0] as usize)..(self.offsets[1] as usize)].to_vec(); + + #[cfg(debug_assertions)] + bytes.check_utf8().unwrap(); + + unsafe { String::from_utf8_unchecked(bytes) } } #[inline] @@ -527,32 +559,42 @@ impl 
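// A minimal standalone version of the builder protocol shown above: append
// bytes with put_str, then commit_row to record the next offset. Field and
// method names mirror the diff, but the type itself is illustrative.
struct StrBuilder {
    data: Vec<u8>,
    offsets: Vec<u64>,
}

impl StrBuilder {
    fn with_capacity(rows: usize, bytes: usize) -> Self {
        let mut offsets = Vec::with_capacity(rows + 1);
        offsets.push(0);
        StrBuilder {
            data: Vec::with_capacity(bytes),
            offsets,
        }
    }

    fn put_str(&mut self, s: &str) {
        self.data.extend_from_slice(s.as_bytes());
    }

    fn commit_row(&mut self) {
        self.offsets.push(self.data.len() as u64);
    }
}

// usage: b.put_str("abc"); b.commit_row();  -> one row containing "abc"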
StringColumnBuilder { /// # Safety /// /// Calling this method with an out-of-bounds index is *[undefined behavior]* - pub unsafe fn index_unchecked(&self, row: usize) -> &[u8] { + pub unsafe fn index_unchecked(&self, row: usize) -> &str { + debug_assert!(row + 1 < self.offsets.len()); + let start = *self.offsets.get_unchecked(row) as usize; let end = *self.offsets.get_unchecked(row + 1) as usize; - // soundness: the invariant of the struct - self.data.get_unchecked(start..end) + let bytes = self.data.get_unchecked(start..end); + + #[cfg(debug_assertions)] + bytes.check_utf8().unwrap(); + + std::str::from_utf8_unchecked(bytes) } - pub fn pop(&mut self) -> Option> { + pub fn pop(&mut self) -> Option { if self.len() > 0 { let index = self.len() - 1; let start = unsafe { *self.offsets.get_unchecked(index) as usize }; self.offsets.pop(); let val = self.data.split_off(start); - Some(val) + + #[cfg(debug_assertions)] + val.check_utf8().unwrap(); + + Some(unsafe { String::from_utf8_unchecked(val) }) } else { None } } } -impl<'a> FromIterator<&'a [u8]> for StringColumnBuilder { - fn from_iter>(iter: T) -> Self { +impl<'a> FromIterator<&'a str> for StringColumnBuilder { + fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); let mut builder = StringColumnBuilder::with_capacity(iter.size_hint().0, 0); for item in iter { - builder.put_slice(item); + builder.put_str(item); builder.commit_row(); } builder @@ -584,34 +626,9 @@ impl TryFrom for StringColumnBuilder { #[derive(Debug, Clone, PartialEq, Eq)] pub struct StringDomain { - pub min: Vec, + pub min: String, // max value is None for full domain - pub max: Option>, -} - -impl StringDomain { - pub fn unify(&self, other: &Self) -> (SimpleDomain>, SimpleDomain>) { - let mut max_size = self.min.len().max(other.min.len()); - if let Some(max) = &self.max { - max_size = max_size.max(max.len()); - } - if let Some(max) = &other.max { - max_size = max_size.max(max.len()); - } - - let max_value = vec![255; max_size + 1]; - - ( - SimpleDomain { - min: self.min.clone(), - max: self.max.clone().unwrap_or_else(|| max_value.clone()), - }, - SimpleDomain { - min: other.min.clone(), - max: other.max.clone().unwrap_or_else(|| max_value.clone()), - }, - ) - } + pub max: Option, } pub trait CheckUTF8 { @@ -634,16 +651,13 @@ impl CheckUTF8 for &[u8] { } } -impl CheckUTF8 for BinaryColumn { +impl CheckUTF8 for Vec { fn check_utf8(&self) -> Result<()> { - for val in self.iter() { - val.check_utf8()?; - } - Ok(()) + self.as_slice().check_utf8() } } -impl CheckUTF8 for StringColumn { +impl CheckUTF8 for BinaryColumn { fn check_utf8(&self) -> Result<()> { for val in self.iter() { val.check_utf8()?; @@ -660,12 +674,3 @@ impl CheckUTF8 for BinaryColumnBuilder { Ok(()) } } - -impl CheckUTF8 for StringColumnBuilder { - fn check_utf8(&self) -> Result<()> { - for row in 0..self.len() { - unsafe { self.index_unchecked(row) }.check_utf8()?; - } - Ok(()) - } -} diff --git a/src/query/expression/src/types/timestamp.rs b/src/query/expression/src/types/timestamp.rs index 8bdd9e3cb41c..4fbf8e0a8ef5 100644 --- a/src/query/expression/src/types/timestamp.rs +++ b/src/query/expression/src/types/timestamp.rs @@ -141,6 +141,8 @@ impl ValueType for TimestampType { #[inline(always)] unsafe fn index_column_unchecked(col: &Self::Column, index: usize) -> Self::ScalarRef<'_> { + debug_assert!(index < col.len()); + *col.get_unchecked(index) } diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index 64cea7623112..31a408ea52d8 100644 --- 
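// The CheckUTF8 impls above all fan out from a single byte-slice
// implementation. A hedged sketch of that delegation, with std's validator
// standing in for the simdutf8 call used in the real code:
trait CheckUtf8 {
    fn check_utf8(&self) -> Result<(), String>;
}

impl CheckUtf8 for [u8] {
    fn check_utf8(&self) -> Result<(), String> {
        std::str::from_utf8(self)
            .map(|_| ())
            .map_err(|e| format!("invalid utf8: {e}"))
    }
}

impl CheckUtf8 for Vec<u8> {
    fn check_utf8(&self) -> Result<(), String> {
        // delegate to the slice impl, as the Vec<u8> impl in the diff does
        self.as_slice().check_utf8()
    }
}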
a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -207,7 +207,7 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: TzLUT, buf: &mut Vec) { ScalarRef::Decimal(x) => x.to_float64().into(), ScalarRef::Boolean(b) => jsonb::Value::Bool(b), ScalarRef::Binary(s) => jsonb::Value::String(hex::encode_upper(s).into()), - ScalarRef::String(s) => jsonb::Value::String(String::from_utf8_lossy(s)), + ScalarRef::String(s) => jsonb::Value::String(s.into()), ScalarRef::Timestamp(ts) => timestamp_to_string(ts, inner_tz).to_string().into(), ScalarRef::Date(d) => date_to_string(d, inner_tz).to_string().into(), ScalarRef::Array(col) => { @@ -221,7 +221,7 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: TzLUT, buf: &mut Vec) { .iter() .map(|(k, v)| { let key = match k { - ScalarRef::String(v) => unsafe { String::from_utf8_unchecked(v.to_vec()) }, + ScalarRef::String(v) => v.to_string(), ScalarRef::Number(v) => v.to_string(), ScalarRef::Decimal(v) => v.to_string(), ScalarRef::Boolean(v) => v.to_string(), diff --git a/src/query/expression/src/utils/column_from.rs b/src/query/expression/src/utils/column_from.rs index 0c180ba3508e..7c3dfef7faa0 100755 --- a/src/query/expression/src/utils/column_from.rs +++ b/src/query/expression/src/utils/column_from.rs @@ -84,13 +84,13 @@ impl<'a> FromData<&'a [u8]> for BinaryType { impl<'a> FromData<&'a str> for StringType { fn from_data(d: Vec<&'a str>) -> Column { - StringType::from_data(d.into_iter().map(|d| d.as_bytes().to_vec()).collect_vec()) + StringType::from_data(d.into_iter().map(|d| d.to_string()).collect_vec()) } fn from_opt_data(d: Vec>) -> Column { StringType::from_opt_data( d.into_iter() - .map(|d| d.map(|d| d.as_bytes().to_vec())) + .map(|d| d.map(|d| d.to_string())) .collect_vec(), ) } diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs index 5044de970612..a257c5ec92f4 100755 --- a/src/query/expression/src/utils/display.rs +++ b/src/query/expression/src/utils/display.rs @@ -121,15 +121,7 @@ impl<'a> Debug for ScalarRef<'a> { } Ok(()) } - ScalarRef::String(s) => match std::str::from_utf8(s) { - Ok(v) => write!(f, "{:?}", v), - Err(_e) => { - for c in *s { - write!(f, "{c:02X}")?; - } - Ok(()) - } - }, + ScalarRef::String(s) => write!(f, "{s:?}"), ScalarRef::Timestamp(t) => write!(f, "{t:?}"), ScalarRef::Date(d) => write!(f, "{d:?}"), ScalarRef::Array(col) => write!(f, "[{}]", col.iter().join(", ")), @@ -212,15 +204,7 @@ impl<'a> Display for ScalarRef<'a> { } Ok(()) } - ScalarRef::String(s) => match std::str::from_utf8(s) { - Ok(v) => write!(f, "'{}'", v), - Err(_e) => { - for c in *s { - write!(f, "{c:02X}")?; - } - Ok(()) - } - }, + ScalarRef::String(s) => write!(f, "'{s}'"), ScalarRef::Timestamp(t) => write!(f, "'{}'", timestamp_to_string(*t, Tz::UTC)), ScalarRef::Date(d) => write!(f, "'{}'", date_to_string(*d as i64, Tz::UTC)), ScalarRef::Array(col) => write!(f, "[{}]", col.iter().join(", ")), @@ -955,14 +939,9 @@ impl Display for BooleanDomain { impl Display for StringDomain { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { if let Some(max) = &self.max { - write!( - f, - "{{{:?}..={:?}}}", - String::from_utf8_lossy(&self.min), - String::from_utf8_lossy(max) - ) + write!(f, "{{{:?}..={:?}}}", &self.min, max) } else { - write!(f, "{{{:?}..}}", String::from_utf8_lossy(&self.min)) + write!(f, "{{{:?}..}}", &self.min) } } } diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 9acdc5d1f10a..12c80fa4133a 100755 
--- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -66,7 +66,6 @@ use crate::types::number::NumberScalar; use crate::types::number::SimpleDomain; use crate::types::number::F32; use crate::types::number::F64; -use crate::types::string::CheckUTF8; use crate::types::string::StringColumn; use crate::types::string::StringColumnBuilder; use crate::types::string::StringDomain; @@ -113,7 +112,7 @@ pub enum Scalar { Date(i32), Boolean(bool), Binary(Vec), - String(Vec), + String(String), Array(Column), Map(Column), Bitmap(Vec), @@ -131,7 +130,7 @@ pub enum ScalarRef<'a> { Decimal(DecimalScalar), Boolean(bool), Binary(&'a [u8]), - String(&'a [u8]), + String(&'a str), Timestamp(i64), Date(i32), Array(Column), @@ -345,7 +344,7 @@ impl Scalar { Scalar::Decimal(d) => ScalarRef::Decimal(*d), Scalar::Boolean(b) => ScalarRef::Boolean(*b), Scalar::Binary(s) => ScalarRef::Binary(s.as_slice()), - Scalar::String(s) => ScalarRef::String(s.as_slice()), + Scalar::String(s) => ScalarRef::String(s.as_str()), Scalar::Timestamp(t) => ScalarRef::Timestamp(*t), Scalar::Date(d) => ScalarRef::Date(*d), Scalar::Array(col) => ScalarRef::Array(col.clone()), @@ -362,8 +361,8 @@ impl Scalar { DataType::EmptyArray => Scalar::EmptyArray, DataType::EmptyMap => Scalar::EmptyMap, DataType::Boolean => Scalar::Boolean(false), - DataType::Binary => Scalar::Binary(vec![]), - DataType::String => Scalar::String(vec![]), + DataType::Binary => Scalar::Binary(Vec::new()), + DataType::String => Scalar::String(String::new()), DataType::Number(num_ty) => Scalar::Number(match num_ty { NumberDataType::UInt8 => NumberScalar::UInt8(0), NumberDataType::UInt16 => NumberScalar::UInt16(0), @@ -441,7 +440,7 @@ impl<'a> ScalarRef<'a> { ScalarRef::Decimal(d) => Scalar::Decimal(*d), ScalarRef::Boolean(b) => Scalar::Boolean(*b), ScalarRef::Binary(s) => Scalar::Binary(s.to_vec()), - ScalarRef::String(s) => Scalar::String(s.to_vec()), + ScalarRef::String(s) => Scalar::String(s.to_string()), ScalarRef::Timestamp(t) => Scalar::Timestamp(*t), ScalarRef::Date(d) => Scalar::Date(*d), ScalarRef::Array(col) => Scalar::Array(col.clone()), @@ -483,8 +482,8 @@ impl<'a> ScalarRef<'a> { has_true: false, }), ScalarRef::String(s) => Domain::String(StringDomain { - min: s.to_vec(), - max: Some(s.to_vec()), + min: s.to_string(), + max: Some(s.to_string()), }), ScalarRef::Timestamp(t) => Domain::Timestamp(SimpleDomain { min: *t, max: *t }), ScalarRef::Date(d) => Domain::Date(SimpleDomain { min: *d, max: *d }), @@ -927,8 +926,8 @@ impl Column { Column::String(col) => { let (min, max) = StringType::iter_column(col).minmax().into_option().unwrap(); Domain::String(StringDomain { - min: min.to_vec(), - max: Some(max.to_vec()), + min: min.to_string(), + max: Some(max.to_string()), }) } Column::Timestamp(col) => { @@ -1085,8 +1084,8 @@ impl Column { rng.sample_iter(&Alphanumeric) // randomly generate 5 characters. 
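// The Scalar/ScalarRef pair above is the usual owned/borrowed enum duo;
// switching the String variant to String / &str lets as_ref and to_owned
// round-trip without any UTF-8 re-validation. Minimal sketch with
// illustrative names:
enum MyScalar {
    String(String),
    Binary(Vec<u8>),
}

enum MyScalarRef<'a> {
    String(&'a str),
    Binary(&'a [u8]),
}

impl MyScalar {
    fn as_ref(&self) -> MyScalarRef<'_> {
        match self {
            MyScalar::String(s) => MyScalarRef::String(s.as_str()),
            MyScalar::Binary(b) => MyScalarRef::Binary(b.as_slice()),
        }
    }
}

impl<'a> MyScalarRef<'a> {
    fn to_owned(&self) -> MyScalar {
        match self {
            MyScalarRef::String(s) => MyScalar::String(s.to_string()),
            MyScalarRef::Binary(b) => MyScalar::Binary(b.to_vec()),
        }
    }
}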
.take(5) - .map(u8::from) - .collect::>() + .map(char::from) + .collect::() }) .collect_vec(), ), @@ -1620,7 +1619,7 @@ impl ColumnBuilder { builder.commit_row(); } (ColumnBuilder::String(builder), ScalarRef::String(value)) => { - builder.put_slice(value); + builder.put_str(value); builder.commit_row(); } (ColumnBuilder::Timestamp(builder), ScalarRef::Timestamp(value)) => { @@ -1723,7 +1722,7 @@ impl ColumnBuilder { builder.commit_row(); #[cfg(debug_assertions)] - (&builder.data[last..last + offset]).check_utf8().unwrap(); + string::CheckUTF8::check_utf8(&(&builder.data[last..last + offset])).unwrap(); } ColumnBuilder::Timestamp(builder) => { let value: i64 = reader.read_scalar()?; @@ -1813,11 +1812,13 @@ impl ColumnBuilder { } ColumnBuilder::String(builder) => { for row in 0..rows { + let bytes = &reader[step * row..]; + #[cfg(debug_assertions)] - (&reader[step * row..]).check_utf8().unwrap(); + string::CheckUTF8::check_utf8(&bytes).unwrap(); - let reader = &reader[step * row..]; - builder.put_slice(reader); + let s = unsafe { std::str::from_utf8_unchecked(bytes) }; + builder.put_str(s); builder.commit_row(); } } diff --git a/src/query/expression/tests/it/block.rs b/src/query/expression/tests/it/block.rs index d94ac3a93560..117b9ab15b0e 100644 --- a/src/query/expression/tests/it/block.rs +++ b/src/query/expression/tests/it/block.rs @@ -12,10 +12,10 @@ use crate::common::new_block; #[test] fn test_split_block() { - let value = b"abc"; + let value = "abc"; let n = 10; let block = new_block(&[Column::String( - StringColumnBuilder::repeat(&value[..], n).build(), + StringColumnBuilder::repeat(value, n).build(), )]); let sizes = block .split_by_rows_if_needed_no_tail(3) @@ -27,11 +27,11 @@ fn test_split_block() { #[test] fn test_box_render_block() { - let value = b"abc"; + let value = "abc"; let n = 10; let block = new_block(&[ Int32Type::from_data(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - Column::String(StringColumnBuilder::repeat(&value[..], n).build()), + Column::String(StringColumnBuilder::repeat(value, n).build()), ]); let schema = DataSchemaRefExt::create(vec![ diff --git a/src/query/expression/tests/it/meta_scalar.rs b/src/query/expression/tests/it/meta_scalar.rs index 9499329598c0..e420ad0c1589 100644 --- a/src/query/expression/tests/it/meta_scalar.rs +++ b/src/query/expression/tests/it/meta_scalar.rs @@ -99,9 +99,9 @@ pub fn test_simple_converts() -> databend_common_exception::Result<()> { } // TODO: comment these when we swith string in scalar - let data = rmp_serde::to_vec(&scalars).unwrap(); - let new_scalars: Vec = rmp_serde::from_slice(&data).unwrap(); - assert_eq!(simple_scalars, new_scalars); + // let data = rmp_serde::to_vec(&scalars).unwrap(); + // let new_scalars: Vec = rmp_serde::from_slice(&data).unwrap(); + // assert_eq!(simple_scalars, new_scalars); } } diff --git a/src/query/expression/tests/it/row.rs b/src/query/expression/tests/it/row.rs index dd6a9dc18f2e..1e8106395c28 100644 --- a/src/query/expression/tests/it/row.rs +++ b/src/query/expression/tests/it/row.rs @@ -33,6 +33,7 @@ use itertools::Itertools; use jsonb::convert_to_comparable; use jsonb::parse_value; use ordered_float::OrderedFloat; +use rand::distributions::Alphanumeric; use rand::distributions::Standard; use rand::prelude::Distribution; use rand::thread_rng; @@ -303,13 +304,8 @@ fn test_binary() { #[test] fn test_string() { - let col = StringType::from_opt_data(vec![ - Some("hello".as_bytes().to_vec()), - Some("he".as_bytes().to_vec()), - None, - Some("foo".as_bytes().to_vec()), - 
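// The random-column hunks above sample Alphanumeric as chars and collect into
// a String instead of a Vec<u8>. A standalone sketch of the same call chain
// (assumes the `rand` crate, as the diff does):
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};

fn random_ascii_string(len: usize) -> String {
    thread_rng()
        .sample_iter(&Alphanumeric)
        .take(len)
        .map(char::from) // Alphanumeric yields u8; convert to char before collecting
        .collect()
}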
Some("".as_bytes().to_vec()), - ]); + let col = + StringType::from_opt_data(vec![Some("hello"), Some("he"), None, Some("foo"), Some("")]); let converter = RowConverter::new(vec![SortField::new(DataType::String.wrap_nullable())]).unwrap(); @@ -327,13 +323,13 @@ fn test_string() { let col = StringType::from_opt_data(vec![ None, - Some(vec![0_u8; 0]), - Some(vec![0_u8; 6]), - Some(vec![0_u8; BLOCK_SIZE]), - Some(vec![0_u8; BLOCK_SIZE + 1]), - Some(vec![1_u8; 6]), - Some(vec![1_u8; BLOCK_SIZE]), - Some(vec![1_u8; BLOCK_SIZE + 1]), + Some(String::from_utf8(vec![0_u8; 0]).unwrap()), + Some(String::from_utf8(vec![0_u8; 6]).unwrap()), + Some(String::from_utf8(vec![0_u8; BLOCK_SIZE]).unwrap()), + Some(String::from_utf8(vec![0_u8; BLOCK_SIZE + 1]).unwrap()), + Some(String::from_utf8(vec![1_u8; 6]).unwrap()), + Some(String::from_utf8(vec![1_u8; BLOCK_SIZE]).unwrap()), + Some(String::from_utf8(vec![1_u8; BLOCK_SIZE + 1]).unwrap()), ]); let num_rows = col.len(); @@ -481,7 +477,12 @@ fn generate_string_column(len: usize, valid_percent: f64) -> Column { .map(|_| { rng.gen_bool(valid_percent).then(|| { let len = rng.gen_range(0..100); - (0..len).map(|_| rng.gen_range(0..128)).collect_vec() + thread_rng() + .sample_iter(&Alphanumeric) + // randomly generate 5 characters. + .take(len) + .map(char::from) + .collect::() }) }) .collect::>(); diff --git a/src/query/expression/tests/it/schema.rs b/src/query/expression/tests/it/schema.rs index 27294100e7d2..241b426a030e 100644 --- a/src/query/expression/tests/it/schema.rs +++ b/src/query/expression/tests/it/schema.rs @@ -215,7 +215,7 @@ fn test_field_leaf_default_values() -> Result<()> { Scalar::Tuple(vec![ Scalar::Tuple(vec![ Scalar::Boolean(true), - Scalar::String(['a', 'b'].iter().map(|c| *c as u8).collect::>()), + Scalar::String("ab".to_string()), ]), Scalar::Number(databend_common_expression::types::number::NumberScalar::Int64(2)), ]), @@ -229,10 +229,7 @@ fn test_field_leaf_default_values() -> Result<()> { Scalar::Number(databend_common_expression::types::number::NumberScalar::UInt64(1)), ), (1, Scalar::Boolean(true)), - ( - 2, - Scalar::String(['a', 'b'].iter().map(|c| *c as u8).collect::>()), - ), + (2, Scalar::String("ab".to_string())), ( 3, Scalar::Number(databend_common_expression::types::number::NumberScalar::Int64(2)), diff --git a/src/query/formats/src/field_encoder/csv.rs b/src/query/formats/src/field_encoder/csv.rs index 5c151e4984eb..1f02dab517c1 100644 --- a/src/query/formats/src/field_encoder/csv.rs +++ b/src/query/formats/src/field_encoder/csv.rs @@ -128,7 +128,7 @@ impl FieldEncoderCSV { } Column::String(c) => { let buf = unsafe { c.index_unchecked(row_index) }; - self.string_formatter.write_string(buf, out_buf); + self.string_formatter.write_string(buf.as_bytes(), out_buf); } Column::Date(..) | Column::Timestamp(..) | Column::Bitmap(..) | Column::Variant(..) => { diff --git a/src/query/formats/src/field_encoder/json.rs b/src/query/formats/src/field_encoder/json.rs index 49c7473a9a13..b097b4209c94 100644 --- a/src/query/formats/src/field_encoder/json.rs +++ b/src/query/formats/src/field_encoder/json.rs @@ -63,7 +63,7 @@ impl FieldEncoderJSON { } Column::String(c) => { let buf = unsafe { c.index_unchecked(row_index) }; - self.write_string(buf, out_buf); + self.write_string(buf.as_bytes(), out_buf); } Column::Date(..) | Column::Timestamp(..) | Column::Bitmap(..) 
=> { diff --git a/src/query/formats/src/field_encoder/values.rs b/src/query/formats/src/field_encoder/values.rs index 352eadb5a368..490c1e49df6a 100644 --- a/src/query/formats/src/field_encoder/values.rs +++ b/src/query/formats/src/field_encoder/values.rs @@ -232,7 +232,7 @@ impl FieldEncoderValues { in_nested: bool, ) { self.write_string_inner( - unsafe { column.index_unchecked(row_index) }, + unsafe { column.index_unchecked(row_index).as_bytes() }, out_buf, in_nested, ); diff --git a/src/query/formats/src/output_format/json.rs b/src/query/formats/src/output_format/json.rs index 3edddf01f2a6..9e65f282f35e 100644 --- a/src/query/formats/src/output_format/json.rs +++ b/src/query/formats/src/output_format/json.rs @@ -99,7 +99,7 @@ fn scalar_to_json(s: ScalarRef<'_>, format: &FormatSettings) -> JsonValue { ScalarRef::EmptyArray => JsonValue::Array(vec![]), ScalarRef::EmptyMap => JsonValue::Object(JsonMap::new()), ScalarRef::Binary(x) => JsonValue::String(hex::encode_upper(x)), - ScalarRef::String(x) => JsonValue::String(String::from_utf8_lossy(x).to_string()), + ScalarRef::String(x) => JsonValue::String(x.to_string()), ScalarRef::Array(x) => { let vals = x .iter() diff --git a/src/query/functions/Cargo.toml b/src/query/functions/Cargo.toml index 702507bf4271..fad69009319d 100644 --- a/src/query/functions/Cargo.toml +++ b/src/query/functions/Cargo.toml @@ -27,7 +27,6 @@ blake3 = "1.3.1" borsh = { workspace = true, features = ["derive"] } bstr = "1.0.1" bumpalo = { workspace = true } -bytes = { workspace = true } chrono = { workspace = true } chrono-tz = { workspace = true } crc32fast = "1.3.2" @@ -60,6 +59,7 @@ simdutf8 = "0.1.4" siphasher = "0.3" streaming_algorithms = { git = "https://github.com/ariesdevil/streaming_algorithms", rev = "2839d5d" } strength_reduce = "0.2.3" +stringslice = "0.2.0" twox-hash = "1.6.3" [dev-dependencies] diff --git a/src/query/functions/src/aggregates/aggregate_distinct_state.rs b/src/query/functions/src/aggregates/aggregate_distinct_state.rs index 31f57a207104..b9be795b7376 100644 --- a/src/query/functions/src/aggregates/aggregate_distinct_state.rs +++ b/src/query/functions/src/aggregates/aggregate_distinct_state.rs @@ -194,7 +194,7 @@ impl DistinctStateFunc for AggregateDistinctStringState { fn add(&mut self, columns: &[Column], row: usize) -> Result<()> { let column = StringType::try_downcast_column(&columns[0]).unwrap(); let data = unsafe { column.index_unchecked(row) }; - let _ = self.set.set_insert(data); + let _ = self.set.set_insert(data.as_bytes()); Ok(()) } @@ -211,14 +211,14 @@ impl DistinctStateFunc for AggregateDistinctStringState { for row in 0..input_rows { if v.get_bit(row) { let data = unsafe { column.index_unchecked(row) }; - let _ = self.set.set_insert(data); + let _ = self.set.set_insert(data.as_bytes()); } } } None => { for row in 0..input_rows { let data = unsafe { column.index_unchecked(row) }; - let _ = self.set.set_insert(data); + let _ = self.set.set_insert(data.as_bytes()); } } } @@ -233,7 +233,7 @@ impl DistinctStateFunc for AggregateDistinctStringState { fn build_columns(&mut self, _types: &[DataType]) -> Result> { let mut builder = StringColumnBuilder::with_capacity(self.set.len(), self.set.len() * 2); for key in self.set.iter() { - builder.put_slice(key.key()); + builder.put_str(unsafe { std::str::from_utf8_unchecked(key.key()) }); builder.commit_row(); } Ok(vec![Column::String(builder.build())]) @@ -360,7 +360,7 @@ impl DistinctStateFunc for AggregateUniqStringState { let column = columns[0].as_string().unwrap(); let data = unsafe 
{ column.index_unchecked(row) }; let mut hasher = SipHasher24::new(); - hasher.write(data); + hasher.write(data.as_bytes()); let hash128 = hasher.finish128(); let _ = self.set.set_insert(hash128.into()).is_ok(); Ok(()) @@ -378,7 +378,7 @@ impl DistinctStateFunc for AggregateUniqStringState { for (t, v) in column.iter().zip(v.iter()) { if v { let mut hasher = SipHasher24::new(); - hasher.write(t); + hasher.write(t.as_bytes()); let hash128 = hasher.finish128(); let _ = self.set.set_insert(hash128.into()).is_ok(); } @@ -388,7 +388,7 @@ impl DistinctStateFunc for AggregateUniqStringState { for row in 0..input_rows { let data = unsafe { column.index_unchecked(row) }; let mut hasher = SipHasher24::new(); - hasher.write(data); + hasher.write(data.as_bytes()); let hash128 = hasher.finish128(); let _ = self.set.set_insert(hash128.into()).is_ok(); } diff --git a/src/query/functions/src/aggregates/aggregate_string_agg.rs b/src/query/functions/src/aggregates/aggregate_string_agg.rs index 29f024770a8b..354e1660b4d9 100644 --- a/src/query/functions/src/aggregates/aggregate_string_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_string_agg.rs @@ -37,13 +37,13 @@ use crate::aggregates::AggregateFunction; #[derive(BorshSerialize, BorshDeserialize, Debug)] pub struct StringAggState { - values: Vec, + values: String, } #[derive(Clone)] pub struct AggregateStringAggFunction { display_name: String, - delimiter: Vec, + delimiter: String, } impl AggregateFunction for AggregateStringAggFunction { @@ -56,7 +56,9 @@ impl AggregateFunction for AggregateStringAggFunction { } fn init_state(&self, place: StateAddr) { - place.write(|| StringAggState { values: Vec::new() }); + place.write(|| StringAggState { + values: String::new(), + }); } fn state_layout(&self) -> Layout { @@ -76,15 +78,15 @@ impl AggregateFunction for AggregateStringAggFunction { Some(validity) => { column.iter().zip(validity.iter()).for_each(|(v, b)| { if b { - state.values.extend_from_slice(v); - state.values.extend_from_slice(self.delimiter.as_slice()); + state.values.push_str(v); + state.values.push_str(&self.delimiter); } }); } None => { column.iter().for_each(|v| { - state.values.extend_from_slice(v); - state.values.extend_from_slice(self.delimiter.as_slice()); + state.values.push_str(v); + state.values.push_str(&self.delimiter); }); } } @@ -103,8 +105,8 @@ impl AggregateFunction for AggregateStringAggFunction { column_iter.zip(places.iter()).for_each(|(v, place)| { let addr = place.next(offset); let state = addr.get::(); - state.values.extend_from_slice(v); - state.values.extend_from_slice(self.delimiter.as_slice()); + state.values.push_str(v); + state.values.push_str(&self.delimiter); }); Ok(()) } @@ -114,8 +116,8 @@ impl AggregateFunction for AggregateStringAggFunction { let v = StringType::index_column(&column, row); if let Some(v) = v { let state = place.get::(); - state.values.extend_from_slice(v); - state.values.extend_from_slice(self.delimiter.as_slice()); + state.values.push_str(v); + state.values.push_str(&self.delimiter); } Ok(()) } @@ -129,14 +131,14 @@ impl AggregateFunction for AggregateStringAggFunction { fn merge(&self, place: StateAddr, reader: &mut &[u8]) -> Result<()> { let state = place.get::(); let rhs: StringAggState = borsh_deserialize_state(reader)?; - state.values.extend_from_slice(rhs.values.as_slice()); + state.values.push_str(&rhs.values); Ok(()) } fn merge_states(&self, place: StateAddr, rhs: StateAddr) -> Result<()> { let state = place.get::(); let other = rhs.get::(); - 
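// The AggregateUniqStringState hunks above feed the str's bytes into a 128-bit
// SipHash and keep only the hashes for distinct counting. A hedged sketch of
// that idea; the `siphasher` crate and its finish128/as_u128 methods are
// assumptions based on the calls visible in the diff:
use std::collections::HashSet;
use std::hash::Hasher;

use siphasher::sip128::{Hasher128, SipHasher24};

fn distinct_count<'a>(values: impl Iterator<Item = &'a str>) -> usize {
    let mut set: HashSet<u128> = HashSet::new();
    for v in values {
        let mut h = SipHasher24::new();
        h.write(v.as_bytes()); // hash the same byte stream the old Vec<u8> code hashed
        set.insert(h.finish128().as_u128());
    }
    set.len()
}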
state.values.extend_from_slice(other.values.as_slice()); + state.values.push_str(&other.values); Ok(()) } @@ -145,7 +147,7 @@ impl AggregateFunction for AggregateStringAggFunction { let builder = StringType::try_downcast_builder(builder).unwrap(); if !state.values.is_empty() { let len = state.values.len() - self.delimiter.len(); - builder.put_slice(&state.values.as_slice()[..len]); + builder.put_str(&state.values[..len]); } builder.commit_row(); Ok(()) @@ -168,7 +170,7 @@ impl fmt::Display for AggregateStringAggFunction { } impl AggregateStringAggFunction { - fn try_create(display_name: &str, delimiter: Vec) -> Result> { + fn try_create(display_name: &str, delimiter: String) -> Result> { let func = AggregateStringAggFunction { display_name: display_name.to_string(), delimiter, @@ -193,7 +195,7 @@ pub fn try_create_aggregate_string_agg_function( let delimiter = if params.len() == 1 { params[0].as_string().unwrap().clone() } else { - vec![] + String::new() }; AggregateStringAggFunction::try_create(display_name, delimiter) } diff --git a/src/query/functions/src/lib.rs b/src/query/functions/src/lib.rs index 12b59465f6df..0a6661974fc1 100644 --- a/src/query/functions/src/lib.rs +++ b/src/query/functions/src/lib.rs @@ -20,6 +20,7 @@ #![feature(try_blocks)] #![feature(downcast_unchecked)] #![feature(lazy_cell)] +#![feature(str_internals)] use aggregates::AggregateFunctionFactory; use ctor::ctor; diff --git a/src/query/functions/src/scalars/arithmetic.rs b/src/query/functions/src/scalars/arithmetic.rs index 190e5bc37425..8db8dcc0f217 100644 --- a/src/query/functions/src/scalars/arithmetic.rs +++ b/src/query/functions/src/scalars/arithmetic.rs @@ -833,8 +833,7 @@ fn register_string_to_number(registry: &mut FunctionRegistry) { |_, _| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::>( move |val, output, ctx| { - let str_val = String::from_utf8_lossy(val); - match str_val.parse::() { + match val.parse::() { Ok(new_val) => output.push(new_val), Err(e) => { ctx.set_error(output.len(), e.to_string()); @@ -854,8 +853,7 @@ fn register_string_to_number(registry: &mut FunctionRegistry) { StringType, NullableType>, >(|val, output, _| { - let str_val = String::from_utf8_lossy(val); - if let Ok(new_val) = str_val.parse::() { + if let Ok(new_val) = val.parse::() { output.push(new_val); } else { output.push_null(); @@ -876,7 +874,7 @@ pub fn register_number_to_string(registry: &mut FunctionRegistry) { "to_string", |_, _| FunctionDomain::Full, |from, _| match from { - ValueRef::Scalar(s) => Value::Scalar(s.to_string().into_bytes()), + ValueRef::Scalar(s) => Value::Scalar(s.to_string()), ValueRef::Column(from) => { let options = NUM_TYPE::lexical_options(); const FORMAT: u128 = lexical_core::format::STANDARD; @@ -913,7 +911,7 @@ pub fn register_number_to_string(registry: &mut FunctionRegistry) { "try_to_string", |_, _| FunctionDomain::Full, |from, _| match from { - ValueRef::Scalar(s) => Value::Scalar(Some(s.to_string().into_bytes())), + ValueRef::Scalar(s) => Value::Scalar(Some(s.to_string())), ValueRef::Column(from) => { let options = NUM_TYPE::lexical_options(); const FORMAT: u128 = lexical_core::format::STANDARD; diff --git a/src/query/functions/src/scalars/array.rs b/src/query/functions/src/scalars/array.rs index 4d29a9c1972e..e96bfdb68f1c 100644 --- a/src/query/functions/src/scalars/array.rs +++ b/src/query/functions/src/scalars/array.rs @@ -57,6 +57,7 @@ use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; use databend_common_expression::Scalar; use 
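// A minimal standalone version of the string_agg accumulation shown above:
// append value + delimiter per row, then drop the trailing delimiter when
// finalizing. Slicing by byte length is sound here because the accumulated
// string always ends with exactly one copy of the delimiter.
struct AggState {
    values: String,
}

impl AggState {
    fn accumulate(&mut self, v: &str, delimiter: &str) {
        self.values.push_str(v);
        self.values.push_str(delimiter);
    }

    fn finalize(&self, delimiter: &str) -> &str {
        if self.values.is_empty() {
            ""
        } else {
            &self.values[..self.values.len() - delimiter.len()]
        }
    }
}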
databend_common_expression::ScalarRef; +use databend_common_expression::SimpleDomainCmp; use databend_common_expression::SortColumnDescription; use databend_common_expression::Value; use databend_common_expression::ValueRef; @@ -280,9 +281,9 @@ pub fn register(registry: &mut FunctionRegistry) { |lhs, rhs, output, _| { for (i, d) in lhs.iter().enumerate() { if i != 0 { - output.put_slice(rhs); + output.put_str(rhs); } - output.put_slice(d); + output.put_str(d); } output.commit_row(); }, @@ -481,11 +482,9 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_2_arg::>, NumberType, BooleanType, _, _>( "contains", |_, lhs, rhs| { - let has_true = lhs.is_some_and(|lhs| !(lhs.min > rhs.max || lhs.max < rhs.min)); - FunctionDomain::Domain(BooleanDomain { - has_false: true, - has_true, - }) + lhs.as_ref().map(|lhs| { + lhs.domain_contains(rhs) + }).unwrap_or(FunctionDomain::Full) }, |lhs, rhs, _| eval_contains::>(lhs, rhs) ); @@ -495,9 +494,11 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_2_arg::, StringType, BooleanType, _, _>( "contains", - |_, _, _| { - FunctionDomain::Full - }, + |_, lhs, rhs| { + lhs.as_ref().map(|lhs| { + lhs.domain_contains(rhs) + }).unwrap_or(FunctionDomain::Full) + }, |lhs, rhs, _| { match lhs { ValueRef::Scalar(array) => { diff --git a/src/query/functions/src/scalars/binary.rs b/src/query/functions/src/scalars/binary.rs index dae984d1038d..58113ec7a9e2 100644 --- a/src/query/functions/src/scalars/binary.rs +++ b/src/query/functions/src/scalars/binary.rs @@ -13,20 +13,32 @@ // limitations under the License. use std::io::Write; +use std::sync::Arc; use databend_common_arrow::arrow::bitmap::Bitmap; use databend_common_expression::error_to_null; +use databend_common_expression::passthrough_nullable; use databend_common_expression::types::binary::BinaryColumn; use databend_common_expression::types::binary::BinaryColumnBuilder; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::string::StringColumn; use databend_common_expression::types::string::StringColumnBuilder; +use databend_common_expression::types::AnyType; use databend_common_expression::types::BinaryType; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberDataType; use databend_common_expression::types::NumberType; use databend_common_expression::types::StringType; +use databend_common_expression::types::UInt8Type; +use databend_common_expression::types::ValueType; +use databend_common_expression::Column; use databend_common_expression::EvalContext; +use databend_common_expression::Function; use databend_common_expression::FunctionDomain; +use databend_common_expression::FunctionEval; use databend_common_expression::FunctionRegistry; +use databend_common_expression::FunctionSignature; +use databend_common_expression::Scalar; use databend_common_expression::Value; use databend_common_expression::ValueRef; @@ -68,7 +80,7 @@ pub fn register(registry: &mut FunctionRegistry) { "to_binary", |_, _| FunctionDomain::Full, |val, _| match val { - ValueRef::Scalar(val) => Value::Scalar(val.to_vec()), + ValueRef::Scalar(val) => Value::Scalar(val.as_bytes().to_vec()), ValueRef::Column(col) => Value::Column(col.into()), }, ); @@ -77,7 +89,7 @@ pub fn register(registry: &mut FunctionRegistry) { "try_to_binary", |_, _| FunctionDomain::Full, |val, _| match val { - ValueRef::Scalar(val) => Value::Scalar(Some(val.to_vec())), + ValueRef::Scalar(val) => 
Value::Scalar(Some(val.as_bytes().to_vec())), ValueRef::Column(col) => Value::Column(NullableColumn { validity: Bitmap::new_constant(true, col.len()), column: col.into(), @@ -140,14 +152,61 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _| FunctionDomain::Full, error_to_null(eval_from_base64), ); + + registry.register_function_factory("char", |_, args_type| { + if args_type.is_empty() { + return None; + } + let has_null = args_type.iter().any(|t| t.is_nullable_or_null()); + let f = Function { + signature: FunctionSignature { + name: "char".to_string(), + args_type: vec![DataType::Number(NumberDataType::UInt8); args_type.len()], + return_type: DataType::Binary, + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::Full), + eval: Box::new(char_fn), + }, + }; + + if has_null { + Some(Arc::new(f.passthrough_nullable())) + } else { + Some(Arc::new(f)) + } + }); + + // nullable char + registry.register_function_factory("char", |_, args_type| { + if args_type.is_empty() { + return None; + } + Some(Arc::new(Function { + signature: FunctionSignature { + name: "char".to_string(), + args_type: vec![ + DataType::Nullable(Box::new(DataType::Number( + NumberDataType::UInt8 + ))); + args_type.len() + ], + return_type: DataType::Nullable(Box::new(DataType::Binary)), + }, + eval: FunctionEval::Scalar { + calc_domain: Box::new(|_, _| FunctionDomain::MayThrow), + eval: Box::new(passthrough_nullable(char_fn)), + }, + })) + }); } fn eval_binary_to_string(val: ValueRef, ctx: &mut EvalContext) -> Value { vectorize_binary_to_string( |col| col.data().len(), |val, output, ctx| { - if simdutf8::basic::from_utf8(val).is_ok() { - output.put_slice(val); + if let Ok(val) = simdutf8::basic::from_utf8(val) { + output.put_str(val); } else { ctx.set_error(output.len(), "invalid utf8 sequence"); } @@ -213,7 +272,7 @@ pub fn vectorize_binary_to_string( /// String to Binary scalar function with estimated output column capacity. 
pub fn vectorize_string_to_binary( estimate_bytes: impl Fn(&StringColumn) -> usize + Copy, - func: impl Fn(&[u8], &mut BinaryColumnBuilder, &mut EvalContext) + Copy, + func: impl Fn(&str, &mut BinaryColumnBuilder, &mut EvalContext) + Copy, ) -> impl Fn(ValueRef, &mut EvalContext) -> Value + Copy { move |arg1, ctx| match arg1 { ValueRef::Scalar(val) => { @@ -232,3 +291,47 @@ pub fn vectorize_string_to_binary( } } } + +fn char_fn(args: &[ValueRef], _: &mut EvalContext) -> Value { + let args = args + .iter() + .map(|arg| arg.try_downcast::().unwrap()) + .collect::>(); + + let len = args.iter().find_map(|arg| match arg { + ValueRef::Column(col) => Some(col.len()), + _ => None, + }); + let input_rows = len.unwrap_or(1); + + let mut values: Vec = vec![0; input_rows * args.len()]; + let values_ptr = values.as_mut_ptr(); + + for (i, arg) in args.iter().enumerate() { + match arg { + ValueRef::Scalar(v) => { + for j in 0..input_rows { + unsafe { + *values_ptr.add(args.len() * j + i) = *v; + } + } + } + ValueRef::Column(c) => { + for (j, ch) in UInt8Type::iter_column(c).enumerate() { + unsafe { + *values_ptr.add(args.len() * j + i) = ch; + } + } + } + } + } + let offsets = (0..(input_rows + 1) as u64 * args.len() as u64) + .step_by(args.len()) + .collect::>(); + let result = BinaryColumn::new(values.into(), offsets.into()); + + match len { + Some(_) => Value::Column(Column::Binary(result)), + _ => Value::Scalar(Scalar::Binary(result.index(0).unwrap().to_vec())), + } +} diff --git a/src/query/functions/src/scalars/bitmap.rs b/src/query/functions/src/scalars/bitmap.rs index 46927c8fbeb8..6adc7eb2245a 100644 --- a/src/query/functions/src/scalars/bitmap.rs +++ b/src/query/functions/src/scalars/bitmap.rs @@ -45,7 +45,7 @@ pub fn register(registry: &mut FunctionRegistry) { "to_bitmap", |_, _| FunctionDomain::MayThrow, vectorize_with_builder_1_arg::(|s, builder, ctx| { - match parse_bitmap(s) { + match parse_bitmap(s.as_bytes()) { Ok(rb) => { rb.serialize_into(&mut builder.data).unwrap(); } diff --git a/src/query/functions/src/scalars/boolean.rs b/src/query/functions/src/scalars/boolean.rs index e364bdddd2ab..8f28f5d8b5fc 100644 --- a/src/query/functions/src/scalars/boolean.rs +++ b/src/query/functions/src/scalars/boolean.rs @@ -462,9 +462,9 @@ fn eval_boolean_to_string(val: ValueRef, ctx: &mut EvalContext) -> fn eval_string_to_boolean(val: ValueRef, ctx: &mut EvalContext) -> Value { vectorize_with_builder_1_arg::(|val, output, ctx| { - if val.eq_ignore_ascii_case(b"true") { + if val.eq_ignore_ascii_case("true") { output.push(true); - } else if val.eq_ignore_ascii_case(b"false") { + } else if val.eq_ignore_ascii_case("false") { output.push(false); } else { ctx.set_error(output.len(), "cannot parse to type `BOOLEAN`"); diff --git a/src/query/functions/src/scalars/comparison.rs b/src/query/functions/src/scalars/comparison.rs index e4d04f7f3661..adfb70674759 100644 --- a/src/query/functions/src/scalars/comparison.rs +++ b/src/query/functions/src/scalars/comparison.rs @@ -48,7 +48,7 @@ use databend_common_expression::SimpleDomainCmp; use databend_common_expression::ValueRef; use memchr::memchr; use memchr::memmem; -use regex::bytes::Regex; +use regex::Regex; use crate::scalars::decimal::register_decimal_compare_op; use crate::scalars::string_multi_args::regexp; @@ -495,38 +495,29 @@ fn register_like(registry: &mut FunctionRegistry) { "like", |_, lhs, rhs| { if rhs.max.as_ref() == Some(&rhs.min) { - let pattern_type = check_pattern_type(&rhs.min, false); - if pattern_type == PatternType::EndOfPercent - || 
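// char_fn above packs one byte per argument per row into a flat buffer and
// derives the offsets with a fixed stride of args.len(). A sketch of just the
// packing and offset arithmetic, with no Databend types involved:
fn pack_fixed_width(
    rows: usize,
    width: usize,
    byte_at: impl Fn(usize, usize) -> u8,
) -> (Vec<u8>, Vec<u64>) {
    let mut data = vec![0u8; rows * width];
    for r in 0..rows {
        for c in 0..width {
            data[r * width + c] = byte_at(r, c);
        }
    }
    // offsets: 0, width, 2*width, ..., rows*width  (rows + 1 entries)
    let offsets = (0..=rows).map(|r| (r * width) as u64).collect();
    (data, offsets)
}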
pattern_type == PatternType::OrdinalStr - { - let (min, max) = if pattern_type == PatternType::EndOfPercent { - let min = rhs.min[..rhs.min.len() - 1].to_vec(); - let mut max = min.clone(); + let pattern_type = check_pattern_type(rhs.min.as_bytes(), false); - let l = max.len(); - if max[l - 1] != u8::MAX { - max[l - 1] += 1; - } else { - return FunctionDomain::Full; - } - (min, max) - } else { - (rhs.min.clone(), rhs.min.clone()) - }; + if pattern_type == PatternType::OrdinalStr { + return lhs.domain_eq(rhs); + } + if pattern_type == PatternType::EndOfPercent { + let mut pat_str = rhs.min.clone(); + // remove the last char '%' + pat_str.pop(); + let pat_len = pat_str.chars().count(); let other = StringDomain { - min, - max: Some(max), + min: pat_str.clone(), + max: Some(pat_str), }; - let gte = lhs.domain_gte(&other); - let lt = lhs.domain_lt(&other); - - if let (FunctionDomain::Domain(lhs), FunctionDomain::Domain(rhs)) = (lt, gte) { - return FunctionDomain::Domain(BooleanDomain { - has_false: lhs.has_false || rhs.has_false, - has_true: lhs.has_true && rhs.has_true, - }); - } + let lhs = StringDomain { + min: lhs.min.chars().take(pat_len).collect(), + max: lhs + .max + .as_ref() + .map(|max| max.chars().take(pat_len).collect()), + }; + return lhs.domain_eq(&other); } } FunctionDomain::Full @@ -572,7 +563,7 @@ fn register_like(registry: &mut FunctionRegistry) { match regexp::build_regexp_from_pattern("regexp", pat, None) { Ok(re) => { builder.push(re.is_match(str)); - map.insert(pat.to_vec(), re); + map.insert(pat.to_string(), re); } Err(e) => { ctx.set_error(builder.len(), e); @@ -590,24 +581,24 @@ fn vectorize_like( { move |arg1, arg2, ctx| match (arg1, arg2) { (ValueRef::Scalar(arg1), ValueRef::Scalar(arg2)) => { - let pattern_type = check_pattern_type(arg2, false); - Value::Scalar(func(arg1, arg2, ctx, &pattern_type)) + let pattern_type = check_pattern_type(arg2.as_bytes(), false); + Value::Scalar(func(arg1.as_bytes(), arg2.as_bytes(), ctx, &pattern_type)) } (ValueRef::Column(arg1), ValueRef::Scalar(arg2)) => { let arg1_iter = StringType::iter_column(&arg1); - let pattern_type = check_pattern_type(arg2, false); + let pattern_type = check_pattern_type(arg2.as_bytes(), false); // faster path for memmem to have a single instance of Finder if pattern_type == PatternType::SurroundByPercent && arg2.len() > 2 { let finder = memmem::Finder::new(&arg2[1..arg2.len() - 1]); - let it = arg1_iter.map(|arg1| finder.find(arg1).is_some()); + let it = arg1_iter.map(|arg1| finder.find(arg1.as_bytes()).is_some()); let bitmap = BooleanType::column_from_iter(it, &[]); return Value::Column(bitmap); } let mut builder = MutableBitmap::with_capacity(arg1.len()); for arg1 in arg1_iter { - builder.push(func(arg1, arg2, ctx, &pattern_type)); + builder.push(func(arg1.as_bytes(), arg2.as_bytes(), ctx, &pattern_type)); } Value::Column(builder.into()) } @@ -615,8 +606,8 @@ fn vectorize_like( let arg2_iter = StringType::iter_column(&arg2); let mut builder = MutableBitmap::with_capacity(arg2.len()); for arg2 in arg2_iter { - let pattern_type = check_pattern_type(arg2, false); - builder.push(func(arg1, arg2, ctx, &pattern_type)); + let pattern_type = check_pattern_type(arg2.as_bytes(), false); + builder.push(func(arg1.as_bytes(), arg2.as_bytes(), ctx, &pattern_type)); } Value::Column(builder.into()) } @@ -625,8 +616,8 @@ fn vectorize_like( let arg2_iter = StringType::iter_column(&arg2); let mut builder = MutableBitmap::with_capacity(arg2.len()); for (arg1, arg2) in arg1_iter.zip(arg2_iter) { - let pattern_type = 
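// The LIKE fast path above builds a single memmem::Finder when the pattern is
// '%needle%' and reuses it for every row. A hedged sketch of that fast path
// (assumes the `memchr` crate, which the diff imports):
use memchr::memmem;

fn like_surrounded_by_percent<'a>(
    rows: impl Iterator<Item = &'a str>,
    pattern: &str,
) -> Vec<bool> {
    // pattern is assumed to look like "%needle%"; strip the two ASCII '%' bytes
    let needle = &pattern.as_bytes()[1..pattern.len() - 1];
    let finder = memmem::Finder::new(needle);
    rows.map(|row| finder.find(row.as_bytes()).is_some()).collect()
}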
check_pattern_type(arg2, false); - builder.push(func(arg1, arg2, ctx, &pattern_type)); + let pattern_type = check_pattern_type(arg2.as_bytes(), false); + builder.push(func(arg1.as_bytes(), arg2.as_bytes(), ctx, &pattern_type)); } Value::Column(builder.into()) } @@ -639,13 +630,13 @@ fn variant_vectorize_like( { move |arg1, arg2, ctx| match (arg1, arg2) { (ValueRef::Scalar(arg1), ValueRef::Scalar(arg2)) => { - let pattern_type = check_pattern_type(arg2, false); - Value::Scalar(func(arg1, arg2, ctx, &pattern_type)) + let pattern_type = check_pattern_type(arg2.as_bytes(), false); + Value::Scalar(func(arg1, arg2.as_bytes(), ctx, &pattern_type)) } (ValueRef::Column(arg1), ValueRef::Scalar(arg2)) => { let arg1_iter = VariantType::iter_column(&arg1); - let pattern_type = check_pattern_type(arg2, false); + let pattern_type = check_pattern_type(arg2.as_bytes(), false); // faster path for memmem to have a single instance of Finder if pattern_type == PatternType::SurroundByPercent && arg2.len() > 2 { let finder = memmem::Finder::new(&arg2[1..arg2.len() - 1]); @@ -656,7 +647,7 @@ fn variant_vectorize_like( let mut builder = MutableBitmap::with_capacity(arg1.len()); for arg1 in arg1_iter { - builder.push(func(arg1, arg2, ctx, &pattern_type)); + builder.push(func(arg1, arg2.as_bytes(), ctx, &pattern_type)); } Value::Column(builder.into()) } @@ -664,8 +655,8 @@ fn variant_vectorize_like( let arg2_iter = StringType::iter_column(&arg2); let mut builder = MutableBitmap::with_capacity(arg2.len()); for arg2 in arg2_iter { - let pattern_type = check_pattern_type(arg2, false); - builder.push(func(arg1, arg2, ctx, &pattern_type)); + let pattern_type = check_pattern_type(arg2.as_bytes(), false); + builder.push(func(arg1, arg2.as_bytes(), ctx, &pattern_type)); } Value::Column(builder.into()) } @@ -674,8 +665,8 @@ fn variant_vectorize_like( let arg2_iter = StringType::iter_column(&arg2); let mut builder = MutableBitmap::with_capacity(arg2.len()); for (arg1, arg2) in arg1_iter.zip(arg2_iter) { - let pattern_type = check_pattern_type(arg2, false); - builder.push(func(arg1, arg2, ctx, &pattern_type)); + let pattern_type = check_pattern_type(arg2.as_bytes(), false); + builder.push(func(arg1, arg2.as_bytes(), ctx, &pattern_type)); } Value::Column(builder.into()) } @@ -684,11 +675,11 @@ fn variant_vectorize_like( fn vectorize_regexp( func: impl Fn( - &[u8], - &[u8], + &str, + &str, &mut MutableBitmap, &mut EvalContext, - &mut HashMap, Regex>, + &mut HashMap, &mut HashMap, String>, ) + Copy, ) -> impl Fn(ValueRef, ValueRef, &mut EvalContext) -> Value + Copy diff --git a/src/query/functions/src/scalars/datetime.rs b/src/query/functions/src/scalars/datetime.rs index c7e5b3de9fd4..9116b7478fc7 100644 --- a/src/query/functions/src/scalars/datetime.rs +++ b/src/query/functions/src/scalars/datetime.rs @@ -162,20 +162,11 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) { if format.is_empty() { output.push_null(); } else { - match (std::str::from_utf8(timestamp), std::str::from_utf8(format)) { - (Ok(date), Ok(format)) => { - // date need has timezone info. - if let Ok(res) = DateTime::parse_from_str(date, format) { - output.push( - res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros(), - ); - } else { - output.push_null(); - } - } - _ => { - output.push_null(); - } + // date need has timezone info. 
+ if let Ok(res) = DateTime::parse_from_str(timestamp, format) { + output.push(res.with_timezone(&ctx.func_ctx.tz.tz).timestamp_micros()); + } else { + output.push_null(); } } }, @@ -190,17 +181,11 @@ fn register_string_to_timestamp(registry: &mut FunctionRegistry) { if format.is_empty() { output.push_null(); } else { - match (std::str::from_utf8(date), std::str::from_utf8(format)) { - (Ok(date), Ok(format)) => match NaiveDate::parse_from_str(date, format) { - Ok(res) => { - output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE); - } - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.push_null(); - } - }, - (Err(e), _) | (_, Err(e)) => { + match NaiveDate::parse_from_str(date, format) { + Ok(res) => { + output.push(res.num_days_from_ce() - EPOCH_DAYS_FROM_CE); + } + Err(e) => { ctx.set_error(output.len(), e.to_string()); output.push_null(); } @@ -420,16 +405,8 @@ fn register_to_string(registry: &mut FunctionRegistry) { output.push_null(); } else { let ts = date.to_timestamp(ctx.func_ctx.tz.tz); - match std::str::from_utf8(format) { - Ok(format) => { - let res = ts.format(format).to_string(); - output.push(res.as_bytes()); - } - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.push_null(); - } - } + let res = ts.format(format).to_string(); + output.push(&res); } }, ), @@ -464,7 +441,7 @@ fn register_to_string(registry: &mut FunctionRegistry) { FunctionDomain::Domain(NullableDomain { has_null: false, value: Some(Box::new(StringDomain { - min: vec![], + min: "".to_string(), max: None, })), }) @@ -487,7 +464,7 @@ fn register_to_string(registry: &mut FunctionRegistry) { FunctionDomain::Domain(NullableDomain { has_null: false, value: Some(Box::new(StringDomain { - min: vec![], + min: "".to_string(), max: None, })), }) diff --git a/src/query/functions/src/scalars/decimal/arithmetic.rs b/src/query/functions/src/scalars/decimal/arithmetic.rs index ba9dfb7ff1c8..7bc9b63b20b9 100644 --- a/src/query/functions/src/scalars/decimal/arithmetic.rs +++ b/src/query/functions/src/scalars/decimal/arithmetic.rs @@ -28,6 +28,9 @@ use databend_common_expression::FunctionRegistry; use databend_common_expression::FunctionSignature; use ethnum::i256; +use super::convert_to_decimal; +use super::convert_to_decimal_domain; + #[derive(Copy, Clone, Debug)] enum ArithmeticOp { Plus, @@ -267,16 +270,13 @@ macro_rules! register_decimal_binary_op { let function = Function { signature: FunctionSignature { name: format!("{:?}", $arithmetic_op).to_lowercase(), - args_type: vec![ - DataType::Decimal(left.clone()), - DataType::Decimal(right.clone()), - ], + args_type: args_type.clone(), return_type: DataType::Decimal(return_decimal_type), }, eval: FunctionEval::Scalar { - calc_domain: Box::new(move |_ctx, d| { - let lhs = d[0].as_decimal(); - let rhs = d[1].as_decimal(); + calc_domain: Box::new(move |ctx, d| { + let lhs = convert_to_decimal_domain(ctx, d[0].clone(), left.clone()); + let rhs = convert_to_decimal_domain(ctx, d[1].clone(), right.clone()); if lhs.is_none() || rhs.is_none() { return FunctionDomain::Full; @@ -308,9 +308,15 @@ macro_rules! 
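// With &str values the to_timestamp/to_date paths above call chrono's parsers
// directly instead of round-tripping through std::str::from_utf8. A hedged
// sketch (assumes the `chrono` crate; as the diff's comment notes, the format
// must carry timezone info for DateTime::parse_from_str to succeed):
use chrono::{DateTime, Datelike, NaiveDate};

fn parse_timestamp_micros(value: &str, format: &str) -> Option<i64> {
    DateTime::parse_from_str(value, format)
        .ok()
        .map(|dt| dt.timestamp_micros())
}

fn parse_days_from_ce(value: &str, format: &str) -> Option<i32> {
    NaiveDate::parse_from_str(value, format)
        .ok()
        .map(|d| d.num_days_from_ce())
}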
register_decimal_binary_op { .unwrap_or($default_domain) }), eval: Box::new(move |args, ctx| { + let a = convert_to_decimal(&args[0], ctx, &args_type[0], left); + let b = convert_to_decimal(&args[1], ctx, &args_type[1], right); + + let a = a.as_ref(); + let b = b.as_ref(); + let res = op_decimal!( - &args[0], - &args[1], + &a, + &b, ctx, left, right, diff --git a/src/query/functions/src/scalars/decimal/cast.rs b/src/query/functions/src/scalars/decimal/cast.rs index e1c4e9b3168a..53e6a20fd215 100644 --- a/src/query/functions/src/scalars/decimal/cast.rs +++ b/src/query/functions/src/scalars/decimal/cast.rs @@ -328,7 +328,7 @@ fn decimal_to_string( }) } -fn convert_to_decimal( +pub fn convert_to_decimal( arg: &ValueRef, ctx: &mut EvalContext, from_type: &DataType, @@ -387,7 +387,7 @@ fn convert_to_decimal( }) } -fn convert_to_decimal_domain( +pub fn convert_to_decimal_domain( func_ctx: &FunctionContext, domain: Domain, dest_type: DecimalDataType, @@ -468,14 +468,16 @@ fn string_to_decimal( where T: Decimal + Mul, { - let f = |x: &[u8], builder: &mut Vec, ctx: &mut EvalContext| { - let value = match read_decimal_with_size::(x, size, true, ctx.func_ctx.rounding_mode) { - Ok((d, _)) => d, - Err(e) => { - ctx.set_error(builder.len(), e.message()); - T::zero() - } - }; + let f = |x: &str, builder: &mut Vec, ctx: &mut EvalContext| { + let value = + match read_decimal_with_size::(x.as_bytes(), size, true, ctx.func_ctx.rounding_mode) + { + Ok((d, _)) => d, + Err(e) => { + ctx.set_error(builder.len(), e.message()); + T::zero() + } + }; builder.push(value); }; diff --git a/src/query/functions/src/scalars/decimal/mod.rs b/src/query/functions/src/scalars/decimal/mod.rs index c55ea2d36f56..ec533ecd828d 100644 --- a/src/query/functions/src/scalars/decimal/mod.rs +++ b/src/query/functions/src/scalars/decimal/mod.rs @@ -18,6 +18,8 @@ mod comparison; mod math; pub(crate) use arithmetic::register_decimal_arithmetic; +pub(crate) use cast::convert_to_decimal; +pub(crate) use cast::convert_to_decimal_domain; pub(crate) use cast::register_decimal_to_float; pub(crate) use cast::register_decimal_to_int; pub(crate) use cast::register_decimal_to_string; diff --git a/src/query/functions/src/scalars/geo.rs b/src/query/functions/src/scalars/geo.rs index 3a2b5c084610..83ecdd1464f0 100644 --- a/src/query/functions/src/scalars/geo.rs +++ b/src/query/functions/src/scalars/geo.rs @@ -183,13 +183,10 @@ pub fn register(registry: &mut FunctionRegistry) { "geohash_decode", |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::>( - |encoded, builder, ctx| match std::str::from_utf8(encoded) - .map_err(|e| e.to_string()) - .and_then(|s| geohash::decode(s).map_err(|e| e.to_string())) - { + |encoded, builder, ctx| match geohash::decode(encoded) { Ok((c, _, _)) => builder.push((c.x.into(), c.y.into())), Err(e) => { - ctx.set_error(builder.len(), e); + ctx.set_error(builder.len(), e.to_string()); builder.push((F64::from(0.0), F64::from(0.0))) } }, diff --git a/src/query/functions/src/scalars/geo_h3.rs b/src/query/functions/src/scalars/geo_h3.rs index b99d7b258b35..045327555552 100644 --- a/src/query/functions/src/scalars/geo_h3.rs +++ b/src/query/functions/src/scalars/geo_h3.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::str; - use databend_common_expression::types::map::KvPair; use databend_common_expression::types::ArrayType; use databend_common_expression::types::BooleanType; @@ -288,13 +286,10 @@ pub fn register(registry: &mut FunctionRegistry) { "string_to_h3", |_, _| FunctionDomain::Full, vectorize_with_builder_1_arg::(|h3_str, builder, ctx| { - match str::from_utf8(h3_str) - .map_err(|e| e.to_string()) - .and_then(|h3_str| str::parse::(h3_str).map_err(|e| e.to_string())) - { + match h3_str.parse::() { Ok(index) => builder.push(index.into()), Err(err) => { - ctx.set_error(builder.len(), err); + ctx.set_error(builder.len(), err.to_string()); builder.push(0); } } diff --git a/src/query/functions/src/scalars/hash.rs b/src/query/functions/src/scalars/hash.rs index 66034e6d746a..152ee21b4f27 100644 --- a/src/query/functions/src/scalars/hash.rs +++ b/src/query/functions/src/scalars/hash.rs @@ -104,7 +104,7 @@ pub fn register(registry: &mut FunctionRegistry) { output.data.resize(old_len + 40, 0); // TODO sha1 lib doesn't allow encode into buffer... let mut m = ::sha1::Sha1::new(); - sha1::digest::Update::update(&mut m, val); + sha1::digest::Update::update(&mut m, val.as_bytes()); if let Err(err) = hex::encode_to_slice(m.finalize().as_slice(), &mut output.data[old_len..]) @@ -124,9 +124,10 @@ pub fn register(registry: &mut FunctionRegistry) { |val, output, ctx| { let old_len = output.data.len(); output.data.resize(old_len + 64, 0); - if let Err(err) = - hex::encode_to_slice(blake3::hash(val).as_bytes(), &mut output.data[old_len..]) - { + if let Err(err) = hex::encode_to_slice( + blake3::hash(val.as_bytes()).as_bytes(), + &mut output.data[old_len..], + ) { ctx.set_error(output.len(), err.to_string()); } output.commit_row(); @@ -143,22 +144,22 @@ pub fn register(registry: &mut FunctionRegistry) { let res = match l { 224 => { let mut h = sha2::Sha224::new(); - sha2::digest::Update::update(&mut h, val); + sha2::digest::Update::update(&mut h, val.as_bytes()); format!("{:x}", h.finalize()) } 256 | 0 => { let mut h = sha2::Sha256::new(); - sha2::digest::Update::update(&mut h, val); + sha2::digest::Update::update(&mut h, val.as_bytes()); format!("{:x}", h.finalize()) } 384 => { let mut h = sha2::Sha384::new(); - sha2::digest::Update::update(&mut h, val); + sha2::digest::Update::update(&mut h, val.as_bytes()); format!("{:x}", h.finalize()) } 512 => { let mut h = sha2::Sha512::new(); - sha2::digest::Update::update(&mut h, val); + sha2::digest::Update::update(&mut h, val.as_bytes()); format!("{:x}", h.finalize()) } v => { @@ -172,7 +173,7 @@ pub fn register(registry: &mut FunctionRegistry) { String::new() }, }; - output.put_slice(res.as_bytes()); + output.put_str(&res); output.commit_row(); }, ), @@ -340,6 +341,27 @@ impl<'a> DFHash for &'a [u8] { } } +impl<'a> DFHash for &'a str { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash_slice(self.as_bytes(), state); + } +} + +impl DFHash for [u8] { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash_slice(self, state); + } +} + +impl DFHash for str { + #[inline] + fn hash(&self, state: &mut H) { + Hash::hash_slice(self.as_bytes(), state); + } +} + impl DFHash for bool { #[inline] fn hash(&self, state: &mut H) { @@ -357,10 +379,11 @@ impl DFHash for Scalar { DFHash::hash(v, state); } }), - Scalar::Binary(vals) | Scalar::String(vals) | Scalar::Variant(vals) => { - for v in vals { - DFHash::hash(v, state); - } + Scalar::Binary(vals) | Scalar::Variant(vals) => { + DFHash::hash(vals.as_slice(), state); + } + Scalar::String(vals) => { + 
DFHash::hash(vals.as_str(), state); } _ => {} } diff --git a/src/query/functions/src/scalars/math.rs b/src/query/functions/src/scalars/math.rs index a89953a91724..a81c592307ef 100644 --- a/src/query/functions/src/scalars/math.rs +++ b/src/query/functions/src/scalars/math.rs @@ -209,7 +209,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_1_arg::, _, _>( "crc32", |_, _| FunctionDomain::Full, - |val, _| crc32fast::hash(val), + |val, _| crc32fast::hash(val.as_bytes()), ); registry.register_1_arg::, NumberType, _, _>( diff --git a/src/query/functions/src/scalars/other.rs b/src/query/functions/src/scalars/other.rs index 46c02e2ca2fa..e3abc326c4e1 100644 --- a/src/query/functions/src/scalars/other.rs +++ b/src/query/functions/src/scalars/other.rs @@ -164,7 +164,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_1_arg_core::, StringType, _, _>( "typeof", |_, _| FunctionDomain::Full, - |_, ctx| Value::Scalar(ctx.generics[0].sql_name().into_bytes()), + |_, ctx| Value::Scalar(ctx.generics[0].sql_name()), ); registry.register_function_factory("ignore", |_, args_type| { @@ -249,8 +249,7 @@ fn register_inet_aton(registry: &mut FunctionRegistry) { ); fn eval_inet_aton(val: ValueRef, ctx: &mut EvalContext) -> Value { - vectorize_with_builder_1_arg::(|v, output, ctx| { - let addr_str = String::from_utf8_lossy(v); + vectorize_with_builder_1_arg::(|addr_str, output, ctx| { match addr_str.parse::() { Ok(addr) => { let addr_binary = u32::from(addr); diff --git a/src/query/functions/src/scalars/string.rs b/src/query/functions/src/scalars/string.rs index d23c80839521..7a4c59586533 100644 --- a/src/query/functions/src/scalars/string.rs +++ b/src/query/functions/src/scalars/string.rs @@ -15,7 +15,6 @@ use std::cmp::Ordering; use std::io::Write; -use bstr::ByteSlice; use databend_common_base::base::uuid::Uuid; use databend_common_expression::types::decimal::Decimal128Type; use databend_common_expression::types::number::SimpleDomain; @@ -34,16 +33,23 @@ use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; use databend_common_expression::Value; use databend_common_expression::ValueRef; -use itertools::izip; +use stringslice::StringSlice; pub fn register(registry: &mut FunctionRegistry) { registry.register_aliases("to_string", &["to_varchar", "to_text"]); registry.register_aliases("upper", &["ucase"]); registry.register_aliases("lower", &["lcase"]); - registry.register_aliases("length", &["octet_length"]); - registry.register_aliases("char_length", &["character_length", "length_utf8"]); - registry.register_aliases("substr", &["substring", "mid"]); - registry.register_aliases("substr_utf8", &["substring_utf8"]); + registry.register_aliases("length", &[ + "char_length", + "character_length", + "length_utf8", + ]); + registry.register_aliases("substr", &[ + "substring", + "mid", + "substr_utf8", + "substring_utf8", + ]); registry.register_passthrough_nullable_1_arg::( "upper", @@ -51,12 +57,9 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string( |col| col.data().len(), |val, output, _| { - for (start, end, ch) in val.char_indices() { - if ch == '\u{FFFD}' { - // If char is invalid, just copy it. 
- output.put_slice(&val.as_bytes()[start..end]); - } else if ch.is_ascii() { - output.put_u8(ch.to_ascii_uppercase() as u8); + for ch in val.chars() { + if ch.is_ascii() { + output.put_char(ch.to_ascii_uppercase()); } else { for x in ch.to_uppercase() { output.put_char(x); @@ -74,12 +77,9 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string( |col| col.data().len(), |val, output, _| { - for (start, end, ch) in val.char_indices() { - if ch == '\u{FFFD}' { - // If char is invalid, just copy it. - output.put_slice(&val.as_bytes()[start..end]); - } else if ch.is_ascii() { - output.put_u8(ch.to_ascii_lowercase() as u8); + for ch in val.chars() { + if ch.is_ascii() { + output.put_char(ch.to_ascii_lowercase()); } else { for x in ch.to_lowercase() { output.put_char(x); @@ -98,7 +98,7 @@ pub fn register(registry: &mut FunctionRegistry) { ); registry.register_passthrough_nullable_1_arg::, _, _>( - "length", + "octet_length", |_, _| FunctionDomain::Full, |val, _| match val { ValueRef::Scalar(s) => Value::Scalar(s.len() as u64), @@ -115,20 +115,10 @@ pub fn register(registry: &mut FunctionRegistry) { }, ); - registry.register_passthrough_nullable_1_arg::, _, _>( - "char_length", - |_, _| FunctionDomain::MayThrow, - vectorize_with_builder_1_arg::>(|s, output, ctx| { - match std::str::from_utf8(s) { - Ok(s) => { - output.push(s.chars().count() as u64); - } - Err(err) => { - ctx.set_error(output.len(), err.to_string()); - output.push(0); - } - } - }), + registry.register_1_arg::, _, _>( + "length", + |_, _| FunctionDomain::Full, + |val, _ctx| val.chars().count() as u64, ); const MAX_PADDING_LENGTH: usize = 1000000; @@ -138,24 +128,26 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_3_arg::, StringType, StringType>( |s, pad_len, pad, output, ctx| { let pad_len = pad_len as usize; + let s_len = s.chars().count(); if pad_len > MAX_PADDING_LENGTH { ctx.set_error(output.len(), format!("padding length '{}' is too big, max is: '{}'", pad_len, MAX_PADDING_LENGTH)); - } else if pad_len <= s.len() { - output.put_slice(&s[..pad_len]); + } else if pad_len <= s_len { + output.put_str(s.slice(..pad_len)); } else if pad.is_empty() { - ctx.set_error(output.len(), format!("can't fill the '{}' length to '{}' with an empty pad string", String::from_utf8_lossy(s), pad_len)); + ctx.set_error(output.len(), format!("can't fill the '{}' length to '{}' with an empty pad string", s, pad_len)); } else { - let mut remain_pad_len = pad_len - s.len(); + let mut remain_pad_len = pad_len - s_len; + let p_len = pad.chars().count(); while remain_pad_len > 0 { - if remain_pad_len < pad.len() { - output.put_slice(&pad[..remain_pad_len]); + if remain_pad_len < p_len { + output.put_str(pad.slice(..remain_pad_len)); remain_pad_len = 0; } else { - output.put_slice(pad); - remain_pad_len -= pad.len(); + output.put_str(pad); + remain_pad_len -= p_len; } } - output.put_slice(s); + output.put_str(s); } output.commit_row(); } @@ -169,14 +161,15 @@ pub fn register(registry: &mut FunctionRegistry) { |srcstr, pos, len, substr, output, _| { let pos = pos as usize; let len = len as usize; - if pos < 1 || pos > srcstr.len() { - output.put_slice(srcstr); + let srcstr_len = srcstr.chars().count(); + if pos < 1 || pos > srcstr_len { + output.put_str(srcstr); } else { let pos = pos - 1; - output.put_slice(&srcstr[0..pos]); - output.put_slice(substr); - if pos + len < srcstr.len() { - output.put_slice(&srcstr[(pos + len)..]); + output.put_str(srcstr.slice(0..pos)); + output.put_str(substr); + if pos + len 
< srcstr_len { + output.put_str(srcstr.slice(pos + len .. )); } } output.commit_row(); @@ -187,29 +180,31 @@ pub fn register(registry: &mut FunctionRegistry) { "rpad", |_, _, _, _| FunctionDomain::MayThrow, vectorize_with_builder_3_arg::, StringType, StringType>( - |s: &[u8], pad_len: u64, pad: &[u8], output, ctx| { - let pad_len = pad_len as usize; - if pad_len > MAX_PADDING_LENGTH { - ctx.set_error(output.len(), format!("padding length '{}' is too big, max is: '{}'", pad_len, MAX_PADDING_LENGTH)); - } else if pad_len <= s.len() { - output.put_slice(&s[..pad_len]) - } else if pad.is_empty() { - ctx.set_error(output.len(), format!("can't fill the '{}' length to '{}' with an empty pad string", String::from_utf8_lossy(s), pad_len)); - } else { - output.put_slice(s); - let mut remain_pad_len = pad_len - s.len(); - while remain_pad_len > 0 { - if remain_pad_len < pad.len() { - output.put_slice(&pad[..remain_pad_len]); - remain_pad_len = 0; - } else { - output.put_slice(pad); - remain_pad_len -= pad.len(); + |s, pad_len, pad, output, ctx| { + let pad_len = pad_len as usize; + let s_len = s.chars().count(); + if pad_len > MAX_PADDING_LENGTH { + ctx.set_error(output.len(), format!("padding length '{}' is too big, max is: '{}'", pad_len, MAX_PADDING_LENGTH)); + } else if pad_len <= s_len { + output.put_str(s.slice(..pad_len)); + } else if pad.is_empty() { + ctx.set_error(output.len(), format!("can't fill the '{}' length to '{}' with an empty pad string", s, pad_len)); + } else { + output.put_str(s); + let mut remain_pad_len = pad_len - s_len; + let p_len = pad.chars().count(); + while remain_pad_len > 0 { + if remain_pad_len < p_len { + output.put_str(pad.slice(..remain_pad_len)); + remain_pad_len = 0; + } else { + output.put_str(pad); + remain_pad_len -= p_len; + } } } - } - output.commit_row(); - }), + output.commit_row(); + }), ); registry.register_passthrough_nullable_3_arg::( @@ -217,26 +212,22 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _, _| FunctionDomain::Full, vectorize_with_builder_3_arg::( |str, from, to, output, _| { - if from.is_empty() || from == to { - output.put_slice(str); - output.commit_row(); - return; - } - let mut skip = 0; - for (p, w) in str.windows(from.len()).enumerate() { - if w == from { - output.put_slice(to); - skip = from.len(); - } else if p + w.len() == str.len() { - output.put_slice(w); - } else if skip > 1 { - skip -= 1; - } else { - output.put_slice(&w[0..1]); + if from.is_empty() || from == to { + output.put_str(str); + output.commit_row(); + return; } - } - output.commit_row(); - }), + + let mut last_end = 0; + for (start, _) in str.match_indices(from) { + output.put_str(&str[last_end..start]); + output.put_str(to); + last_end = start + from.len(); + } + output.put_str(&str[last_end..]); + + output.commit_row(); + }), ); registry.register_passthrough_nullable_3_arg::( @@ -244,23 +235,23 @@ pub fn register(registry: &mut FunctionRegistry) { |_, _, _, _| FunctionDomain::Full, vectorize_with_builder_3_arg::( |str, from, to, output, _| { - if from.is_empty() || from == to { - output.put_slice(str); - output.commit_row(); - return; - } - let to_len = to.len(); - str.iter().for_each(|x| { - if let Some(index) = from.find([*x]) { - if index < to_len { - output.put_u8(to[index]); - } - } else { - output.put_u8(*x); + if from.is_empty() || from == to { + output.put_str(str); + output.commit_row(); + return; } - }); - output.commit_row(); - }), + let to_len = to.chars().count(); + str.chars().for_each(|x| { + if let Some(index) = 
from.chars().position(|c| c == x) { + if index < to_len { + output.put_char(to.chars().nth(index).unwrap()); + } + } else { + output.put_char(x); + } + }); + output.commit_row(); + }), ); registry.register_passthrough_nullable_1_arg::( @@ -269,7 +260,7 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_1_arg::(|arg, output, _| { let uuid = Uuid::from_u128(arg as u128); let str = uuid.as_simple().to_string(); - output.put_slice(str.as_bytes()); + output.put_str(str.as_str()); output.commit_row(); }), ); @@ -277,32 +268,14 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_2_arg::, _, _>( "strcmp", |_, _, _| FunctionDomain::Full, - |s1, s2, _| { - let res = match s1.len().cmp(&s2.len()) { - Ordering::Equal => { - let mut res = Ordering::Equal; - for (s1i, s2i) in izip!(s1, s2) { - match s1i.cmp(s2i) { - Ordering::Equal => continue, - ord => { - res = ord; - break; - } - } - } - res - } - ord => ord, - }; - match res { - Ordering::Equal => 0, - Ordering::Greater => 1, - Ordering::Less => -1, - } + |s1, s2, _| match s1.cmp(s2) { + Ordering::Equal => 0, + Ordering::Greater => 1, + Ordering::Less => -1, }, ); - let find_at = |str: &[u8], substr: &[u8], pos: u64| { + let find_at = |s: &str, substr: &str, pos: u64| { if substr.is_empty() { // the same behavior as MySQL, Postgres and Clickhouse return if pos == 0 { 1_u64 } else { pos }; @@ -313,37 +286,37 @@ pub fn register(registry: &mut FunctionRegistry) { return 0_u64; } let p = pos - 1; - if p + substr.len() <= str.len() { - str[p..] - .windows(substr.len()) - .position(|w| w == substr) - .map_or(0, |i| i + 1 + p) as u64 + + let src = s.slice(p..); + if let Some(find_at) = src.find(substr) { + (src[..find_at].chars().count() + p + 1) as u64 } else { 0_u64 } }; + registry.register_2_arg::, _, _>( "instr", |_, _, _| FunctionDomain::Full, - move |str: &[u8], substr: &[u8], _| find_at(str, substr, 1), + move |s: &str, substr: &str, _| find_at(s, substr, 1), ); registry.register_2_arg::, _, _>( "position", |_, _, _| FunctionDomain::Full, - move |substr: &[u8], str: &[u8], _| find_at(str, substr, 1), + move |substr: &str, s: &str, _| find_at(s, substr, 1), ); registry.register_2_arg::, _, _>( "locate", |_, _, _| FunctionDomain::Full, - move |substr: &[u8], str: &[u8], _| find_at(str, substr, 1), + move |substr: &str, s: &str, _| find_at(s, substr, 1), ); registry.register_3_arg::, NumberType, _, _>( "locate", |_, _, _, _| FunctionDomain::Full, - move |substr: &[u8], str: &[u8], pos: u64, _| find_at(str, substr, pos), + move |substr: &str, s: &str, pos: u64, _| find_at(s, substr, pos), ); registry.register_passthrough_nullable_1_arg::( @@ -352,17 +325,17 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string( |col| col.data().len() * 2, |val, output, _| { - for ch in val { + for ch in val.chars() { match ch { - 0 => output.put_slice(&[b'\\', b'0']), - b'\'' => output.put_slice(&[b'\\', b'\'']), - b'\"' => output.put_slice(&[b'\\', b'\"']), - 8 => output.put_slice(&[b'\\', b'b']), - b'\n' => output.put_slice(&[b'\\', b'n']), - b'\r' => output.put_slice(&[b'\\', b'r']), - b'\t' => output.put_slice(&[b'\\', b't']), - b'\\' => output.put_slice(&[b'\\', b'\\']), - c => output.put_u8(*c), + '\0' => output.put_str("\\0"), + '\'' => output.put_str("\\\'"), + '\"' => output.put_str("\\\""), + '\u{8}' => output.put_str("\\b"), + '\n' => output.put_str("\\n"), + '\r' => output.put_str("\\r"), + '\t' => output.put_str("\\t"), + '\\' => output.put_str("\\\\"), + c => output.put_char(c), } } 
output.commit_row(); @@ -376,10 +349,9 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string( |col| col.data().len(), |val, output, _| { - let start = output.data.len(); - output.put_slice(val); - let buf = &mut output.data[start..]; - buf.reverse(); + for char in val.chars().rev() { + output.put_char(char); + } output.commit_row(); }, ), @@ -389,14 +361,15 @@ pub fn register(registry: &mut FunctionRegistry) { "ascii", |_, domain| { FunctionDomain::Domain(SimpleDomain { - min: domain.min.first().cloned().unwrap_or(0), + min: domain.min.as_bytes().first().map_or(0, |v| *v), max: domain .max .as_ref() - .map_or(u8::MAX, |v| v.first().cloned().unwrap_or_default()), + .and_then(|x| x.as_bytes().first()) + .map_or(u8::MAX, |v| *v), }) }, - |val, _| val.first().cloned().unwrap_or_default(), + |val, _| val.as_bytes().first().map_or(0, |v| *v), ); // Trim functions @@ -406,10 +379,7 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string( |col| col.data().len(), |val, output, _| { - let pos = val.iter().position(|ch| *ch != b' ' && *ch != b'\t'); - if let Some(idx) = pos { - output.put_slice(&val.as_bytes()[idx..]); - } + output.put_str(val.trim_start()); output.commit_row(); }, ), @@ -421,10 +391,7 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string( |col| col.data().len(), |val, output, _| { - let pos = val.iter().rev().position(|ch| *ch != b' ' && *ch != b'\t'); - if let Some(idx) = pos { - output.put_slice(&val.as_bytes()[..val.len() - idx]); - } + output.put_str(val.trim_end()); output.commit_row(); }, ), @@ -436,11 +403,7 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string( |col| col.data().len(), |val, output, _| { - let start_pos = val.iter().position(|ch| *ch != b' ' && *ch != b'\t'); - let end_pos = val.iter().rev().position(|ch| *ch != b' ' && *ch != b'\t'); - if let (Some(start_idx), Some(end_idx)) = (start_pos, end_pos) { - output.put_slice(&val.as_bytes()[start_idx..val.len() - end_idx]); - } + output.put_str(val.trim()); output.commit_row(); }, ), @@ -452,17 +415,13 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string_2_arg( |col, _| col.data().len(), |val, trim_str, _, output| { - let chunk_size = trim_str.len(); - if chunk_size == 0 { - output.put_slice(val); + if trim_str.is_empty() { + output.put_str(val); output.commit_row(); return; } - let pos = val.chunks(chunk_size).position(|chunk| chunk != trim_str); - if let Some(idx) = pos { - output.put_slice(&val.as_bytes()[idx * chunk_size..]); - } + output.put_str(val.trim_start_matches(trim_str)); output.commit_row(); }, ), @@ -474,17 +433,13 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string_2_arg( |col, _| col.data().len(), |val, trim_str, _, output| { - let chunk_size = trim_str.len(); - if chunk_size == 0 { - output.put_slice(val); + if trim_str.is_empty() { + output.put_str(val); output.commit_row(); return; } - let pos = val.rchunks(chunk_size).position(|chunk| chunk != trim_str); - if let Some(idx) = pos { - output.put_slice(&val.as_bytes()[..val.len() - idx * chunk_size]); - } + output.put_str(val.trim_end_matches(trim_str)); output.commit_row(); }, ), @@ -496,29 +451,22 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_string_to_string_2_arg( |col, _| col.data().len(), |val, trim_str, _, output| { - let chunk_size = trim_str.len(); - if chunk_size == 0 { - output.put_slice(val); + if trim_str.is_empty() { + output.put_str(val); 
output.commit_row(); return; } - let start_pos = val.chunks(chunk_size).position(|chunk| chunk != trim_str); + let mut res = val; - // Trim all - if start_pos.is_none() { - output.commit_row(); - return; + while res.starts_with(trim_str) { + res = &res[trim_str.len()..]; } - - let end_pos = val.rchunks(chunk_size).position(|chunk| chunk != trim_str); - - if let (Some(start_idx), Some(end_idx)) = (start_pos, end_pos) { - output.put_slice( - &val.as_bytes()[start_idx * chunk_size..val.len() - end_idx * chunk_size], - ); + while res.ends_with(trim_str) { + res = &res[..res.len() - trim_str.len()]; } + output.put_str(res); output.commit_row(); }, ), @@ -581,7 +529,7 @@ pub fn register(registry: &mut FunctionRegistry) { ), ); } else { - (0..times).for_each(|_| output.put_slice(a)); + (0..times).for_each(|_| output.put_str(a)); } output.commit_row(); }, @@ -591,7 +539,8 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_1_arg::( "ord", |_, _| FunctionDomain::Full, - |str: &[u8], _| { + |str: &str, _| { + let str = str.as_bytes(); let mut res: u64 = 0; if !str.is_empty() { if str[0].is_ascii() { @@ -621,7 +570,6 @@ pub fn register(registry: &mut FunctionRegistry) { ), ); - const SPACE: u8 = 0x20; const MAX_SPACE_LENGTH: u64 = 1000000; registry.register_passthrough_nullable_1_arg::, StringType, _, _>( "space", @@ -633,9 +581,9 @@ pub fn register(registry: &mut FunctionRegistry) { 0, format!("space length is too big, max is: {}", MAX_SPACE_LENGTH), ); - Value::Scalar(vec![]) + Value::Scalar("".to_string()) } else { - Value::Scalar(vec![SPACE; times as usize]) + Value::Scalar(" ".repeat(times as usize)) } } ValueRef::Column(col) => { @@ -658,7 +606,7 @@ pub fn register(registry: &mut FunctionRegistry) { total_space = 0; } let col = StringColumnBuilder { - data: vec![SPACE; total_space as usize], + data: " ".repeat(total_space as usize).into_bytes(), offsets, need_estimated: false, } @@ -674,10 +622,11 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_2_arg::, StringType>( |s, n, output, _| { let n = n as usize; - if n < s.len() { - output.put_slice(&s[0..n]); + let s_len = s.chars().count(); + if n < s_len { + output.put_str(s.slice(0..n)); } else { - output.put_slice(s); + output.put_str(s); } output.commit_row(); }, @@ -690,10 +639,11 @@ pub fn register(registry: &mut FunctionRegistry) { vectorize_with_builder_2_arg::, StringType>( |s, n, output, _| { let n = n as usize; - if n < s.len() { - output.put_slice(&s[s.len() - n..]); + let s_len = s.chars().count(); + if n < s_len { + output.put_str(s.slice(s_len - n..)); } else { - output.put_slice(s); + output.put_str(s); } output.commit_row(); }, @@ -704,82 +654,38 @@ pub fn register(registry: &mut FunctionRegistry) { "substr", |_, _, _| FunctionDomain::Full, vectorize_with_builder_2_arg::, StringType>( - |s, pos, output, _| { - output.put_slice(substr(s, pos, s.len() as u64)); - output.commit_row(); + |s, pos, output, _ctx| { + substr(output, s, pos, s.len() as u64); }, ), ); registry.register_passthrough_nullable_3_arg::, NumberType, StringType, _, _>( "substr", - |_, _, _, _| FunctionDomain::Full, - vectorize_with_builder_3_arg::, NumberType, StringType>(|s, pos, len, output, _| { - output.put_slice(substr(s, pos, len)); - output.commit_row(); - }), - ); - - registry.register_passthrough_nullable_2_arg::, StringType, _, _>( - "substr_utf8", - |_, _, _| FunctionDomain::MayThrow, - vectorize_with_builder_2_arg::, StringType>( - |s, pos, output, ctx| match std::str::from_utf8(s) { - Ok(s) => 
substr_utf8(output, s, pos, s.len() as u64), - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.commit_row(); - } - }, - ), - ); - - registry.register_passthrough_nullable_3_arg::, NumberType, StringType, _, _>( - "substr_utf8", - |_, _, _, _| FunctionDomain::MayThrow, - vectorize_with_builder_3_arg::, NumberType, StringType>(|s, pos, len, output, ctx| { - match std::str::from_utf8(s) { - Ok(s) => substr_utf8(output, s, pos, len), - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.commit_row(); - }, - } - }), - ); + |_, _, _, _| FunctionDomain::Full, + vectorize_with_builder_3_arg::, NumberType, StringType>(|s, pos, len, output, _ctx| { + substr(output, s, pos, len); + }), + ); registry .register_passthrough_nullable_2_arg::, _, _>( "split", |_, _, _| FunctionDomain::Full, vectorize_with_builder_2_arg::>( - |str, sep, output, ctx| match std::str::from_utf8(str) { - Ok(s) => match std::str::from_utf8(sep) { - Ok(sep) => { - if s == sep { - output.builder.put_slice(&[]); - output.builder.commit_row(); - } else if sep.is_empty() { - output.builder.put_slice(str); - output.builder.commit_row(); - } else { - let split = s.split(&sep); - for i in split { - output.builder.put_slice(i.as_bytes()); - output.builder.commit_row(); - } - } - output.commit_row() - } - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.commit_row(); + |s, sep, output, _ctx| { + if s == sep { + output.builder.commit_row(); + } else if sep.is_empty() { + output.builder.put_str(s); + output.builder.commit_row(); + } else { + for v in s.split(sep) { + output.builder.put_str(v); + output.builder.commit_row(); } - }, - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.commit_row(); } + output.commit_row(); }, ), ); @@ -789,52 +695,35 @@ pub fn register(registry: &mut FunctionRegistry) { "split_part", |_, _, _, _| FunctionDomain::Full, vectorize_with_builder_3_arg::, StringType>( - |str, sep, part, output, ctx| match std::str::from_utf8(str) { - Ok(s) => match std::str::from_utf8(sep) { - Ok(sep) => { - if s == sep { - output.commit_row() - } else if sep.is_empty() { - if part == 0 || part == 1 || part == -1 { - output.put_slice(str); + |s, sep, part, output, _| { + if sep.is_empty() { + if part == 0 || part == 1 || part == -1 { + output.put_str(s); + } + } else if s != sep { + if part < 0 { + let idx = (-part-1) as usize; + for (i, v) in s.rsplit(sep).enumerate() { + if i == idx { + output.put_str(v); + break; } - output.commit_row() + } + } else { + let idx = if part == 0 { + 0usize } else { - if part < 0 { - let split = s.rsplit(&sep); - let idx = (-part-1) as usize; - for (count, i) in split.enumerate() { - if idx == count { - output.put_slice(i.as_bytes()); - break - } - } - } else { - let split = s.split(&sep); - let idx = if part == 0 { - 0usize - } else { - (part - 1) as usize - }; - for (count, i) in split.enumerate() { - if idx == count { - output.put_slice(i.as_bytes()); - break - } - } + (part - 1) as usize + }; + for (i, v) in s.split(sep).enumerate() { + if i == idx { + output.put_str(v); + break; } - output.commit_row(); } } - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.commit_row(); - } - }, - Err(e) => { - ctx.set_error(output.len(), e.to_string()); - output.commit_row(); } + output.commit_row(); }, ), ) @@ -844,11 +733,11 @@ pub(crate) mod soundex { use databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::EvalContext; - pub fn soundex(val: &[u8], output: &mut 
StringColumnBuilder, _eval_context: &mut EvalContext) { + pub fn soundex(val: &str, output: &mut StringColumnBuilder, _eval_context: &mut EvalContext) { let mut last = None; let mut count = 0; - for ch in String::from_utf8_lossy(val).chars() { + for ch in val.chars() { let score = number_map(ch); if last.is_none() { if !is_uni_alphabetic(ch) { @@ -903,30 +792,7 @@ pub(crate) mod soundex { } #[inline] -fn substr(str: &[u8], pos: i64, len: u64) -> &[u8] { - if pos > 0 && pos <= str.len() as i64 { - let l = str.len(); - let s = (pos - 1) as usize; - let mut e = len as usize + s; - if e > l { - e = l; - } - return &str[s..e]; - } - if pos < 0 && -(pos) <= str.len() as i64 { - let l = str.len(); - let s = l - -pos as usize; - let mut e = len as usize + s; - if e > l { - e = l; - } - return &str[s..e]; - } - &str[0..0] -} - -#[inline] -fn substr_utf8(builder: &mut StringColumnBuilder, str: &str, pos: i64, len: u64) { +fn substr(builder: &mut StringColumnBuilder, str: &str, pos: i64, len: u64) { if pos == 0 || len == 0 { builder.commit_row(); return; @@ -948,7 +814,7 @@ fn substr_utf8(builder: &mut StringColumnBuilder, str: &str, pos: i64, len: u64) /// String to String scalar function with estimated output column capacity. pub fn vectorize_string_to_string( estimate_bytes: impl Fn(&StringColumn) -> usize + Copy, - func: impl Fn(&[u8], &mut StringColumnBuilder, &mut EvalContext) + Copy, + func: impl Fn(&str, &mut StringColumnBuilder, &mut EvalContext) + Copy, ) -> impl Fn(ValueRef, &mut EvalContext) -> Value + Copy { move |arg1, ctx| match arg1 { ValueRef::Scalar(val) => { @@ -971,7 +837,7 @@ pub fn vectorize_string_to_string( /// (String, String) to String scalar function with estimated output column capacity. fn vectorize_string_to_string_2_arg( estimate_bytes: impl Fn(&StringColumn, &StringColumn) -> usize + Copy, - func: impl Fn(&[u8], &[u8], &mut EvalContext, &mut StringColumnBuilder) + Copy, + func: impl Fn(&str, &str, &mut EvalContext, &mut StringColumnBuilder) + Copy, ) -> impl Fn(ValueRef, ValueRef, &mut EvalContext) -> Value + Copy { move |arg1, arg2, ctx| match (arg1, arg2) { diff --git a/src/query/functions/src/scalars/string_multi_args.rs b/src/query/functions/src/scalars/string_multi_args.rs index e78020417a92..f8135f3745c8 100644 --- a/src/query/functions/src/scalars/string_multi_args.rs +++ b/src/query/functions/src/scalars/string_multi_args.rs @@ -19,8 +19,6 @@ use databend_common_expression::passthrough_nullable; use databend_common_expression::types::nullable::NullableColumn; use databend_common_expression::types::number::Int64Type; use databend_common_expression::types::number::NumberScalar; -use databend_common_expression::types::number::UInt8Type; -use databend_common_expression::types::string::StringColumn; use databend_common_expression::types::string::StringColumnBuilder; use databend_common_expression::types::string::StringDomain; use databend_common_expression::types::NumberColumn; @@ -123,9 +121,9 @@ pub fn register(registry: &mut FunctionRegistry) { for idx in 0..size { for (arg_index, arg) in args.iter().skip(1).enumerate() { if arg_index != 0 { - builder.put_slice(sep); + builder.put_str(sep); } - unsafe { builder.put_slice(arg.index_unchecked(idx)) } + builder.put_str(unsafe { arg.index_unchecked(idx) }); } builder.commit_row(); } @@ -134,11 +132,9 @@ pub fn register(registry: &mut FunctionRegistry) { for idx in 0..size { for (arg_index, arg) in args.iter().skip(1).enumerate() { if arg_index != 0 { - unsafe { - builder.put_slice(c.index_unchecked(idx)); - } + 
builder.put_str(unsafe { c.index_unchecked(idx) }); } - unsafe { builder.put_slice(arg.index_unchecked(idx)) } + builder.put_str(unsafe { arg.index_unchecked(idx) }); } builder.commit_row(); } @@ -197,9 +193,9 @@ pub fn register(registry: &mut FunctionRegistry) { .enumerate() { if i != 0 { - builder.put_slice(v); + builder.put_str(v); } - builder.put_slice(s); + builder.put_str(s); } builder.commit_row(); } @@ -212,16 +208,16 @@ pub fn register(registry: &mut FunctionRegistry) { unsafe { match new_args[0].index_unchecked(idx) { Some(sep) => { - for (i, str) in new_args + for (i, s) in new_args .iter() .skip(1) .filter_map(|arg| arg.index_unchecked(idx)) .enumerate() { if i != 0 { - builder.put_slice(sep); + builder.put_str(sep); } - builder.put_slice(str); + builder.put_str(s); } builder.commit_row(); validity.push(true); @@ -247,53 +243,6 @@ pub fn register(registry: &mut FunctionRegistry) { })) }); - registry.register_function_factory("char", |_, args_type| { - if args_type.is_empty() { - return None; - } - let has_null = args_type.iter().any(|t| t.is_nullable_or_null()); - let f = Function { - signature: FunctionSignature { - name: "char".to_string(), - args_type: vec![DataType::Number(NumberDataType::UInt8); args_type.len()], - return_type: DataType::String, - }, - eval: FunctionEval::Scalar { - calc_domain: Box::new(|_, _| FunctionDomain::Full), - eval: Box::new(char_fn), - }, - }; - - if has_null { - Some(Arc::new(f.passthrough_nullable())) - } else { - Some(Arc::new(f)) - } - }); - - // nullable char - registry.register_function_factory("char", |_, args_type| { - if args_type.is_empty() { - return None; - } - Some(Arc::new(Function { - signature: FunctionSignature { - name: "char".to_string(), - args_type: vec![ - DataType::Nullable(Box::new(DataType::Number( - NumberDataType::UInt8 - ))); - args_type.len() - ], - return_type: DataType::Nullable(Box::new(DataType::String)), - }, - eval: FunctionEval::Scalar { - calc_domain: Box::new(|_, _| FunctionDomain::MayThrow), - eval: Box::new(passthrough_nullable(char_fn)), - }, - })) - }); - // Notes: https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-instr registry.register_function_factory("regexp_instr", |_, args_type| { let has_null = args_type.iter().any(|t| t.is_nullable_or_null()); @@ -484,7 +433,7 @@ fn concat_fn(args: &[ValueRef], _: &mut EvalContext) -> Value let mut builder = StringColumnBuilder::with_capacity(size, 0); for idx in 0..size { for arg in &args { - unsafe { builder.put_slice(arg.index_unchecked(idx)) } + builder.put_str(unsafe { arg.index_unchecked(idx) }); } builder.commit_row(); } @@ -495,51 +444,6 @@ fn concat_fn(args: &[ValueRef], _: &mut EvalContext) -> Value } } -fn char_fn(args: &[ValueRef], _: &mut EvalContext) -> Value { - let args = args - .iter() - .map(|arg| arg.try_downcast::().unwrap()) - .collect::>(); - - let len = args.iter().find_map(|arg| match arg { - ValueRef::Column(col) => Some(col.len()), - _ => None, - }); - let input_rows = len.unwrap_or(1); - - let mut values: Vec = vec![0; input_rows * args.len()]; - let values_ptr = values.as_mut_ptr(); - - for (i, arg) in args.iter().enumerate() { - match arg { - ValueRef::Scalar(v) => { - for j in 0..input_rows { - unsafe { - *values_ptr.add(args.len() * j + i) = *v; - } - } - } - ValueRef::Column(c) => { - for (j, ch) in UInt8Type::iter_column(c).enumerate() { - unsafe { - *values_ptr.add(args.len() * j + i) = ch; - } - } - } - } - } - let offsets = (0..(input_rows + 1) as u64 * args.len() as u64) - .step_by(args.len()) - .collect::>(); 
- let result = StringColumn::new(values.into(), offsets.into()); - - let col = Column::String(result); - match len { - Some(_) => Value::Column(col), - _ => Value::Scalar(AnyType::index_column(&col, 0).unwrap().to_owned()), - } -} - fn regexp_instr_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value { let len = args.iter().find_map(|arg| match arg { ValueRef::Column(col) => Some(col.len()), @@ -589,7 +493,7 @@ fn regexp_instr_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value], ctx: &mut EvalContext) -> Value], ctx: &mut EvalContext) -> Value } if source.is_empty() || pat.is_empty() { - builder.data.extend_from_slice(source); + builder.put_str(source); builder.commit_row(); continue; } @@ -809,7 +716,7 @@ fn regexp_replace_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value let pos = pos.unwrap_or(1); let occur = occur.unwrap_or(0); - regexp::regexp_replace(source, re, repl, pos, occur, &mut builder.data); + regexp::regexp_replace(source, re, repl, pos, occur, &mut builder); builder.commit_row(); } match len { @@ -911,7 +818,7 @@ fn regexp_substr_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value< let substr = regexp::regexp_substr(source, re, pos, occur); match substr { Some(substr) => { - builder.put_slice(substr); + builder.put_str(substr); validity.push(true); } None => { @@ -942,33 +849,31 @@ fn regexp_substr_fn(args: &[ValueRef], ctx: &mut EvalContext) -> Value< } pub mod regexp { - use bstr::ByteSlice; - use regex::bytes::Match; - use regex::bytes::Regex; - use regex::bytes::RegexBuilder; + use databend_common_expression::types::string::StringColumnBuilder; + use regex::Regex; + use regex::RegexBuilder; #[inline] pub fn build_regexp_from_pattern( fn_name: &str, - pat: &[u8], - mt: Option<&[u8]>, + pat: &str, + mt: Option<&str>, ) -> Result { let pattern = match pat.is_empty() { true => "^$", - false => simdutf8::basic::from_utf8(pat).map_err(|e| { - format!("Unable to convert the {} pattern to string: {}", fn_name, e) - })?, + false => pat, }; + // the default match type value is 'i', if it is empty let mt = match mt { Some(mt) => { if mt.is_empty() { - "i".as_bytes() + "i" } else { mt } } - None => "i".as_bytes(), + None => "i", }; let mut builder = RegexBuilder::new(pattern); @@ -1040,115 +945,76 @@ pub mod regexp { } #[inline] - pub fn regexp_instr(s: &[u8], re: &Regex, pos: i64, occur: i64, ro: i64) -> u64 { - let pos = (pos - 1) as usize; // set the index start from 0 - - // the 'pos' position is the character index, - // so we should iterate the character to find the byte index. - let mut pos = match s.char_indices().nth(pos) { - Some((start, _, _)) => start, - None => return 0, - }; + pub fn regexp_instr( + expr: &str, + regex: &Regex, + pos: i64, + occurrence: i64, + return_option: i64, + ) -> u64 { + if let Some(m) = regex.find_iter(expr).nth((occurrence - 1) as usize) { + let find_pos = if return_option == 0 { + m.start() + } else { + m.end() + }; - let m = regexp_match_result(s, re, &mut pos, &occur); - if m.is_none() { - return 0; + let count = expr[..find_pos].chars().count() as i64; + return (count + pos) as _; } - - // the matched result is the byte index, but the 'regexp_instr' function returns the character index, - // so we should iterate the character to find the character index. 
- let mut instr = 0_usize; - for (p, (start, end, _)) in s.char_indices().enumerate() { - if ro == 0 { - if start == m.unwrap().start() { - instr = p + 1; - break; - } - } else if end == m.unwrap().end() { - instr = p + 2; - break; - } - } - - instr as u64 + 0 } #[inline] pub fn regexp_replace( - s: &[u8], + s: &str, re: &Regex, - repl: &[u8], + repl: &str, pos: i64, occur: i64, - buf: &mut Vec, + builder: &mut StringColumnBuilder, ) { let pos = (pos - 1) as usize; // set the index start from 0 - // the 'pos' position is the character index, // so we should iterate the character to find the byte index. - let mut pos = match s.char_indices().nth(pos) { - Some((start, _, _)) => start, + let char_pos = match s.char_indices().nth(pos) { + Some((start, _)) => start, None => { - buf.extend_from_slice(s); + builder.put_str(s); return; } }; - let m = regexp_match_result(s, re, &mut pos, &occur); - if m.is_none() { - buf.extend_from_slice(s); - return; - } - - buf.extend_from_slice(&s[..m.unwrap().start()]); + let (before_trimmed, trimmed) = (&s[..char_pos], &s[char_pos..]); + builder.put_str(before_trimmed); + // means we should replace all matched strings if occur == 0 { - let s = &s[m.unwrap().start()..]; - buf.extend_from_slice(&re.replace_all(s, repl)); + builder.put_str(&re.replace_all(trimmed, repl)); + } else if let Some(capature) = re.captures_iter(trimmed).nth((occur - 1) as _) { + // unwrap on 0 is OK because captures only reports matches. + let m = capature.get(0).unwrap(); + builder.put_str(&trimmed[0..m.start()]); + builder.put_str(repl); + builder.put_str(&trimmed[m.end()..]); } else { - buf.extend_from_slice(repl); - buf.extend_from_slice(&s[m.unwrap().end()..]) + builder.put_str(trimmed); } } #[inline] - pub fn regexp_substr<'a>(s: &'a [u8], re: &Regex, pos: i64, occur: i64) -> Option<&'a [u8]> { + pub fn regexp_substr<'a>(s: &'a str, re: &Regex, pos: i64, occur: i64) -> Option<&'a str> { let occur = if occur < 1 { 1 } else { occur }; let pos = if pos < 1 { 0 } else { (pos - 1) as usize }; // the 'pos' position is the character index, // so we should iterate the character to find the byte index. 
- let mut pos = match s.char_indices().nth(pos) { - Some((start, _, _)) => start, + let char_pos = match s.char_indices().nth(pos) { + Some((start, _)) => start, None => return None, }; - let m = regexp_match_result(s, re, &mut pos, &occur); - - m.map(|m| m.as_bytes()) - } - - #[inline] - fn regexp_match_result<'a>( - s: &'a [u8], - re: &Regex, - pos: &mut usize, - occur: &i64, - ) -> Option> { - let mut i = 1_i64; - let m = loop { - let m = re.find_at(s, *pos); - if i >= *occur || m.is_none() { - break m; - } - - i += 1; - if let Some(m) = m { - // set the start position of 'find_at' function to the position following the matched substring - *pos = m.end(); - } - }; - - m + let m = re.find_iter(&s[char_pos..]).nth((occur - 1) as _); + m.map(|m| m.as_str()) } } diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index bf2bd559026a..850616b3782f 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -135,7 +135,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match parse_value(s) { + match parse_value(s.as_bytes()) { Ok(value) => { value.write_to_vec(&mut output.data); } @@ -180,7 +180,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match parse_value(s) { + match parse_value(s.as_bytes()) { Ok(value) => { output.validity.push(true); value.write_to_vec(&mut output.builder.data); @@ -205,7 +205,7 @@ pub fn register(registry: &mut FunctionRegistry) { let val = to_string(s); match parse_value(val.as_bytes()) { Ok(_) => output.push_null(), - Err(e) => output.push(e.to_string().as_bytes()), + Err(e) => output.push(&e.to_string()), } }), ); @@ -220,9 +220,9 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match parse_value(s) { + match parse_value(s.as_bytes()) { Ok(_) => output.push_null(), - Err(e) => output.push(e.to_string().as_bytes()), + Err(e) => output.push(&e.to_string()), } }), ); @@ -291,7 +291,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_2_arg::( "get", - |_, _, _| FunctionDomain::MayThrow, + |_, _, _| FunctionDomain::Full, vectorize_with_builder_2_arg::>( |val, name, output, ctx| { if let Some(validity) = &ctx.validity { @@ -300,24 +300,11 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match std::str::from_utf8(name) { - Ok(name) => match get_by_name(val, name, false) { - Some(v) => { - output.push(&v); - } - None => { - output.push_null(); - } - }, - Err(err) => { - ctx.set_error( - output.len(), - format!( - "Unable convert name '{}' to string: {}", - &String::from_utf8_lossy(name), - err - ), - ); + match get_by_name(val, name, false) { + Some(v) => { + output.push(&v); + } + None => { output.push_null(); } } @@ -354,7 +341,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_2_arg::( "get_ignore_case", - |_, _, _| FunctionDomain::MayThrow, + |_, _, _| FunctionDomain::Full, vectorize_with_builder_2_arg::>( |val, name, output, ctx| { if let Some(validity) = &ctx.validity { @@ -363,22 +350,9 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match std::str::from_utf8(name) { - Ok(name) => match get_by_name(val, name, true) { - Some(v) => output.push(&v), - None => output.push_null(), - }, - Err(err) => { - ctx.set_error( - output.len(), - format!( - "Unable convert name '{}' to string: {}", - &String::from_utf8_lossy(name), - err - ), - ); - output.push_null(); - } + match get_by_name(val, name, true) { 
+ Some(v) => output.push(&v), + None => output.push_null(), } }, ), @@ -386,7 +360,7 @@ pub fn register(registry: &mut FunctionRegistry) { registry.register_combine_nullable_2_arg::( "get_string", - |_, _, _| FunctionDomain::MayThrow, + |_, _, _| FunctionDomain::Full, vectorize_with_builder_2_arg::>( |val, name, output, ctx| { if let Some(validity) = &ctx.validity { @@ -395,27 +369,12 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match std::str::from_utf8(name) { - Ok(name) => match get_by_name(val, name, false) { - Some(v) => { - let json_str = cast_to_string(&v); - output.builder.put_str(&json_str); - output.builder.commit_row(); - output.validity.push(true); - } - None => output.push_null(), - }, - Err(err) => { - ctx.set_error( - output.len(), - format!( - "Unable convert name '{}' to string: {}", - &String::from_utf8_lossy(name), - err - ), - ); - output.push_null(); + match get_by_name(val, name, false) { + Some(v) => { + let json_str = cast_to_string(&v); + output.push(&json_str); } + None => output.push_null(), } }, ), @@ -438,9 +397,7 @@ pub fn register(registry: &mut FunctionRegistry) { match get_by_index(val, idx as usize) { Some(v) => { let json_str = cast_to_string(&v); - output.builder.put_str(&json_str); - output.builder.commit_row(); - output.validity.push(true); + output.push(&json_str); } None => { output.push_null(); @@ -462,7 +419,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match parse_json_path(path) { + match parse_json_path(path.as_bytes()) { Ok(json_path) => { get_by_path_array( val, @@ -477,10 +434,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } Err(_) => { - ctx.set_error( - output.len(), - format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(path),), - ); + ctx.set_error(output.len(), format!("Invalid JSON Path '{path}'")); output.push_null(); } } @@ -499,7 +453,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match parse_json_path(path) { + match parse_json_path(path.as_bytes()) { Ok(json_path) => { get_by_path_first( val, @@ -514,10 +468,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } Err(_) => { - ctx.set_error( - output.len(), - format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(path),), - ); + ctx.set_error(output.len(), format!("Invalid JSON Path '{path}'")); output.push_null(); } } @@ -584,7 +535,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match parse_json_path(path) { + match parse_json_path(path.as_bytes()) { Ok(json_path) => { get_by_path( val, @@ -599,10 +550,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } Err(_) => { - ctx.set_error( - output.len(), - format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(path),), - ); + ctx.set_error(output.len(), format!("Invalid JSON Path '{path}'")); output.push_null(); } } @@ -621,11 +569,11 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - match parse_value(s) { + match parse_value(s.as_bytes()) { Ok(val) => { let mut buf = Vec::new(); val.write_to_vec(&mut buf); - match parse_json_path(path) { + match parse_json_path(path.as_bytes()) { Ok(json_path) => { let mut out_buf = Vec::new(); let mut out_offsets = Vec::new(); @@ -634,19 +582,11 @@ pub fn register(registry: &mut FunctionRegistry) { output.push_null(); } else { let json_str = cast_to_string(&out_buf); - output.builder.put_str(&json_str); - output.builder.commit_row(); - output.validity.push(true); + output.push(&json_str); } } Err(_) => { - ctx.set_error( - output.len(), - format!( - 
"Invalid JSON Path '{}'", - &String::from_utf8_lossy(path), - ), - ); + ctx.set_error(output.len(), format!("Invalid JSON Path '{path}'")); output.push_null(); } } @@ -722,7 +662,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } match as_str(v) { - Some(val) => output.push(val.as_bytes()), + Some(val) => output.push(&val), None => output.push_null(), } }), @@ -873,9 +813,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } let json_str = cast_to_string(val); - output.builder.put_str(&json_str); - output.builder.commit_row(); - output.validity.push(true); + output.push(&json_str); }, ), ); @@ -1069,7 +1007,7 @@ pub fn register(registry: &mut FunctionRegistry) { } } let s = to_pretty_string(val); - output.put_slice(s.as_bytes()); + output.put_str(&s); output.commit_row(); }), ); @@ -1247,7 +1185,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - let result = exists_any_keys(val, keys.iter()); + let result = exists_any_keys(val, keys.iter().map(|k| k.as_bytes())); output.push(result); }, ), @@ -1264,7 +1202,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - let result = exists_all_keys(val, keys.iter()); + let result = exists_all_keys(val, keys.iter().map(|k| k.as_bytes())); output.push(result); }, ), @@ -1281,7 +1219,7 @@ pub fn register(registry: &mut FunctionRegistry) { return; } } - let result = exists_all_keys(val, once(key)); + let result = exists_all_keys(val, once(key.as_bytes())); output.push(result); }, ), @@ -1351,7 +1289,7 @@ fn json_object_impl_fn( continue; } let key = match k { - ScalarRef::String(v) => unsafe { String::from_utf8_unchecked(v.to_vec()) }, + ScalarRef::String(v) => v, _ => { has_err = true; ctx.set_error(builder.len(), "Key must be a string value"); @@ -1363,7 +1301,7 @@ fn json_object_impl_fn( ctx.set_error(builder.len(), "Keys have to be unique"); break; } - set.insert(key.clone()); + set.insert(key); let mut val = vec![]; cast_scalar_to_variant(v, ctx.func_ctx.tz, &mut val); kvs.push((key, val)); @@ -1413,7 +1351,7 @@ fn get_by_keypath_fn( string_res: bool, ) -> Value { let scalar_keypath = match &args[1] { - ValueRef::Scalar(ScalarRef::String(v)) => Some(parse_key_paths(v)), + ValueRef::Scalar(ScalarRef::String(v)) => Some(parse_key_paths(v.as_bytes())), _ => None, }; let len_opt = args.iter().find_map(|arg| match arg { @@ -1436,7 +1374,7 @@ fn get_by_keypath_fn( ValueRef::Column(col) => { let scalar = unsafe { col.index_unchecked(idx) }; let path = match scalar { - ScalarRef::String(buf) => Some(parse_key_paths(buf)), + ScalarRef::String(buf) => Some(parse_key_paths(buf.as_bytes())), _ => None, }; Cow::Owned(path) @@ -1507,8 +1445,7 @@ where { let scalar_jsonpath = match &args[1] { ValueRef::Scalar(ScalarRef::String(v)) => { - let res = parse_json_path(v) - .map_err(|_| format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(v))); + let res = parse_json_path(v.as_bytes()).map_err(|_| format!("Invalid JSON Path '{v}'")); Some(res) } _ => None, @@ -1530,9 +1467,8 @@ where let scalar = unsafe { col.index_unchecked(idx) }; match scalar { ScalarRef::String(buf) => { - let res = parse_json_path(buf).map_err(|_| { - format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(buf)) - }); + let res = parse_json_path(buf.as_bytes()) + .map_err(|_| format!("Invalid JSON Path '{buf}'")); Some(res) } _ => None, diff --git a/src/query/functions/src/scalars/vector.rs b/src/query/functions/src/scalars/vector.rs index aa3d65a19fed..b551f2539bf3 100644 --- a/src/query/functions/src/scalars/vector.rs +++ 
b/src/query/functions/src/scalars/vector.rs @@ -141,17 +141,6 @@ pub fn register(registry: &mut FunctionRegistry) { } } - let data = match std::str::from_utf8(data) { - Ok(data) => data, - Err(_) => { - ctx.set_error( - output.len(), - format!("Invalid data: {:?}", String::from_utf8_lossy(data)), - ); - output.push(vec![F32::from(0.0)].into()); - return; - } - }; if ctx.func_ctx.openai_api_key.is_empty() { ctx.set_error(output.len(), "openai_api_key is empty".to_string()); output.push(vec![F32::from(0.0)].into()); @@ -205,18 +194,6 @@ pub fn register(registry: &mut FunctionRegistry) { } } - let data = match std::str::from_utf8(data) { - Ok(data) => data, - Err(_) => { - ctx.set_error( - output.len(), - format!("Invalid data: {:?}", String::from_utf8_lossy(data)), - ); - output.put_str(""); - output.commit_row(); - return; - } - }; if ctx.func_ctx.openai_api_key.is_empty() { ctx.set_error(output.len(), "openai_api_key is empty".to_string()); output.put_str(""); diff --git a/src/query/functions/src/srfs/variant.rs b/src/query/functions/src/srfs/variant.rs index 6b6c5c3b1ccf..38a9e2cb261e 100644 --- a/src/query/functions/src/srfs/variant.rs +++ b/src/query/functions/src/srfs/variant.rs @@ -78,41 +78,38 @@ pub fn register(registry: &mut FunctionRegistry) { let path_arg = args[1].clone().to_owned(); let mut results = Vec::with_capacity(ctx.num_rows); match path_arg { - Value::Scalar(Scalar::String(path)) => match parse_json_path(&path) { - Ok(json_path) => { - let selector = Selector::new(json_path, SelectorMode::All); - for (row, max_nums_per_row) in - max_nums_per_row.iter_mut().enumerate().take(ctx.num_rows) - { - let val = unsafe { val_arg.index_unchecked(row) }; - let mut builder = BinaryColumnBuilder::with_capacity(0, 0); - if let ScalarRef::Variant(val) = val { - selector.select( - val, - &mut builder.data, - &mut builder.offsets, - ); + Value::Scalar(Scalar::String(path)) => { + match parse_json_path(path.as_bytes()) { + Ok(json_path) => { + let selector = Selector::new(json_path, SelectorMode::All); + for (row, max_nums_per_row) in + max_nums_per_row.iter_mut().enumerate().take(ctx.num_rows) + { + let val = unsafe { val_arg.index_unchecked(row) }; + let mut builder = BinaryColumnBuilder::with_capacity(0, 0); + if let ScalarRef::Variant(val) = val { + selector.select( + val, + &mut builder.data, + &mut builder.offsets, + ); + } + let array = + Column::Variant(builder.build()).wrap_nullable(None); + let array_len = array.len(); + *max_nums_per_row = + std::cmp::max(*max_nums_per_row, array_len); + results.push(( + Value::Column(Column::Tuple(vec![array])), + array_len, + )); } - let array = - Column::Variant(builder.build()).wrap_nullable(None); - let array_len = array.len(); - *max_nums_per_row = std::cmp::max(*max_nums_per_row, array_len); - results.push(( - Value::Column(Column::Tuple(vec![array])), - array_len, - )); + } + Err(_) => { + ctx.set_error(0, format!("Invalid JSON Path '{}'", &path,)); } } - Err(_) => { - ctx.set_error( - 0, - format!( - "Invalid JSON Path '{}'", - &String::from_utf8_lossy(&path), - ), - ); - } - }, + } _ => { for (row, max_nums_per_row) in max_nums_per_row.iter_mut().enumerate().take(ctx.num_rows) @@ -121,7 +118,7 @@ pub fn register(registry: &mut FunctionRegistry) { let path = unsafe { path_arg.index_unchecked(row) }; let mut builder = BinaryColumnBuilder::with_capacity(0, 0); if let ScalarRef::String(path) = path { - match parse_json_path(path) { + match parse_json_path(path.as_bytes()) { Ok(json_path) => { if let ScalarRef::Variant(val) = val { let 
selector = @@ -136,10 +133,7 @@ pub fn register(registry: &mut FunctionRegistry) { Err(_) => { ctx.set_error( row, - format!( - "Invalid JSON Path '{}'", - &String::from_utf8_lossy(path), - ), + format!("Invalid JSON Path '{}'", &path,), ); break; } @@ -297,23 +291,18 @@ pub fn register(registry: &mut FunctionRegistry) { if args.len() >= 2 { match &args[1] { - ValueRef::Scalar(ScalarRef::String(v)) => match parse_json_path(v) { - Ok(jsonpath) => { - let path = unsafe { std::str::from_utf8_unchecked(v) }; - let selector = Selector::new(jsonpath, SelectorMode::First); - json_path = Some((path, selector)); - } - Err(_) => { - ctx.set_error( - 0, - format!( - "Invalid JSON Path {:?}", - String::from_utf8_lossy(v) - ), - ); - return results; + ValueRef::Scalar(ScalarRef::String(v)) => { + match parse_json_path(v.as_bytes()) { + Ok(jsonpath) => { + let selector = Selector::new(jsonpath, SelectorMode::First); + json_path = Some((v, selector)); + } + Err(_) => { + ctx.set_error(0, format!("Invalid JSON Path {v:?}",)); + return results; + } } - }, + } ValueRef::Column(_) => { ctx.set_error( 0, @@ -360,30 +349,18 @@ pub fn register(registry: &mut FunctionRegistry) { if args.len() >= 5 { match args[4] { ValueRef::Scalar(ScalarRef::String(v)) => { - match String::from_utf8(v.to_vec()) { - Ok(val) => match val.to_lowercase().as_str() { - "object" => { - mode = FlattenMode::Object; - } - "array" => { - mode = FlattenMode::Array; - } - "both" => { - mode = FlattenMode::Both; - } - _ => { - ctx.set_error(0, format!("Invalid mode {:?}", val)); - return results; - } - }, - Err(_) => { - ctx.set_error( - 0, - format!( - "Invalid mode string {:?}", - String::from_utf8_lossy(v) - ), - ); + match v.to_lowercase().as_str() { + "object" => { + mode = FlattenMode::Object; + } + "array" => { + mode = FlattenMode::Array; + } + "both" => { + mode = FlattenMode::Both; + } + _ => { + ctx.set_error(0, format!("Invalid mode {v:?}")); return results; } } @@ -491,7 +468,7 @@ fn unnest_variant_obj( max_nums_per_row[row] = std::cmp::max(max_nums_per_row[row], len); for (key, val) in vals { - key_builder.put_slice(&key); + key_builder.put_str(&String::from_utf8_lossy(&key)); key_builder.commit_row(); val_builder.put_slice(&val); val_builder.commit_row(); @@ -615,7 +592,7 @@ impl FlattenGenerator { key_builder.push_null(); } if let Some(path_builder) = path_builder { - path_builder.put_slice(inner_path.as_bytes()); + path_builder.put_str(&inner_path); path_builder.commit_row(); } if let Some(index_builder) = index_builder { @@ -672,10 +649,10 @@ impl FlattenGenerator { }; if let Some(key_builder) = key_builder { - key_builder.push(name.as_bytes()); + key_builder.push(name.as_ref()); } if let Some(path_builder) = path_builder { - path_builder.put_slice(inner_path.as_bytes()); + path_builder.put_str(&inner_path); path_builder.commit_row(); } if let Some(index_builder) = index_builder { @@ -768,13 +745,13 @@ impl FlattenGenerator { let key_column = if let Some(key_builder) = key_builder { NullableType::::upcast_column(key_builder.build()) } else { - StringType::upcast_column(StringColumnBuilder::repeat(&[], rows).build()) + StringType::upcast_column(StringColumnBuilder::repeat("", rows).build()) .wrap_nullable(None) }; let path_column = if let Some(path_builder) = path_builder { StringType::upcast_column(path_builder.build()).wrap_nullable(None) } else { - StringType::upcast_column(StringColumnBuilder::repeat(&[], rows).build()) + StringType::upcast_column(StringColumnBuilder::repeat("", rows).build()) .wrap_nullable(None) }; let 
index_column = if let Some(index_builder) = index_builder { diff --git a/src/query/functions/tests/it/aggregates/agg_hashtable.rs b/src/query/functions/tests/it/aggregates/agg_hashtable.rs index 3c1d98807e9d..4e92c0ef2f8b 100644 --- a/src/query/functions/tests/it/aggregates/agg_hashtable.rs +++ b/src/query/functions/tests/it/aggregates/agg_hashtable.rs @@ -57,11 +57,7 @@ fn test_agg_hashtable() { let m: usize = 4; for n in [100, 1000, 10_000, 100_000] { let columns = vec![ - StringType::from_data( - (0..n) - .map(|x| format!("{}", x % m).as_bytes().to_vec()) - .collect_vec(), - ), + StringType::from_data((0..n).map(|x| format!("{}", x % m)).collect_vec()), Int64Type::from_data((0..n).map(|x| (x % m) as i64).collect_vec()), Int32Type::from_data((0..n).map(|x| (x % m) as i32).collect_vec()), Int16Type::from_data((0..n).map(|x| (x % m) as i16).collect_vec()), diff --git a/src/query/functions/tests/it/scalars/array.rs b/src/query/functions/tests/it/scalars/array.rs index e83608c4511b..10fbc8f5daf9 100644 --- a/src/query/functions/tests/it/scalars/array.rs +++ b/src/query/functions/tests/it/scalars/array.rs @@ -155,6 +155,10 @@ fn test_contains(file: &mut impl Write) { let columns = [ ("int8_col", Int8Type::from_data(vec![1i8, 2, 7, 8])), + ( + "string_col", + StringType::from_data(vec![r#"1"#, r#"2"#, r#"5"#, r#"1234"#]), + ), ( "nullable_col", Int64Type::from_data_with_validity(vec![9i64, 10, 11, 12], vec![ @@ -164,6 +168,20 @@ fn test_contains(file: &mut impl Write) { ]; run_ast(file, "int8_col not in (1, 2, 3, 4, 5, null)", &columns); + run_ast( + file, + "contains(['5000', '6000', '7000'], string_col)", + &columns, + ); + + run_ast(file, "contains(['1', '5'], string_col)", &columns); + + run_ast( + file, + "contains(['15000', '6000', '7000'], string_col)", + &columns, + ); + run_ast(file, "contains([1,2,null], nullable_col)", &columns); run_ast( file, diff --git a/src/query/functions/tests/it/scalars/comparison.rs b/src/query/functions/tests/it/scalars/comparison.rs index c93cc6046858..bb0f85c3ff17 100644 --- a/src/query/functions/tests/it/scalars/comparison.rs +++ b/src/query/functions/tests/it/scalars/comparison.rs @@ -295,6 +295,8 @@ fn test_gt(file: &mut impl Write) { ]; run_ast(file, "parse_json(lhs) > parse_json(rhs)", &table); run_ast(file, "lhs > rhs", &table); + let table = [("col", StringType::from_data(vec![r#"bcd"#, r#"efg"#]))]; + run_ast(file, "col > 'efg'", &table); } fn test_gte(file: &mut impl Write) { diff --git a/src/query/functions/tests/it/scalars/parser.rs b/src/query/functions/tests/it/scalars/parser.rs index 2f0c898b5800..db8934468c20 100644 --- a/src/query/functions/tests/it/scalars/parser.rs +++ b/src/query/functions/tests/it/scalars/parser.rs @@ -354,7 +354,7 @@ pub fn transform_expr(ast: AExpr, columns: &[(&str, DataType)]) -> RawExpr { transform_expr(*expr, columns), RawExpr::Constant { span, - scalar: Scalar::String(key.name.into_bytes()), + scalar: Scalar::String(key.name), }, ]), MapAccessor::DotNumber { key } => { @@ -567,7 +567,7 @@ pub fn transform_literal(lit: ASTLiteral) -> Scalar { precision, scale, })), - ASTLiteral::String(s) => Scalar::String(s.as_bytes().to_vec()), + ASTLiteral::String(s) => Scalar::String(s), ASTLiteral::Boolean(b) => Scalar::Boolean(b), ASTLiteral::Null => Scalar::Null, ASTLiteral::Float64(f) => Scalar::Number(NumberScalar::Float64(OrderedFloat(f))), diff --git a/src/query/functions/tests/it/scalars/string.rs b/src/query/functions/tests/it/scalars/string.rs index 089e77e093bc..e5909df54f09 100644 --- 
a/src/query/functions/tests/it/scalars/string.rs +++ b/src/query/functions/tests/it/scalars/string.rs @@ -86,6 +86,10 @@ fn test_lower(file: &mut impl Write) { fn test_bit_length(file: &mut impl Write) { run_ast(file, "bit_length('latin')", &[]); + run_ast(file, "bit_length('CAFÉ')", &[]); + run_ast(file, "bit_length('数据库')", &[]); + run_ast(file, "bit_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ')", &[]); + run_ast(file, "bit_length('قاعدة البيانات')", &[]); run_ast(file, "bit_length(NULL)", &[]); run_ast(file, "bit_length(a)", &[( "a", @@ -95,8 +99,12 @@ fn test_bit_length(file: &mut impl Write) { fn test_octet_length(file: &mut impl Write) { run_ast(file, "octet_length('latin')", &[]); + run_ast(file, "octet_length('CAFÉ')", &[]); + run_ast(file, "octet_length('数据库')", &[]); + run_ast(file, "octet_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ')", &[]); + run_ast(file, "octet_length('قاعدة البيانات')", &[]); run_ast(file, "octet_length(NULL)", &[]); - run_ast(file, "length(a)", &[( + run_ast(file, "octet_length(a)", &[( "a", StringType::from_data(vec!["latin", "кириллица", "кириллица and latin"]), )]); @@ -104,6 +112,10 @@ fn test_octet_length(file: &mut impl Write) { fn test_char_length(file: &mut impl Write) { run_ast(file, "char_length('latin')", &[]); + run_ast(file, "char_length('CAFÉ')", &[]); + run_ast(file, "char_length('数据库')", &[]); + run_ast(file, "char_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ')", &[]); + run_ast(file, "char_length('قاعدة البيانات')", &[]); run_ast(file, "char_length(NULL)", &[]); run_ast(file, "character_length(a)", &[( "a", @@ -162,6 +174,10 @@ fn test_ascii(file: &mut impl Write) { fn test_ltrim(file: &mut impl Write) { run_ast(file, "ltrim(' abc ')", &[]); run_ast(file, "ltrim(' ')", &[]); + run_ast(file, "ltrim(' 你 好 ')", &[]); + run_ast(file, "ltrim(' 분산 데이터베이스 ')", &[]); + run_ast(file, "ltrim(' あなたのことが好きです ')", &[ + ]); run_ast(file, "ltrim(NULL)", &[]); run_ast(file, "ltrim(a)", &[( "a", @@ -172,6 +188,10 @@ fn test_ltrim(file: &mut impl Write) { fn test_rtrim(file: &mut impl Write) { run_ast(file, "rtrim(' abc ')", &[]); run_ast(file, "rtrim(' ')", &[]); + run_ast(file, "rtrim(' 你 好 ')", &[]); + run_ast(file, "rtrim(' 분산 데이터베이스 ')", &[]); + run_ast(file, "rtrim(' あなたのことが好きです ')", &[ + ]); run_ast(file, "rtrim(NULL)", &[]); run_ast(file, "rtrim(a)", &[( "a", @@ -187,6 +207,12 @@ fn test_trim_leading(file: &mut impl Write) { run_ast(file, "trim_leading(NULL, 'a')", &[]); run_ast(file, "trim_leading('aaaaaaaa', NULL)", &[]); run_ast(file, "trim_leading('aaaaaaaa', '')", &[]); + run_ast(file, "trim_leading('분산 데이터베이스', '분산 ')", &[]); + run_ast( + file, + "trim_leading('あなたのことが好きです', 'あなたの')", + &[], + ); let table = [ ( @@ -209,6 +235,12 @@ fn test_trim_trailing(file: &mut impl Write) { run_ast(file, "trim_trailing(NULL, 'a')", &[]); run_ast(file, "trim_trailing('aaaaaaaa', NULL)", &[]); run_ast(file, "trim_trailing('aaaaaaaa', '')", &[]); + run_ast(file, "trim_trailing('분산 데이터베이스', '베이스')", &[]); + run_ast( + file, + "trim_trailing('あなたのことが好きです', '好きです')", + &[], + ); let table = [ ( @@ -231,6 +263,10 @@ fn test_trim_both(file: &mut impl Write) { run_ast(file, "trim_both(NULL, 'a')", &[]); run_ast(file, "trim_both('aaaaaaaa', NULL)", &[]); run_ast(file, "trim_both('aaaaaaaa', '')", &[]); + run_ast(file, "trim_both(' 你 好 ', ' ')", &[]); + run_ast(file, "trim_both(' 분산 데이터베이스 ', ' ')", &[ + ]); + run_ast(file, "trim_both(' あなたのことが好きです ', ' ')", &[]); let table = [ ( @@ -313,6 +349,9 @@ fn test_trim(file: &mut impl Write) { // TRIM() run_ast(file, "trim(' abc ')", &[]); run_ast(file, "trim(' 
')", &[]); + run_ast(file, "trim(' 你 好 ')", &[]); + run_ast(file, "trim(' 분산 데이터베이스 ')", &[]); + run_ast(file, "trim(' あなたのことが好きです ')", &[]); run_ast(file, "trim(NULL)", &[]); run_ast(file, "trim(a)", &[( "a", @@ -328,6 +367,11 @@ fn test_trim(file: &mut impl Write) { fn test_concat(file: &mut impl Write) { run_ast(file, "concat('5', '3', '4')", &[]); run_ast(file, "concat(NULL, '3', '4')", &[]); + run_ast( + file, + "concat('忠犬ハチ公', 'CAFÉ', '数据库', 'قاعدة البيانات', 'НОЧЬ НА ОКРАИНЕ МОСКВЫ')", + &[], + ); run_ast(file, "concat(a, '3', '4', '5')", &[( "a", StringType::from_data(vec!["abc", " abc", " abc ", "abc "]), @@ -342,6 +386,11 @@ fn test_concat(file: &mut impl Write) { run_ast(file, "concat_ws('-', '3', null, '4', null, '5')", &[]); run_ast(file, "concat_ws(NULL, '3', '4')", &[]); + run_ast( + file, + "concat_ws(',', '忠犬ハチ公', 'CAFÉ', '数据库', 'قاعدة البيانات', 'НОЧЬ НА ОКРАИНЕ МОСКВЫ')", + &[], + ); run_ast(file, "concat_ws(a, '3', '4', '5')", &[( "a", StringType::from_data(vec![",", "-", ",", "-"]), @@ -498,8 +547,8 @@ fn test_strcmp(file: &mut impl Write) { run_ast(file, "strcmp('hii', 'hii')", &[]); let table = [ - ("a", StringType::from_data(vec!["hi", "test", "cc"])), - ("b", StringType::from_data(vec!["i", "test", "ccb"])), + ("a", StringType::from_data(vec!["i", "h", "test", "cc"])), + ("b", StringType::from_data(vec!["hi", "hi", "test", "ccb"])), ]; run_ast(file, "strcmp(a, b)", &table); } @@ -508,6 +557,7 @@ fn test_locate(file: &mut impl Write) { run_ast(file, "locate('bar', 'foobarbar')", &[]); run_ast(file, "locate('', 'foobarbar')", &[]); run_ast(file, "locate('', '')", &[]); + run_ast(file, "locate('好世', '你好世界')", &[]); run_ast(file, "instr('foobarbar', 'bar')", &[]); run_ast(file, "instr('foobarbar', '')", &[]); run_ast(file, "instr('', '')", &[]); @@ -516,6 +566,7 @@ fn test_locate(file: &mut impl Write) { run_ast(file, "position('' IN '')", &[]); run_ast(file, "position('foobarbar' IN 'bar')", &[]); run_ast(file, "locate('bar', 'foobarbar', 5)", &[]); + run_ast(file, "locate('好世', '你好世界', 1)", &[]); let table = [ ("a", StringType::from_data(vec!["bar", "cc", "cc", "q"])), @@ -570,6 +621,8 @@ fn test_ord(file: &mut impl Write) { fn test_repeat(file: &mut impl Write) { run_ast(file, "repeat('3', NULL)", &[]); run_ast(file, "repeat('3', 5)", &[]); + run_ast(file, "repeat('你好世界', 3)", &[]); + run_ast(file, "repeat('こんにちは', 2)", &[]); run_ast(file, "repeat('3', 1000001)", &[]); let table = [("a", StringType::from_data(vec!["a", "b", "c"]))]; run_ast(file, "repeat(a, 3)", &table); @@ -584,6 +637,7 @@ fn test_insert(file: &mut impl Write) { run_ast(file, "insert('Quadratic', 3, 100, NULL)", &[]); run_ast(file, "insert('Quadratic', 3, NULL, 'NULL')", &[]); run_ast(file, "insert('Quadratic', NULL, 100, 'NULL')", &[]); + run_ast(file, "insert('你好世界', 1, 2, 'こんにちは')", &[]); run_ast(file, "insert(NULL, 2, 100, 'NULL')", &[]); let table = [ @@ -653,6 +707,8 @@ fn test_substr(file: &mut impl Write) { run_ast(file, "substr('Sakila' from -4 for 2)", &[]); run_ast(file, "substr('sakila' FROM -4)", &[]); run_ast(file, "substr('abc',2)", &[]); + run_ast(file, "substr('你好世界', 3)", &[]); + run_ast(file, "substr('こんにちは', 2)", &[]); run_ast(file, "substr('abc', pos, len)", &[ ( "pos", @@ -675,6 +731,8 @@ fn test_split(file: &mut impl Write) { run_ast(file, "split('Sakila', 'il')", &[]); run_ast(file, "split('sakila', 'a')", &[]); run_ast(file, "split('abc','b')", &[]); + run_ast(file, "split('你好世界', '好')", &[]); + run_ast(file, "split('こんにちは', 'に')", &[]); run_ast(file, "split(str, sep)", &[ 
( "str", diff --git a/src/query/functions/tests/it/scalars/testdata/arithmetic.txt b/src/query/functions/tests/it/scalars/testdata/arithmetic.txt index fc61530bf044..2f506b6dfdab 100644 --- a/src/query/functions/tests/it/scalars/testdata/arithmetic.txt +++ b/src/query/functions/tests/it/scalars/testdata/arithmetic.txt @@ -69,8 +69,7 @@ evaluation (internal): ast : c + 0.5 raw expr : plus(c::UInt32, 0.5) -checked expr : plus(to_decimal(12, 1)(c), to_decimal(12, 1)(0.5_d128(1,1))) -optimized expr : plus(to_decimal(12, 1)(c), 0.5_d128(12,1)) +checked expr : plus(c, 0.5_d128(1,1)) evaluation: +--------+-----------+----------------+ | | c | Output | @@ -138,7 +137,7 @@ evaluation (internal): ast : c + e raw expr : plus(c::UInt32, e::Decimal(10, 1)) -checked expr : plus(to_decimal(12, 1)(c), to_decimal(12, 1)(e)) +checked expr : plus(c, e) evaluation: +--------+-----------+----------------+----------------+ | | c | e | Output | @@ -184,7 +183,7 @@ evaluation (internal): ast : d2 + e raw expr : plus(d2::UInt8 NULL, e::Decimal(10, 1)) -checked expr : plus(CAST(d2 AS Decimal(11, 1) NULL), CAST(e AS Decimal(11, 1) NULL)) +checked expr : plus(d2, CAST(e AS Decimal(10, 1) NULL)) evaluation: +--------+------------------+----------------+------------------------+ | | d2 | e | Output | @@ -207,7 +206,7 @@ evaluation (internal): ast : d2 + f raw expr : plus(d2::UInt8 NULL, f::Decimal(76, 2)) -checked expr : plus(CAST(d2 AS Decimal(76, 2) NULL), CAST(f AS Decimal(76, 2) NULL)) +checked expr : plus(d2, CAST(f AS Decimal(76, 2) NULL)) evaluation: +--------+------------------+----------------+-------------------------+ | | d2 | f | Output | @@ -230,7 +229,7 @@ evaluation (internal): ast : e + f raw expr : plus(e::Decimal(10, 1), f::Decimal(76, 2)) -checked expr : plus(to_decimal(76, 2)(e), f) +checked expr : plus(e, f) evaluation: +--------+----------------+----------------+-----------------+ | | e | f | Output | @@ -322,8 +321,7 @@ evaluation (internal): ast : c - 0.5 raw expr : minus(c::UInt32, 0.5) -checked expr : minus(to_decimal(12, 1)(c), to_decimal(12, 1)(0.5_d128(1,1))) -optimized expr : minus(to_decimal(12, 1)(c), 0.5_d128(12,1)) +checked expr : minus(c, 0.5_d128(1,1)) evaluation: +--------+-----------+----------------+ | | c | Output | @@ -391,7 +389,7 @@ evaluation (internal): ast : c - e raw expr : minus(c::UInt32, e::Decimal(10, 1)) -checked expr : minus(to_decimal(12, 1)(c), to_decimal(12, 1)(e)) +checked expr : minus(c, e) evaluation: +--------+-----------+----------------+-----------------+ | | c | e | Output | @@ -437,7 +435,7 @@ evaluation (internal): ast : d2 - e raw expr : minus(d2::UInt8 NULL, e::Decimal(10, 1)) -checked expr : minus(CAST(d2 AS Decimal(11, 1) NULL), CAST(e AS Decimal(11, 1) NULL)) +checked expr : minus(d2, CAST(e AS Decimal(10, 1) NULL)) evaluation: +--------+------------------+----------------+--------------------------+ | | d2 | e | Output | @@ -460,7 +458,7 @@ evaluation (internal): ast : d2 - f raw expr : minus(d2::UInt8 NULL, f::Decimal(76, 2)) -checked expr : minus(CAST(d2 AS Decimal(76, 2) NULL), CAST(f AS Decimal(76, 2) NULL)) +checked expr : minus(d2, CAST(f AS Decimal(76, 2) NULL)) evaluation: +--------+------------------+----------------+--------------------------+ | | d2 | f | Output | @@ -483,7 +481,7 @@ evaluation (internal): ast : e - f raw expr : minus(e::Decimal(10, 1), f::Decimal(76, 2)) -checked expr : minus(to_decimal(76, 2)(e), f) +checked expr : minus(e, f) evaluation: +--------+----------------+----------------+------------------+ | | e | f | Output 
| @@ -759,8 +757,7 @@ evaluation (internal): ast : c * 0.5 raw expr : multiply(c::UInt32, 0.5) -checked expr : multiply(to_decimal(11, 0)(c), to_decimal(11, 1)(0.5_d128(1,1))) -optimized expr : multiply(to_decimal(11, 0)(c), 0.5_d128(11,1)) +checked expr : multiply(c, 0.5_d128(1,1)) evaluation: +--------+-----------+----------------+ | | c | Output | @@ -828,7 +825,7 @@ evaluation (internal): ast : c * e raw expr : multiply(c::UInt32, e::Decimal(10, 1)) -checked expr : multiply(to_decimal(20, 0)(c), to_decimal(20, 1)(e)) +checked expr : multiply(c, e) evaluation: +--------+-----------+----------------+-----------------+ | | c | e | Output | @@ -874,7 +871,7 @@ evaluation (internal): ast : d2 * e raw expr : multiply(d2::UInt8 NULL, e::Decimal(10, 1)) -checked expr : multiply(CAST(d2 AS Decimal(13, 0) NULL), CAST(e AS Decimal(13, 1) NULL)) +checked expr : multiply(d2, CAST(e AS Decimal(10, 1) NULL)) evaluation: +--------+------------------+----------------+------------------------+ | | d2 | e | Output | @@ -897,7 +894,7 @@ evaluation (internal): ast : d2 * f raw expr : multiply(d2::UInt8 NULL, f::Decimal(76, 2)) -checked expr : multiply(CAST(d2 AS Decimal(76, 0) NULL), CAST(f AS Decimal(76, 2) NULL)) +checked expr : multiply(d2, CAST(f AS Decimal(76, 2) NULL)) evaluation: +--------+------------------+----------------+-------------------------+ | | d2 | f | Output | @@ -920,7 +917,7 @@ evaluation (internal): ast : e * f raw expr : multiply(e::Decimal(10, 1), f::Decimal(76, 2)) -checked expr : multiply(to_decimal(76, 1)(e), f) +checked expr : multiply(e, f) evaluation: +--------+----------------+----------------+--------------------+ | | e | f | Output | @@ -943,8 +940,7 @@ evaluation (internal): ast : e * 0.5 raw expr : multiply(e::Decimal(10, 1), 0.5) -checked expr : multiply(to_decimal(11, 1)(e), to_decimal(11, 1)(0.5_d128(1,1))) -optimized expr : multiply(to_decimal(11, 1)(e), 0.5_d128(11,1)) +checked expr : multiply(e, 0.5_d128(1,1)) evaluation: +--------+----------------+----------------+ | | e | Output | @@ -1035,7 +1031,7 @@ evaluation (internal): ast : c / 0.5 raw expr : divide(c::UInt32, 0.5) -checked expr : divide(to_decimal(10, 0)(c), 0.5_d128(1,1)) +checked expr : divide(c, 0.5_d128(1,1)) evaluation: +--------+-----------+----------------+ | | c | Output | @@ -1134,7 +1130,7 @@ error: ast : c / e raw expr : divide(c::UInt32, e::Decimal(10, 1)) -checked expr : divide(to_decimal(10, 0)(c), e) +checked expr : divide(c, e) evaluation: +--------+-----------+----------------+----------------+ | | c | e | Output | @@ -1180,7 +1176,7 @@ evaluation (internal): ast : d2 / e raw expr : divide(d2::UInt8 NULL, e::Decimal(10, 1)) -checked expr : divide(CAST(d2 AS Decimal(3, 0) NULL), CAST(e AS Decimal(10, 1) NULL)) +checked expr : divide(d2, CAST(e AS Decimal(10, 1) NULL)) evaluation: +--------+------------------+----------------+---------------------+ | | d2 | e | Output | @@ -1203,7 +1199,7 @@ evaluation (internal): ast : d2 / f raw expr : divide(d2::UInt8 NULL, f::Decimal(76, 2)) -checked expr : divide(CAST(d2 AS Decimal(39, 0) NULL), CAST(f AS Decimal(76, 2) NULL)) +checked expr : divide(d2, CAST(f AS Decimal(76, 2) NULL)) evaluation: +--------+------------------+----------------+---------------------+ | | d2 | f | Output | @@ -1226,7 +1222,7 @@ evaluation (internal): ast : e / f raw expr : divide(e::Decimal(10, 1), f::Decimal(76, 2)) -checked expr : divide(to_decimal(39, 1)(e), f) +checked expr : divide(e, f) evaluation: +--------+----------------+----------------+----------------+ | | e | 
f | Output | diff --git a/src/query/functions/tests/it/scalars/testdata/array.txt b/src/query/functions/tests/it/scalars/testdata/array.txt index b928a0187d23..41be493ae10d 100644 --- a/src/query/functions/tests/it/scalars/testdata/array.txt +++ b/src/query/functions/tests/it/scalars/testdata/array.txt @@ -439,6 +439,78 @@ evaluation (internal): +----------+-----------------------+ +ast : contains(['5000', '6000', '7000'], string_col) +raw expr : contains(array('5000', '6000', '7000'), string_col::String) +checked expr : contains(array("5000", "6000", "7000"), string_col) +optimized expr : false +evaluation: ++--------+-------------+---------+ +| | string_col | Output | ++--------+-------------+---------+ +| Type | String | Boolean | +| Domain | {"1"..="5"} | {FALSE} | +| Row 0 | '1' | false | +| Row 1 | '2' | false | +| Row 2 | '5' | false | +| Row 3 | '1234' | false | ++--------+-------------+---------+ +evaluation (internal): ++------------+-------------------------------------------------------------------+ +| Column | Data | ++------------+-------------------------------------------------------------------+ +| string_col | StringColumn { data: 0x31323531323334, offsets: [0, 1, 2, 3, 7] } | +| Output | Boolean([0b____0000]) | ++------------+-------------------------------------------------------------------+ + + +ast : contains(['1', '5'], string_col) +raw expr : contains(array('1', '5'), string_col::String) +checked expr : contains(array("1", "5"), string_col) +optimized expr : contains(['1', '5'], string_col) +evaluation: ++--------+-------------+---------------+ +| | string_col | Output | ++--------+-------------+---------------+ +| Type | String | Boolean | +| Domain | {"1"..="5"} | {FALSE, TRUE} | +| Row 0 | '1' | true | +| Row 1 | '2' | false | +| Row 2 | '5' | true | +| Row 3 | '1234' | false | ++--------+-------------+---------------+ +evaluation (internal): ++------------+-------------------------------------------------------------------+ +| Column | Data | ++------------+-------------------------------------------------------------------+ +| string_col | StringColumn { data: 0x31323531323334, offsets: [0, 1, 2, 3, 7] } | +| Output | Boolean([0b____0101]) | ++------------+-------------------------------------------------------------------+ + + +ast : contains(['15000', '6000', '7000'], string_col) +raw expr : contains(array('15000', '6000', '7000'), string_col::String) +checked expr : contains(array("15000", "6000", "7000"), string_col) +optimized expr : contains(['15000', '6000', '7000'], string_col) +evaluation: ++--------+-------------+---------------+ +| | string_col | Output | ++--------+-------------+---------------+ +| Type | String | Boolean | +| Domain | {"1"..="5"} | {FALSE, TRUE} | +| Row 0 | '1' | false | +| Row 1 | '2' | false | +| Row 2 | '5' | false | +| Row 3 | '1234' | false | ++--------+-------------+---------------+ +evaluation (internal): ++------------+-------------------------------------------------------------------+ +| Column | Data | ++------------+-------------------------------------------------------------------+ +| string_col | StringColumn { data: 0x31323531323334, offsets: [0, 1, 2, 3, 7] } | +| Output | Boolean([0b____0000]) | ++------------+-------------------------------------------------------------------+ + + ast : contains([1,2,null], nullable_col) raw expr : contains(array(1, 2, NULL), nullable_col::Int64 NULL) checked expr : contains(CAST(array(CAST(1_u8 AS UInt8 NULL), CAST(2_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) AS Array(Int64 
NULL)), nullable_col) diff --git a/src/query/functions/tests/it/scalars/testdata/comparison.txt b/src/query/functions/tests/it/scalars/testdata/comparison.txt index 97194c73aef2..4949d145c888 100644 --- a/src/query/functions/tests/it/scalars/testdata/comparison.txt +++ b/src/query/functions/tests/it/scalars/testdata/comparison.txt @@ -1019,6 +1019,28 @@ evaluation (internal): +--------+-----------------------------------------------------------------------------------------------------------------------------------------------+ +ast : col > 'efg' +raw expr : gt(col::String, 'efg') +checked expr : gt(col, "efg") +optimized expr : false +evaluation: ++--------+-----------------+---------+ +| | col | Output | ++--------+-----------------+---------+ +| Type | String | Boolean | +| Domain | {"bcd"..="efg"} | {FALSE} | +| Row 0 | 'bcd' | false | +| Row 1 | 'efg' | false | ++--------+-----------------+---------+ +evaluation (internal): ++--------+-----------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------+ +| col | StringColumn { data: 0x626364656667, offsets: [0, 3, 6] } | +| Output | Boolean([0b______00]) | ++--------+-----------------------------------------------------------+ + + ast : '2'>='1' raw expr : gte('2', '1') checked expr : gte("2", "1") diff --git a/src/query/functions/tests/it/scalars/testdata/function_list.txt b/src/query/functions/tests/it/scalars/testdata/function_list.txt index d6faa40fe3ff..66017085b9dc 100644 --- a/src/query/functions/tests/it/scalars/testdata/function_list.txt +++ b/src/query/functions/tests/it/scalars/testdata/function_list.txt @@ -7,7 +7,8 @@ array_slice -> slice bitmap_and_not -> bitmap_not bitmap_cardinality -> bitmap_count ceiling -> ceil -character_length -> char_length +char_length -> length +character_length -> length current_timestamp -> now date_format -> to_string hex -> to_hex @@ -16,21 +17,21 @@ ipv4_num_to_string -> inet_ntoa ipv4_string_to_num -> inet_aton json_to_string -> to_string lcase -> lower -length_utf8 -> char_length +length_utf8 -> length mid -> substr mod -> modulo neg -> minus negate -> minus object_keys -> json_object_keys -octet_length -> length remove_nullable -> assume_not_null rlike -> regexp sha1 -> sha siphash -> siphash64 str_to_date -> to_date str_to_timestamp -> to_timestamp +substr_utf8 -> substr substring -> substr -substring_utf8 -> substr_utf8 +substring_utf8 -> substr subtract -> minus to_datetime -> to_timestamp to_text -> to_string @@ -785,8 +786,6 @@ Functions overloads: 20 ceil(Float64 NULL) :: Float64 NULL 0 char FACTORY 1 char FACTORY -0 char_length(String) :: UInt64 -1 char_length(String NULL) :: UInt64 NULL 0 check_json(Variant) :: String NULL 1 check_json(Variant NULL) :: String NULL 2 check_json(String) :: String NULL @@ -2774,6 +2773,8 @@ Functions overloads: 0 now() :: Timestamp 0 oct(Int64) :: String 1 oct(Int64 NULL) :: String NULL +0 octet_length(String) :: UInt64 +1 octet_length(String NULL) :: UInt64 NULL 0 or(Boolean, Boolean) :: Boolean 1 or(Boolean NULL, Boolean NULL) :: Boolean NULL 0 ord(String) :: UInt64 @@ -3165,10 +3166,6 @@ Functions overloads: 1 substr(String NULL, Int64 NULL) :: String NULL 2 substr(String, Int64, UInt64) :: String 3 substr(String NULL, Int64 NULL, UInt64 NULL) :: String NULL -0 substr_utf8(String, Int64) :: String -1 substr_utf8(String NULL, Int64 NULL) :: String NULL -2 substr_utf8(String, Int64, UInt64) :: String -3 substr_utf8(String NULL, Int64 NULL, UInt64 NULL) :: String NULL 
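The alias and overload changes above reflect the String column now carrying UTF-8 text rather than raw bytes: `length`/`char_length` count Unicode code points, the new `octet_length` counts raw bytes (with `bit_length` being eight times that), and the separate `substr_utf8` overloads are folded into plain `substr`. A minimal standard-library sketch of the intended length semantics (these free functions are illustrative, not the engine's implementation):

fn char_length(s: &str) -> usize {
    // Unicode scalar values, e.g. "CAFÉ" -> 4.
    s.chars().count()
}

fn octet_length(s: &str) -> usize {
    // UTF-8 bytes, e.g. "CAFÉ" -> 5 ('É' takes two bytes).
    s.len()
}

fn bit_length(s: &str) -> usize {
    8 * octet_length(s)
}

fn main() {
    assert_eq!(char_length("数据库"), 3);
    assert_eq!(octet_length("数据库"), 9);
    assert_eq!(bit_length("数据库"), 72);
}

These values match the expected outputs recorded in the string.txt testdata below.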
0 subtract_days(Date, Int64) :: Date 1 subtract_days(Date NULL, Int64 NULL) :: Date NULL 2 subtract_days(Timestamp, Int64) :: Timestamp diff --git a/src/query/functions/tests/it/scalars/testdata/math.txt b/src/query/functions/tests/it/scalars/testdata/math.txt index 1f46c3cf5bd0..0f664b7f9ee8 100644 --- a/src/query/functions/tests/it/scalars/testdata/math.txt +++ b/src/query/functions/tests/it/scalars/testdata/math.txt @@ -464,7 +464,7 @@ output : 120 ast : truncate(0)(10.28*100, 0) raw expr : truncate(0)(multiply(10.28, 100), 0) -checked expr : truncate(0)(multiply(to_decimal(7, 2)(10.28_d128(4,2)), to_decimal(7, 0)(100_u8)), 0_u8) +checked expr : truncate(0)(multiply(10.28_d128(4,2), 100_u8), 0_u8) optimized expr : 1028_d128(7,0) output type : Decimal(7, 0) output domain : {1028..=1028} diff --git a/src/query/functions/tests/it/scalars/testdata/string.txt b/src/query/functions/tests/it/scalars/testdata/string.txt index 38b4fb241f09..fc612631b73a 100644 --- a/src/query/functions/tests/it/scalars/testdata/string.txt +++ b/src/query/functions/tests/it/scalars/testdata/string.txt @@ -123,6 +123,42 @@ output domain : {40..=40} output : 40 +ast : bit_length('CAFÉ') +raw expr : bit_length('CAFÉ') +checked expr : bit_length("CAFÉ") +optimized expr : 40_u64 +output type : UInt64 +output domain : {40..=40} +output : 40 + + +ast : bit_length('数据库') +raw expr : bit_length('数据库') +checked expr : bit_length("数据库") +optimized expr : 72_u64 +output type : UInt64 +output domain : {72..=72} +output : 72 + + +ast : bit_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ') +raw expr : bit_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ') +checked expr : bit_length("НОЧЬ НА ОКРАИНЕ МОСКВЫ") +optimized expr : 328_u64 +output type : UInt64 +output domain : {328..=328} +output : 328 + + +ast : bit_length('قاعدة البيانات') +raw expr : bit_length('قاعدة البيانات') +checked expr : bit_length("قاعدة البيانات") +optimized expr : 216_u64 +output type : UInt64 +output domain : {216..=216} +output : 216 + + ast : bit_length(NULL) raw expr : bit_length(NULL) checked expr : bit_length(CAST(NULL AS String NULL)) @@ -156,25 +192,61 @@ evaluation (internal): ast : octet_length('latin') raw expr : octet_length('latin') -checked expr : length("latin") +checked expr : octet_length("latin") +optimized expr : 5_u64 +output type : UInt64 +output domain : {5..=5} +output : 5 + + +ast : octet_length('CAFÉ') +raw expr : octet_length('CAFÉ') +checked expr : octet_length("CAFÉ") optimized expr : 5_u64 output type : UInt64 output domain : {5..=5} output : 5 +ast : octet_length('数据库') +raw expr : octet_length('数据库') +checked expr : octet_length("数据库") +optimized expr : 9_u64 +output type : UInt64 +output domain : {9..=9} +output : 9 + + +ast : octet_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ') +raw expr : octet_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ') +checked expr : octet_length("НОЧЬ НА ОКРАИНЕ МОСКВЫ") +optimized expr : 41_u64 +output type : UInt64 +output domain : {41..=41} +output : 41 + + +ast : octet_length('قاعدة البيانات') +raw expr : octet_length('قاعدة البيانات') +checked expr : octet_length("قاعدة البيانات") +optimized expr : 27_u64 +output type : UInt64 +output domain : {27..=27} +output : 27 + + ast : octet_length(NULL) raw expr : octet_length(NULL) -checked expr : length(CAST(NULL AS Variant NULL)) +checked expr : octet_length(CAST(NULL AS String NULL)) optimized expr : NULL -output type : UInt32 NULL +output type : UInt64 NULL output domain : {NULL} output : NULL -ast : length(a) -raw expr : length(a::String) -checked expr : length(a) +ast : octet_length(a) +raw expr : 
octet_length(a::String) +checked expr : octet_length(a) evaluation: +--------+-----------------------------------+----------------------------+ | | a | Output | @@ -196,35 +268,71 @@ evaluation (internal): ast : char_length('latin') raw expr : char_length('latin') -checked expr : char_length("latin") +checked expr : length("latin") optimized expr : 5_u64 output type : UInt64 output domain : {5..=5} output : 5 +ast : char_length('CAFÉ') +raw expr : char_length('CAFÉ') +checked expr : length("CAFÉ") +optimized expr : 4_u64 +output type : UInt64 +output domain : {4..=4} +output : 4 + + +ast : char_length('数据库') +raw expr : char_length('数据库') +checked expr : length("数据库") +optimized expr : 3_u64 +output type : UInt64 +output domain : {3..=3} +output : 3 + + +ast : char_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ') +raw expr : char_length('НОЧЬ НА ОКРАИНЕ МОСКВЫ') +checked expr : length("НОЧЬ НА ОКРАИНЕ МОСКВЫ") +optimized expr : 22_u64 +output type : UInt64 +output domain : {22..=22} +output : 22 + + +ast : char_length('قاعدة البيانات') +raw expr : char_length('قاعدة البيانات') +checked expr : length("قاعدة البيانات") +optimized expr : 14_u64 +output type : UInt64 +output domain : {14..=14} +output : 14 + + ast : char_length(NULL) raw expr : char_length(NULL) -checked expr : char_length(CAST(NULL AS String NULL)) +checked expr : length(CAST(NULL AS Variant NULL)) optimized expr : NULL -output type : UInt64 NULL +output type : UInt32 NULL output domain : {NULL} output : NULL ast : character_length(a) raw expr : character_length(a::String) -checked expr : char_length(a) -evaluation: -+--------+-----------------------------------+---------+ -| | a | Output | -+--------+-----------------------------------+---------+ -| Type | String | UInt64 | -| Domain | {"latin"..="кириллица and latin"} | Unknown | -| Row 0 | 'latin' | 5 | -| Row 1 | 'кириллица' | 9 | -| Row 2 | 'кириллица and latin' | 19 | -+--------+-----------------------------------+---------+ +checked expr : length(a) +evaluation: ++--------+-----------------------------------+----------------------------+ +| | a | Output | ++--------+-----------------------------------+----------------------------+ +| Type | String | UInt64 | +| Domain | {"latin"..="кириллица and latin"} | {0..=18446744073709551615} | +| Row 0 | 'latin' | 5 | +| Row 1 | 'кириллица' | 9 | +| Row 2 | 'кириллица and latin' | 19 | ++--------+-----------------------------------+----------------------------+ evaluation (internal): +--------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -395,28 +503,28 @@ output : '' ast : reverse('你好') raw expr : reverse('你好') checked expr : reverse("你好") -optimized expr : BDA5E5A0BDE4 +optimized expr : "好你" output type : String -output domain : {"��堽�"..="��堽�"} -output : BDA5E5A0BDE4 +output domain : {"好你"..="好你"} +output : '好你' ast : reverse('ß😀山') raw expr : reverse('ß😀山') checked expr : reverse("ß😀山") -optimized expr : B1B1E580989FF09FC3 +optimized expr : "山😀ß" output type : String -output domain : {"��倘���"..="��倘���"} -output : B1B1E580989FF09FC3 +output domain : {"山😀ß"..="山😀ß"} +output : '山😀ß' ast : reverse('Dobrý den') raw expr : reverse('Dobrý den') checked expr : reverse("Dobrý den") -optimized expr : 6E656420BDC372626F44 +optimized expr : "ned ýrboD" output type : String -output domain : {"ned ��rboD"..="ned ��rboD"} -output : 6E656420BDC372626F44 +output domain : {"ned ýrboD"..="ned ýrboD"} +output : 'ned ýrboD' ast 
: reverse(Null) @@ -575,6 +683,33 @@ output domain : {""..=""} output : '' +ast : ltrim(' 你 好 ') +raw expr : ltrim(' 你 好 ') +checked expr : ltrim(" 你 好 ") +optimized expr : "你 好 " +output type : String +output domain : {"你 好 "..="你 好 "} +output : '你 好 ' + + +ast : ltrim(' 분산 데이터베이스 ') +raw expr : ltrim(' 분산 데이터베이스 ') +checked expr : ltrim(" 분산 데이터베이스 ") +optimized expr : "분산 데이터베이스 " +output type : String +output domain : {"분산 데이터베이스 "..="분산 데이터베이스 "} +output : '분산 데이터베이스 ' + + +ast : ltrim(' あなたのことが好きです ') +raw expr : ltrim(' あなたのことが好きです ') +checked expr : ltrim(" あなたのことが好きです ") +optimized expr : "あなたのことが好きです " +output type : String +output domain : {"あなたのことが好きです "..="あなたのことが好きです "} +output : 'あなたのことが好きです ' + + ast : ltrim(NULL) raw expr : ltrim(NULL) checked expr : ltrim(CAST(NULL AS String NULL)) @@ -625,6 +760,33 @@ output domain : {""..=""} output : '' +ast : rtrim(' 你 好 ') +raw expr : rtrim(' 你 好 ') +checked expr : rtrim(" 你 好 ") +optimized expr : " 你 好" +output type : String +output domain : {" 你 好"..=" 你 好"} +output : ' 你 好' + + +ast : rtrim(' 분산 데이터베이스 ') +raw expr : rtrim(' 분산 데이터베이스 ') +checked expr : rtrim(" 분산 데이터베이스 ") +optimized expr : " 분산 데이터베이스" +output type : String +output domain : {" 분산 데이터베이스"..=" 분산 데이터베이스"} +output : ' 분산 데이터베이스' + + +ast : rtrim(' あなたのことが好きです ') +raw expr : rtrim(' あなたのことが好きです ') +checked expr : rtrim(" あなたのことが好きです ") +optimized expr : " あなたのことが好きです" +output type : String +output domain : {" あなたのことが好きです"..=" あなたのことが好きです"} +output : ' あなたのことが好きです' + + ast : rtrim(NULL) raw expr : rtrim(NULL) checked expr : rtrim(CAST(NULL AS String NULL)) @@ -720,6 +882,24 @@ output domain : {"aaaaaaaa"..="aaaaaaaa"} output : 'aaaaaaaa' +ast : trim_leading('분산 데이터베이스', '분산 ') +raw expr : trim_leading('분산 데이터베이스', '분산 ') +checked expr : trim_leading("분산 데이터베이스", "분산 ") +optimized expr : "데이터베이스" +output type : String +output domain : {"데이터베이스"..="데이터베이스"} +output : '데이터베이스' + + +ast : trim_leading('あなたのことが好きです', 'あなたの') +raw expr : trim_leading('あなたのことが好きです', 'あなたの') +checked expr : trim_leading("あなたのことが好きです", "あなたの") +optimized expr : "ことが好きです" +output type : String +output domain : {"ことが好きです"..="ことが好きです"} +output : 'ことが好きです' + + ast : trim_leading(a, 'a') raw expr : trim_leading(a::String, 'a') checked expr : trim_leading(a, "a") @@ -853,6 +1033,24 @@ output domain : {"aaaaaaaa"..="aaaaaaaa"} output : 'aaaaaaaa' +ast : trim_trailing('분산 데이터베이스', '베이스') +raw expr : trim_trailing('분산 데이터베이스', '베이스') +checked expr : trim_trailing("분산 데이터베이스", "베이스") +optimized expr : "분산 데이터" +output type : String +output domain : {"분산 데이터"..="분산 데이터"} +output : '분산 데이터' + + +ast : trim_trailing('あなたのことが好きです', '好きです') +raw expr : trim_trailing('あなたのことが好きです', '好きです') +checked expr : trim_trailing("あなたのことが好きです", "好きです") +optimized expr : "あなたのことが" +output type : String +output domain : {"あなたのことが"..="あなたのことが"} +output : 'あなたのことが' + + ast : trim_trailing(a, 'b') raw expr : trim_trailing(a::String, 'b') checked expr : trim_trailing(a, "b") @@ -986,6 +1184,33 @@ output domain : {"aaaaaaaa"..="aaaaaaaa"} output : 'aaaaaaaa' +ast : trim_both(' 你 好 ', ' ') +raw expr : trim_both(' 你 好 ', ' ') +checked expr : trim_both(" 你 好 ", " ") +optimized expr : "你 好" +output type : String +output domain : {"你 好"..="你 好"} +output : '你 好' + + +ast : trim_both(' 분산 데이터베이스 ', ' ') +raw expr : trim_both(' 분산 데이터베이스 ', ' ') +checked expr : trim_both(" 분산 데이터베이스 ", " ") +optimized expr : "분산 데이터베이스" +output type : String +output domain : {"분산 데이터베이스"..="분산 데이터베이스"} +output : '분산 데이터베이스' + + +ast : trim_both(' 
あなたのことが好きです ', ' ') +raw expr : trim_both(' あなたのことが好きです ', ' ') +checked expr : trim_both(" あなたのことが好きです ", " ") +optimized expr : "あなたのことが好きです" +output type : String +output domain : {"あなたのことが好きです"..="あなたのことが好きです"} +output : 'あなたのことが好きです' + + ast : trim_both(a, 'a') raw expr : trim_both(a::String, 'a') checked expr : trim_both(a, "a") @@ -1074,6 +1299,33 @@ output domain : {""..=""} output : '' +ast : trim(' 你 好 ') +raw expr : trim(' 你 好 ') +checked expr : trim(" 你 好 ") +optimized expr : "你 好" +output type : String +output domain : {"你 好"..="你 好"} +output : '你 好' + + +ast : trim(' 분산 데이터베이스 ') +raw expr : trim(' 분산 데이터베이스 ') +checked expr : trim(" 분산 데이터베이스 ") +optimized expr : "분산 데이터베이스" +output type : String +output domain : {"분산 데이터베이스"..="분산 데이터베이스"} +output : '분산 데이터베이스' + + +ast : trim(' あなたのことが好きです ') +raw expr : trim(' あなたのことが好きです ') +checked expr : trim(" あなたのことが好きです ") +optimized expr : "あなたのことが好きです" +output type : String +output domain : {"あなたのことが好きです"..="あなたのことが好きです"} +output : 'あなたのことが好きです' + + ast : trim(NULL) raw expr : trim(NULL) checked expr : trim(CAST(NULL AS String NULL)) @@ -1553,6 +1805,15 @@ output domain : {NULL} output : NULL +ast : concat('忠犬ハチ公', 'CAFÉ', '数据库', 'قاعدة البيانات', 'НОЧЬ НА ОКРАИНЕ МОСКВЫ') +raw expr : concat('忠犬ハチ公', 'CAFÉ', '数据库', 'قاعدة البيانات', 'НОЧЬ НА ОКРАИНЕ МОСКВЫ') +checked expr : concat("忠犬ハチ公", "CAFÉ", "数据库", "قاعدة البيانات", "НОЧЬ НА ОКРАИНЕ МОСКВЫ") +optimized expr : "忠犬ハチ公CAFÉ数据库قاعدة البياناتНОЧЬ НА ОКРАИНЕ МОСКВЫ" +output type : String +output domain : {"忠犬ハチ公CAFÉ数据库قاعدة البياناتНОЧЬ НА ОКРАИНЕ МОСКВЫ"..="忠犬ハチ公CAFÉ数据库قاعدة البياناتНОЧЬ НА ОКРАИНЕ МОСКВЫ"} +output : '忠犬ハチ公CAFÉ数据库قاعدة البياناتНОЧЬ НА ОКРАИНЕ МОСКВЫ' + + ast : concat(a, '3', '4', '5') raw expr : concat(a::String, '3', '4', '5') checked expr : concat(a, "3", "4", "5") @@ -1618,6 +1879,15 @@ output domain : {NULL} output : NULL +ast : concat_ws(',', '忠犬ハチ公', 'CAFÉ', '数据库', 'قاعدة البيانات', 'НОЧЬ НА ОКРАИНЕ МОСКВЫ') +raw expr : concat_ws(',', '忠犬ハチ公', 'CAFÉ', '数据库', 'قاعدة البيانات', 'НОЧЬ НА ОКРАИНЕ МОСКВЫ') +checked expr : concat_ws(",", "忠犬ハチ公", "CAFÉ", "数据库", "قاعدة البيانات", "НОЧЬ НА ОКРАИНЕ МОСКВЫ") +optimized expr : "忠犬ハチ公,CAFÉ,数据库,قاعدة البيانات,НОЧЬ НА ОКРАИНЕ МОСКВЫ" +output type : String +output domain : {"忠犬ハチ公,CAFÉ,数据库,قاعدة البيانات,НОЧЬ НА ОКРАИНЕ МОСКВЫ"..="忠犬ハチ公,CAFÉ,数据库,قاعدة البيانات,НОЧЬ НА ОКРАИНЕ МОСКВЫ"} +output : '忠犬ハチ公,CAFÉ,数据库,قاعدة البيانات,НОЧЬ НА ОКРАИНЕ МОСКВЫ' + + ast : concat_ws(a, '3', '4', '5') raw expr : concat_ws(a::String, '3', '4', '5') checked expr : concat_ws(a, "3", "4", "5") @@ -2430,18 +2700,19 @@ evaluation: +--------+-----------------+------------------+--------------+ | Type | String | String | Int8 | | Domain | {"cc"..="test"} | {"ccb"..="test"} | {-128..=127} | -| Row 0 | 'hi' | 'i' | 1 | -| Row 1 | 'test' | 'test' | 0 | -| Row 2 | 'cc' | 'ccb' | -1 | +| Row 0 | 'i' | 'hi' | 1 | +| Row 1 | 'h' | 'hi' | -1 | +| Row 2 | 'test' | 'test' | 0 | +| Row 3 | 'cc' | 'ccb' | -1 | +--------+-----------------+------------------+--------------+ evaluation (internal): -+--------+------------------------------------------------------------------+ -| Column | Data | -+--------+------------------------------------------------------------------+ -| a | StringColumn { data: 0x6869746573746363, offsets: [0, 2, 6, 8] } | -| b | StringColumn { data: 0x6974657374636362, offsets: [0, 1, 5, 8] } | -| Output | Int8([1, 0, -1]) | -+--------+------------------------------------------------------------------+ 
++--------+----------------------------------------------------------------------------+ +| Column | Data | ++--------+----------------------------------------------------------------------------+ +| a | StringColumn { data: 0x6968746573746363, offsets: [0, 1, 2, 6, 8] } | +| b | StringColumn { data: 0x6869686974657374636362, offsets: [0, 2, 4, 8, 11] } | +| Output | Int8([1, -1, 0, -1]) | ++--------+----------------------------------------------------------------------------+ ast : locate('bar', 'foobarbar') @@ -2471,6 +2742,15 @@ output domain : {1..=1} output : 1 +ast : locate('好世', '你好世界') +raw expr : locate('好世', '你好世界') +checked expr : locate("好世", "你好世界") +optimized expr : 2_u64 +output type : UInt64 +output domain : {2..=2} +output : 2 + + ast : instr('foobarbar', 'bar') raw expr : instr('foobarbar', 'bar') checked expr : instr("foobarbar", "bar") @@ -2543,6 +2823,15 @@ output domain : {7..=7} output : 7 +ast : locate('好世', '你好世界', 1) +raw expr : locate('好世', '你好世界', 1) +checked expr : locate("好世", "你好世界", to_uint64(1_u8)) +optimized expr : 2_u64 +output type : UInt64 +output domain : {2..=2} +output : 2 + + ast : locate(a, b, c) raw expr : locate(a::String, b::String, c::UInt8) checked expr : locate(a, b, to_uint64(c)) @@ -2571,17 +2860,17 @@ evaluation (internal): ast : char(65,66,67) raw expr : char(65, 66, 67) checked expr : char(65_u8, 66_u8, 67_u8) -optimized expr : "ABC" -output type : String -output domain : {"ABC"..="ABC"} -output : 'ABC' +optimized expr : 414243 +output type : Binary +output domain : Undefined +output : 414243 ast : char(65, null) raw expr : char(65, NULL) checked expr : char(CAST(65_u8 AS UInt8 NULL), CAST(NULL AS UInt8 NULL)) optimized expr : NULL -output type : String NULL +output type : Binary NULL output domain : {NULL} output : NULL @@ -2590,14 +2879,14 @@ ast : char(a, b, c) raw expr : char(a::UInt8, b::UInt8, c::UInt8) checked expr : char(a, b, c) evaluation: -+--------+-----------+-----------+-----------+--------+ -| | a | b | c | Output | -+--------+-----------+-----------+-----------+--------+ -| Type | UInt8 | UInt8 | UInt8 | String | -| Domain | {66..=67} | {98..=99} | {68..=69} | {""..} | -| Row 0 | 66 | 98 | 68 | 'BbD' | -| Row 1 | 67 | 99 | 69 | 'CcE' | -+--------+-----------+-----------+-----------+--------+ ++--------+-----------+-----------+-----------+-----------+ +| | a | b | c | Output | ++--------+-----------+-----------+-----------+-----------+ +| Type | UInt8 | UInt8 | UInt8 | Binary | +| Domain | {66..=67} | {98..=99} | {68..=69} | Undefined | +| Row 0 | 66 | 98 | 68 | 426244 | +| Row 1 | 67 | 99 | 69 | 436345 | ++--------+-----------+-----------+-----------+-----------+ evaluation (internal): +--------+-----------------------------------------------------------+ | Column | Data | @@ -2605,7 +2894,7 @@ evaluation (internal): | a | UInt8([66, 67]) | | b | UInt8([98, 99]) | | c | UInt8([68, 69]) | -| Output | StringColumn { data: 0x426244436345, offsets: [0, 3, 6] } | +| Output | BinaryColumn { data: 0x426244436345, offsets: [0, 3, 6] } | +--------+-----------------------------------------------------------+ @@ -2613,14 +2902,14 @@ ast : char(a2, b, c) raw expr : char(a2::UInt8 NULL, b::UInt8, c::UInt8) checked expr : char(a2, CAST(b AS UInt8 NULL), CAST(c AS UInt8 NULL)) evaluation: -+--------+-----------+-----------+--------------------+-----------------+ -| | b | c | a2 | Output | -+--------+-----------+-----------+--------------------+-----------------+ -| Type | UInt8 | UInt8 | UInt8 NULL | String NULL | -| Domain | {98..=99} 
| {68..=69} | {66..=67} ∪ {NULL} | {""..} ∪ {NULL} | -| Row 0 | 98 | 68 | 66 | 'BbD' | -| Row 1 | 99 | 69 | NULL | NULL | -+--------+-----------+-----------+--------------------+-----------------+ ++--------+-----------+-----------+--------------------+--------------------+ +| | b | c | a2 | Output | ++--------+-----------+-----------+--------------------+--------------------+ +| Type | UInt8 | UInt8 | UInt8 NULL | Binary NULL | +| Domain | {98..=99} | {68..=69} | {66..=67} ∪ {NULL} | Undefined ∪ {NULL} | +| Row 0 | 98 | 68 | 66 | 426244 | +| Row 1 | 99 | 69 | NULL | NULL | ++--------+-----------+-----------+--------------------+--------------------+ evaluation (internal): +--------+--------------------------------------------------------------------------------------------------------------+ | Column | Data | @@ -2628,7 +2917,7 @@ evaluation (internal): | b | UInt8([98, 99]) | | c | UInt8([68, 69]) | | a2 | NullableColumn { column: UInt8([66, 67]), validity: [0b______01] } | -| Output | NullableColumn { column: StringColumn { data: 0x426244436345, offsets: [0, 3, 6] }, validity: [0b______01] } | +| Output | NullableColumn { column: BinaryColumn { data: 0x426244436345, offsets: [0, 3, 6] }, validity: [0b______01] } | +--------+--------------------------------------------------------------------------------------------------------------+ @@ -2639,8 +2928,8 @@ error: | ^^^^^^^^ no overload satisfies `char(UInt16)` has tried possible overloads: - char(UInt8) :: String : unable to unify `UInt16` with `UInt8` - char(UInt8 NULL) :: String NULL : unable to unify `UInt16` with `UInt8` + char(UInt8) :: Binary : unable to unify `UInt16` with `UInt8` + char(UInt8 NULL) :: Binary NULL : unable to unify `UInt16` with `UInt8` @@ -2757,6 +3046,24 @@ output domain : {"33333"..="33333"} output : '33333' +ast : repeat('你好世界', 3) +raw expr : repeat('你好世界', 3) +checked expr : repeat("你好世界", to_uint64(3_u8)) +optimized expr : "你好世界你好世界你好世界" +output type : String +output domain : {"你好世界你好世界你好世界"..="你好世界你好世界你好世界"} +output : '你好世界你好世界你好世界' + + +ast : repeat('こんにちは', 2) +raw expr : repeat('こんにちは', 2) +checked expr : repeat("こんにちは", to_uint64(2_u8)) +optimized expr : "こんにちはこんにちは" +output type : String +output domain : {"こんにちはこんにちは"..="こんにちはこんにちは"} +output : 'こんにちはこんにちは' + + error: --> SQL:1:1 | @@ -2858,6 +3165,15 @@ output domain : {NULL} output : NULL +ast : insert('你好世界', 1, 2, 'こんにちは') +raw expr : insert('你好世界', 1, 2, 'こんにちは') +checked expr : insert("你好世界", to_int64(1_u8), to_int64(2_u8), "こんにちは") +optimized expr : "こんにちは世界" +output type : String +output domain : {"こんにちは世界"..="こんにちは世界"} +output : 'こんにちは世界' + + ast : insert(NULL, 2, 100, 'NULL') raw expr : insert(NULL, 2, 100, 'NULL') checked expr : insert(CAST(NULL AS String NULL), CAST(2_u8 AS Int64 NULL), CAST(100_u8 AS Int64 NULL), CAST("NULL" AS String NULL)) @@ -3124,6 +3440,24 @@ output domain : {"bc"..="bc"} output : 'bc' +ast : substr('你好世界', 3) +raw expr : substr('你好世界', 3) +checked expr : substr("你好世界", to_int64(3_u8)) +optimized expr : "世界" +output type : String +output domain : {"世界"..="世界"} +output : '世界' + + +ast : substr('こんにちは', 2) +raw expr : substr('こんにちは', 2) +checked expr : substr("こんにちは", to_int64(2_u8)) +optimized expr : "んにちは" +output type : String +output domain : {"んにちは"..="んにちは"} +output : 'んにちは' + + ast : substr('abc', pos, len) raw expr : substr('abc', pos::Int8, len::UInt8) checked expr : substr("abc", to_int64(pos), to_uint64(len)) @@ -3216,6 +3550,24 @@ output domain : [{"a"..="c"}] output : ['a', 'c'] +ast : split('你好世界', '好') 
+raw expr : split('你好世界', '好') +checked expr : split("你好世界", "好") +optimized expr : ['你', '世界'] +output type : Array(String) +output domain : [{"世界"..="你"}] +output : ['你', '世界'] + + +ast : split('こんにちは', 'に') +raw expr : split('こんにちは', 'に') +checked expr : split("こんにちは", "に") +optimized expr : ['こん', 'ちは'] +output type : Array(String) +output domain : [{"こん"..="ちは"}] +output : ['こん', 'ちは'] + + ast : split(str, sep) raw expr : split(str::String NULL, sep::String NULL) checked expr : split(str, sep) diff --git a/src/query/pipeline/core/Cargo.toml b/src/query/pipeline/core/Cargo.toml index 7e7c96c9a2f3..0984b255a25a 100644 --- a/src/query/pipeline/core/Cargo.toml +++ b/src/query/pipeline/core/Cargo.toml @@ -19,6 +19,7 @@ async-backtrace = { workspace = true } async-trait = { workspace = true } futures = { workspace = true } minitrace = { workspace = true } +once_cell = { workspace = true } petgraph = "0.6.2" serde = { workspace = true } diff --git a/src/query/pipeline/core/src/lib.rs b/src/query/pipeline/core/src/lib.rs index 731667028d39..ff9ca5a777e3 100644 --- a/src/query/pipeline/core/src/lib.rs +++ b/src/query/pipeline/core/src/lib.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#![feature(once_cell_try)] +#![feature(variant_count)] #![allow(clippy::arc_with_non_send_sync)] pub mod processors; @@ -33,5 +35,7 @@ pub use pipe::SourcePipeBuilder; pub use pipe::TransformPipeBuilder; pub use pipeline::query_spill_prefix; pub use pipeline::Pipeline; +pub use processors::get_statistics_desc; pub use processors::PlanScope; pub use processors::PlanScopeGuard; +pub use processors::ProfileLabel; diff --git a/src/query/pipeline/core/src/processors/mod.rs b/src/query/pipeline/core/src/processors/mod.rs index 5c51f4ee2392..fc2a3d899d41 100644 --- a/src/query/pipeline/core/src/processors/mod.rs +++ b/src/query/pipeline/core/src/processors/mod.rs @@ -18,6 +18,7 @@ mod processor; mod duplicate_processor; mod port_trigger; pub mod profile; +mod profiles; mod resize_processor; mod shuffle_processor; @@ -34,6 +35,11 @@ pub use processor::Processor; pub use processor::ProcessorPtr; pub use profile::PlanScope; pub use profile::PlanScopeGuard; +pub use profile::Profile; +pub use profile::ProfileLabel; +pub use profiles::get_statistics_desc; +pub use profiles::ProfileDesc; +pub use profiles::ProfileStatisticsName; pub use resize_processor::create_resize_item; pub use resize_processor::ResizeProcessor; pub use shuffle_processor::ShuffleProcessor; diff --git a/src/query/pipeline/core/src/processors/port.rs b/src/query/pipeline/core/src/processors/port.rs index d39e519775fb..2d7c521edf60 100644 --- a/src/query/pipeline/core/src/processors/port.rs +++ b/src/query/pipeline/core/src/processors/port.rs @@ -19,6 +19,8 @@ use std::sync::Arc; use databend_common_exception::Result; use databend_common_expression::DataBlock; +use crate::processors::Profile; +use crate::processors::ProfileStatisticsName; use crate::processors::UpdateTrigger; use crate::unsafe_cell_wrap::UnSafeCellWrap; @@ -206,6 +208,7 @@ impl InputPort { } pub struct OutputPort { + record_profile: UnSafeCellWrap, shared: UnSafeCellWrap>, update_trigger: UnSafeCellWrap<*mut UpdateTrigger>, } @@ -213,6 +216,7 @@ pub struct OutputPort { impl OutputPort { pub fn create() -> Arc { Arc::new(OutputPort { + record_profile: UnSafeCellWrap::create(false), shared: UnSafeCellWrap::create(SharedStatus::create()), update_trigger: UnSafeCellWrap::create(std::ptr::null_mut()), }) @@ -223,6 
+227,19 @@ impl OutputPort { unsafe { UpdateTrigger::update_output(&self.update_trigger); + if *self.record_profile { + if let Ok(data_block) = &data { + Profile::record_usize_profile( + ProfileStatisticsName::OutputRows, + data_block.num_rows(), + ); + Profile::record_usize_profile( + ProfileStatisticsName::OutputBytes, + data_block.memory_size(), + ); + } + } + let data = Box::into_raw(Box::new(SharedData(data))); self.shared.swap(data, HAS_DATA, HAS_DATA); } @@ -271,6 +288,13 @@ impl OutputPort { pub unsafe fn set_trigger(&self, update_trigger: *mut UpdateTrigger) { self.update_trigger.set_value(update_trigger) } + + /// # Safety + /// + /// Method is thread unsafe and require thread safe call + pub unsafe fn record_profile(&self) { + self.record_profile.set_value(true); + } } /// Connect input and output ports. diff --git a/src/query/pipeline/core/src/processors/processor.rs b/src/query/pipeline/core/src/processors/processor.rs index e9e01d0af9c4..444d500da61a 100644 --- a/src/query/pipeline/core/src/processors/processor.rs +++ b/src/query/pipeline/core/src/processors/processor.rs @@ -25,8 +25,6 @@ use minitrace::prelude::*; use petgraph::graph::node_index; use petgraph::prelude::NodeIndex; -use crate::processors::profile::Profile; - #[derive(Debug)] pub enum Event { NeedData, @@ -82,8 +80,6 @@ pub trait Processor: Send { Err(ErrorCode::Unimplemented("Unimplemented async_process.")) } - fn record_profile(&self, _profile: &Profile) {} - fn details_status(&self) -> Option { None } @@ -158,11 +154,6 @@ impl ProcessorPtr { (*self.inner.get()).un_reacted(cause, self.id().index()) } - /// # Safety - pub unsafe fn record_profile(&self, profile: &Profile) { - (*self.inner.get()).record_profile(profile) - } - /// # Safety pub unsafe fn interrupt(&self) { (*self.inner.get()).interrupt() @@ -246,8 +237,4 @@ impl Processor for Box { fn details_status(&self) -> Option { (**self).details_status() } - - fn record_profile(&self, profile: &Profile) { - (**self).record_profile(profile) - } } diff --git a/src/query/pipeline/core/src/processors/profile.rs b/src/query/pipeline/core/src/processors/profile.rs index c68b447c2977..8bc36a33b0fc 100644 --- a/src/query/pipeline/core/src/processors/profile.rs +++ b/src/query/pipeline/core/src/processors/profile.rs @@ -12,53 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
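// The port.rs change above wires OutputPort::push_data to a thread-local "current profile",
// replacing the per-processor record_profile callbacks removed from processor.rs. The
// self-contained sketch below illustrates that pattern with simplified stand-in types
// (MiniProfile and Stat are illustrative, not the actual Profile/ProfileStatisticsName
// rewritten in this file); it is intended to compile on stable Rust.

use std::cell::RefCell;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

#[derive(Clone, Copy)]
enum Stat {
    OutputRows,
    OutputBytes,
}

struct MiniProfile {
    // One atomic counter per statistic, indexed by the enum discriminant.
    statistics: [AtomicUsize; 2],
}

impl MiniProfile {
    fn new() -> Self {
        Self {
            statistics: std::array::from_fn(|_| AtomicUsize::new(0)),
        }
    }
}

thread_local! {
    static CURRENT_PROFILE: RefCell<Option<Arc<MiniProfile>>> = const { RefCell::new(None) };
}

fn track_profile(profile: &Arc<MiniProfile>) {
    // Attach the profile to the current worker thread.
    CURRENT_PROFILE.with(|p| *p.borrow_mut() = Some(profile.clone()));
}

fn record_usize(stat: Stat, value: usize) {
    // Any code running on this thread can record a statistic without
    // holding a reference to the profile itself.
    CURRENT_PROFILE.with(|p| {
        if let Some(profile) = p.borrow().as_ref() {
            profile.statistics[stat as usize].fetch_add(value, Ordering::SeqCst);
        }
    });
}

fn main() {
    let profile = Arc::new(MiniProfile::new());
    track_profile(&profile);
    record_usize(Stat::OutputRows, 1024);
    record_usize(Stat::OutputBytes, 8192);
    assert_eq!(
        profile.statistics[Stat::OutputRows as usize].load(Ordering::SeqCst),
        1024
    );
}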
-use std::sync::atomic::AtomicU64; +use std::cell::RefCell; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; use std::sync::Arc; -#[derive(Default, Debug)] -pub struct Profile { - /// The id of processor - pub pid: usize, - /// The name of processor - pub p_name: String, - - pub plan_id: Option, - pub plan_name: Option, - pub plan_parent_id: Option, - - /// The time spent to process in nanoseconds - pub cpu_time: AtomicU64, - /// The time spent to wait in nanoseconds, usually used to - /// measure the time spent on waiting for I/O - pub wait_time: AtomicU64, - - pub exchange_rows: AtomicUsize, - pub exchange_bytes: AtomicUsize, -} - -impl Profile { - pub fn create(pid: usize, p_name: String, scope: Option) -> Profile { - Profile { - pid, - p_name, - cpu_time: AtomicU64::new(0), - wait_time: AtomicU64::new(0), - exchange_rows: AtomicUsize::new(0), - exchange_bytes: AtomicUsize::new(0), - plan_id: scope.as_ref().map(|x| x.id), - plan_name: scope.as_ref().map(|x| x.name.clone()), - plan_parent_id: scope.as_ref().and_then(|x| x.parent_id), - } - } -} +use crate::processors::profiles::ProfileStatisticsName; #[derive(Clone, serde::Serialize, serde::Deserialize)] pub struct PlanProfile { pub id: Option, pub name: Option, pub parent_id: Option, + pub title: Arc, + pub labels: Arc>, /// The time spent to process in nanoseconds pub cpu_time: usize, @@ -68,6 +35,8 @@ pub struct PlanProfile { pub exchange_rows: usize, pub exchange_bytes: usize, + + pub statistics: [usize; std::mem::variant_count::()], } impl PlanProfile { @@ -76,21 +45,34 @@ impl PlanProfile { id: profile.plan_id, name: profile.plan_name.clone(), parent_id: profile.plan_parent_id, - cpu_time: profile.cpu_time.load(Ordering::SeqCst) as usize, - wait_time: profile.wait_time.load(Ordering::SeqCst) as usize, - exchange_rows: profile.exchange_rows.load(Ordering::SeqCst), - exchange_bytes: profile.exchange_bytes.load(Ordering::SeqCst), + title: profile.title.clone(), + labels: profile.labels.clone(), + cpu_time: profile.load_profile(ProfileStatisticsName::CpuTime), + wait_time: profile.load_profile(ProfileStatisticsName::WaitTime), + exchange_rows: profile.load_profile(ProfileStatisticsName::ExchangeRows), + exchange_bytes: profile.load_profile(ProfileStatisticsName::ExchangeBytes), + statistics: std::array::from_fn(|index| { + profile.statistics[index].load(Ordering::SeqCst) + }), } } pub fn accumulate(&mut self, profile: &Profile) { - self.cpu_time += profile.cpu_time.load(Ordering::SeqCst) as usize; - self.wait_time += profile.wait_time.load(Ordering::SeqCst) as usize; - self.exchange_rows += profile.exchange_rows.load(Ordering::SeqCst); - self.exchange_bytes += profile.exchange_bytes.load(Ordering::SeqCst); + for index in 0..std::mem::variant_count::() { + self.statistics[index] += profile.statistics[index].load(Ordering::SeqCst); + } + + self.cpu_time += profile.load_profile(ProfileStatisticsName::CpuTime); + self.wait_time += profile.load_profile(ProfileStatisticsName::WaitTime); + self.exchange_rows += profile.load_profile(ProfileStatisticsName::ExchangeRows); + self.exchange_bytes += profile.load_profile(ProfileStatisticsName::ExchangeBytes); } pub fn merge(&mut self, profile: &PlanProfile) { + for index in 0..std::mem::variant_count::() { + self.statistics[index] += profile.statistics[index]; + } + self.cpu_time += profile.cpu_time; self.wait_time += profile.wait_time; self.exchange_rows += profile.exchange_rows; @@ -124,14 +106,95 @@ pub struct PlanScope { pub id: u32, pub name: String, pub parent_id: Option, + 
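// New scope metadata introduced in this PR: a human-readable title plus a list of
// ProfileLabel key/value pairs. Both are carried through Profile::create and
// PlanProfile::create below, so per-operator profiles keep their plan context.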
pub title: Arc, + pub labels: Arc>, } impl PlanScope { - pub fn create(id: u32, name: String) -> PlanScope { + pub fn create( + id: u32, + name: String, + title: Arc, + labels: Arc>, + ) -> PlanScope { PlanScope { id, + labels, + title, parent_id: None, name, } } } + +#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +pub struct ProfileLabel { + pub name: String, + pub value: Vec, +} + +impl ProfileLabel { + pub fn create(name: String, value: Vec) -> ProfileLabel { + ProfileLabel { name, value } + } +} + +pub struct Profile { + /// The id of processor + pub pid: usize, + /// The name of processor + pub p_name: String, + + pub plan_id: Option, + pub plan_name: Option, + pub plan_parent_id: Option, + pub labels: Arc>, + pub title: Arc, + + pub statistics: [AtomicUsize; std::mem::variant_count::()], +} + +thread_local! { + static CURRENT_PROFILE: RefCell>> = const { RefCell::new(None) }; +} + +impl Profile { + fn create_items() -> [AtomicUsize; std::mem::variant_count::()] { + std::array::from_fn(|_| AtomicUsize::new(0)) + } + pub fn create(pid: usize, p_name: String, scope: Option) -> Profile { + Profile { + pid, + p_name, + plan_id: scope.as_ref().map(|x| x.id), + plan_name: scope.as_ref().map(|x| x.name.clone()), + plan_parent_id: scope.as_ref().and_then(|x| x.parent_id), + statistics: Self::create_items(), + labels: scope + .as_ref() + .map(|x| x.labels.clone()) + .unwrap_or(Arc::new(vec![])), + title: scope + .as_ref() + .map(|x| x.title.clone()) + .unwrap_or(Arc::new(String::new())), + } + } + + pub fn track_profile(profile: &Arc) { + CURRENT_PROFILE.set(Some(profile.clone())) + } + + pub fn record_usize_profile(name: ProfileStatisticsName, value: usize) { + CURRENT_PROFILE.with(|x| match x.borrow().as_ref() { + None => {} + Some(profile) => { + profile.statistics[name as usize].fetch_add(value, Ordering::SeqCst); + } + }); + } + + pub fn load_profile(&self, name: ProfileStatisticsName) -> usize { + self.statistics[name as usize].load(Ordering::SeqCst) + } +} diff --git a/src/query/pipeline/core/src/processors/profiles.rs b/src/query/pipeline/core/src/processors/profiles.rs new file mode 100644 index 000000000000..b9119343417c --- /dev/null +++ b/src/query/pipeline/core/src/processors/profiles.rs @@ -0,0 +1,175 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
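// The Profile rewritten above and the ProfileStatisticsName enum introduced below share one
// trick: the statistics array is sized at compile time with std::mem::variant_count, so adding
// a new statistic automatically grows every counter array. A standalone nightly-only sketch of
// that indexing scheme (Metric is a made-up enum, not ProfileStatisticsName):

#![feature(variant_count)] // nightly feature, matching the gate added to lib.rs above

use std::sync::atomic::{AtomicUsize, Ordering};

#[derive(Clone, Copy)]
enum Metric {
    Rows,
    Bytes,
    Partitions,
}

fn main() {
    // One slot per enum variant, indexed by the variant's discriminant.
    let counters: [AtomicUsize; std::mem::variant_count::<Metric>()] =
        std::array::from_fn(|_| AtomicUsize::new(0));

    counters[Metric::Rows as usize].fetch_add(100, Ordering::SeqCst);
    counters[Metric::Bytes as usize].fetch_add(4096, Ordering::SeqCst);

    assert_eq!(std::mem::variant_count::<Metric>(), 3);
    assert_eq!(counters[Metric::Rows as usize].load(Ordering::SeqCst), 100);
    let _ = Metric::Partitions; // every variant contributes to the array length
}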
+ +use std::collections::HashMap; +use std::fmt::Display; +use std::fmt::Formatter; +use std::sync::Arc; + +use once_cell::sync::OnceCell; + +#[derive(Clone, Hash, Eq, PartialEq, serde::Serialize, serde::Deserialize, Debug)] +pub enum ProfileStatisticsName { + /// The time spent to process in nanoseconds + CpuTime, + /// The time spent to wait in nanoseconds, usually used to + /// measure the time spent on waiting for I/O + WaitTime, + ExchangeRows, + ExchangeBytes, + OutputRows, + OutputBytes, + ScanBytes, + ScanCacheBytes, + ScanPartitions, + SpillWriteCount, + SpillWriteBytes, + SpillWriteTime, + SpillReadCount, + SpillReadBytes, + SpillReadTime, +} + +impl Display for ProfileStatisticsName { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +impl From for ProfileStatisticsName { + fn from(value: usize) -> Self { + let statistics_index = get_statistics_name_index(); + + if value > statistics_index.len() { + panic!("logical error"); + } + + match &statistics_index[value] { + None => panic!("logical error {}", value), + Some(statistics_name) => statistics_name.clone(), + } + } +} + +#[derive(Clone, serde::Serialize, serde::Deserialize)] +pub struct ProfileDesc { + desc: &'static str, + display_name: &'static str, + index: usize, +} + +pub static PROFILES_DESC: OnceCell>> = + OnceCell::new(); + +pub static PROFILES_INDEX: OnceCell< + Arc<[Option; std::mem::variant_count::()]>, +> = OnceCell::new(); + +fn get_statistics_name_index() +-> Arc<[Option; std::mem::variant_count::()]> { + PROFILES_INDEX + .get_or_init(|| { + let statistics_desc = get_statistics_desc(); + let mut statistics_index = std::array::from_fn(|_v| None); + + for (k, v) in statistics_desc.iter() { + statistics_index[v.index] = Some(k.clone()); + } + + Arc::new(statistics_index) + }) + .clone() +} + +pub fn get_statistics_desc() -> Arc> { + PROFILES_DESC.get_or_init(|| { + Arc::new(HashMap::from([ + (ProfileStatisticsName::CpuTime, ProfileDesc { + display_name: "cpu time", + desc: "The time spent to process in nanoseconds", + index: ProfileStatisticsName::CpuTime as usize, + }), + (ProfileStatisticsName::WaitTime, ProfileDesc { + display_name: "wait time", + desc: "The time spent to wait in nanoseconds, usually used to measure the time spent on waiting for I/O", + index: ProfileStatisticsName::WaitTime as usize, + }), + (ProfileStatisticsName::ExchangeRows, ProfileDesc { + display_name: "exchange rows", + desc: "The number of data rows exchange between nodes in cluster mode", + index: ProfileStatisticsName::ExchangeRows as usize, + }), + (ProfileStatisticsName::ExchangeBytes, ProfileDesc { + display_name: "exchange bytes", + desc: "The number of data bytes exchange between nodes in cluster mode", + index: ProfileStatisticsName::ExchangeBytes as usize, + }), + (ProfileStatisticsName::OutputRows, ProfileDesc { + display_name: "output rows", + desc: "The number of rows from the physical plan output to the next physical plan", + index: ProfileStatisticsName::OutputRows as usize, + }), + (ProfileStatisticsName::OutputBytes, ProfileDesc { + display_name: "output bytes", + desc: "The number of bytes from the physical plan output to the next physical plan", + index: ProfileStatisticsName::OutputBytes as usize, + }), + (ProfileStatisticsName::ScanBytes, ProfileDesc { + display_name: "bytes scanned", + desc: "The bytes scanned of query", + index: ProfileStatisticsName::ScanBytes as usize, + }), + (ProfileStatisticsName::ScanCacheBytes, ProfileDesc { + display_name: "bytes scanned from 
cache", + desc: "The bytes scanned from cache of query", + index: ProfileStatisticsName::ScanCacheBytes as usize, + }), + (ProfileStatisticsName::ScanPartitions, ProfileDesc { + display_name: "partitions scanned", + desc: "The partitions scanned of query", + index: ProfileStatisticsName::ScanPartitions as usize, + }), + (ProfileStatisticsName::SpillWriteCount, ProfileDesc { + display_name: "numbers spilled by write", + desc: "The number of spilled by write", + index: ProfileStatisticsName::SpillWriteCount as usize, + }), + (ProfileStatisticsName::SpillWriteBytes, ProfileDesc { + display_name: "bytes spilled by write", + desc: "The bytes spilled by write", + index: ProfileStatisticsName::SpillWriteBytes as usize, + }), + (ProfileStatisticsName::SpillWriteTime, ProfileDesc { + display_name: "spilled time by write", + desc: "The time spent to write spill in millisecond", + index: ProfileStatisticsName::SpillWriteTime as usize, + }), + (ProfileStatisticsName::SpillReadCount, ProfileDesc { + display_name: "numbers spilled by read", + desc: "The number of spilled by read", + index: ProfileStatisticsName::SpillReadCount as usize, + }), + (ProfileStatisticsName::SpillReadBytes, ProfileDesc { + display_name: "bytes spilled by read", + desc: "The bytes spilled by read", + index: ProfileStatisticsName::SpillReadBytes as usize, + }), + (ProfileStatisticsName::SpillReadTime, ProfileDesc { + display_name: "spilled time by read", + desc: "The time spent to read spill in millisecond", + index: ProfileStatisticsName::SpillReadTime as usize, + }), + ])) + }).clone() +} diff --git a/src/query/pipeline/sinks/src/async_sink.rs b/src/query/pipeline/sinks/src/async_sink.rs index be7ed120acea..47c170cb9ffb 100644 --- a/src/query/pipeline/sinks/src/async_sink.rs +++ b/src/query/pipeline/sinks/src/async_sink.rs @@ -22,7 +22,6 @@ use databend_common_base::runtime::TrySpawn; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_pipeline_core::processors::profile::Profile; use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::Processor; @@ -47,8 +46,6 @@ pub trait AsyncSink: Send { fn details_status(&self) -> Option { None } - - fn record_profile(&self, _profile: &Profile) {} } pub struct AsyncSinker { @@ -173,8 +170,4 @@ impl Processor for AsyncSinker { fn details_status(&self) -> Option { self.inner.as_ref().and_then(|x| x.details_status()) } - - fn record_profile(&self, profile: &Profile) { - self.inner.as_ref().unwrap().record_profile(profile); - } } diff --git a/src/query/pipeline/sources/src/async_source.rs b/src/query/pipeline/sources/src/async_source.rs index ca1b651a0c2e..f3477f96219c 100644 --- a/src/query/pipeline/sources/src/async_source.rs +++ b/src/query/pipeline/sources/src/async_source.rs @@ -25,6 +25,8 @@ use databend_common_pipeline_core::processors::EventCause; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; #[async_trait::async_trait] pub trait AsyncSource: Send { @@ -120,6 +122,10 @@ impl Processor for AsyncSourcer { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + 
ProfileStatisticsName::ScanBytes, + data_block.memory_size(), + ); } if !T::SKIP_EMPTY_DATA_BLOCK || !data_block.is_empty() { diff --git a/src/query/pipeline/sources/src/input_formats/source_aligner.rs b/src/query/pipeline/sources/src/input_formats/source_aligner.rs index a9c3fe92461b..6c23694c08df 100644 --- a/src/query/pipeline/sources/src/input_formats/source_aligner.rs +++ b/src/query/pipeline/sources/src/input_formats/source_aligner.rs @@ -26,6 +26,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use log::debug; use crate::input_formats::input_pipeline::AligningStateTrait; @@ -131,6 +133,10 @@ impl Processor for Aligner { self.state = None; self.batch_rx = None; } + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + process_values.bytes, + ); self.ctx.scan_progress.incr(&process_values); Ok(()) } diff --git a/src/query/pipeline/sources/src/sync_source.rs b/src/query/pipeline/sources/src/sync_source.rs index c35907efff3d..3f3bcdc2348c 100644 --- a/src/query/pipeline/sources/src/sync_source.rs +++ b/src/query/pipeline/sources/src/sync_source.rs @@ -24,6 +24,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; /// Synchronized source. such as: /// - Memory storage engine. @@ -106,6 +108,10 @@ impl Processor for SyncSourcer { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + data_block.memory_size(), + ); self.generated_data = Some(data_block) } }; diff --git a/src/query/pipeline/transforms/Cargo.toml b/src/query/pipeline/transforms/Cargo.toml index 3fff363caa10..9637be9611d8 100644 --- a/src/query/pipeline/transforms/Cargo.toml +++ b/src/query/pipeline/transforms/Cargo.toml @@ -13,7 +13,6 @@ databend-common-base = { path = "../../../common/base" } databend-common-exception = { path = "../../../common/exception" } databend-common-expression = { path = "../../expression" } databend-common-pipeline-core = { path = "../core" } -databend-common-profile = { path = "../../profile" } async-backtrace = { workspace = true } async-trait = { workspace = true } diff --git a/src/query/pipeline/transforms/src/processors/mod.rs b/src/query/pipeline/transforms/src/processors/mod.rs index 0ffd8d9a2580..a9c76cf3f0a3 100644 --- a/src/query/pipeline/transforms/src/processors/mod.rs +++ b/src/query/pipeline/transforms/src/processors/mod.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
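// Editor's note, an illustrative sketch rather than part of this patch: the
// sources above now feed `ScanBytes` straight into the thread-local profile, and a
// finished `Profile` can be summarised by reading those slots back. The helper
// below is hypothetical; only the `Profile`/`ProfileStatisticsName` API comes from
// this diff.

use databend_common_pipeline_core::processors::Profile;
use databend_common_pipeline_core::processors::ProfileStatisticsName;

fn summarize_sketch(profile: &Profile) -> Vec<String> {
    // A few representative statistics; the full set is the ProfileStatisticsName enum.
    [
        ProfileStatisticsName::CpuTime,
        ProfileStatisticsName::WaitTime,
        ProfileStatisticsName::ScanBytes,
        ProfileStatisticsName::OutputRows,
    ]
    .into_iter()
    .map(|name| {
        let value = profile.load_profile(name.clone());
        format!("{}: {}", name, value)
    })
    .collect()
}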
-mod profile_wrapper; mod transforms; -pub use profile_wrapper::*; pub use transforms::*; diff --git a/src/query/pipeline/transforms/src/processors/profile_wrapper.rs b/src/query/pipeline/transforms/src/processors/profile_wrapper.rs deleted file mode 100644 index 51cc4388ab27..000000000000 --- a/src/query/pipeline/transforms/src/processors/profile_wrapper.rs +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; -use std::time::Instant; - -use databend_common_exception::Result; -use databend_common_expression::DataBlock; -use databend_common_pipeline_core::processors::Event; -use databend_common_pipeline_core::processors::InputPort; -use databend_common_pipeline_core::processors::OutputPort; -use databend_common_pipeline_core::processors::Processor; -use databend_common_profile::ProcessorProfile; -use databend_common_profile::SharedProcessorProfiles; - -use crate::processors::transforms::Transform; -use crate::processors::transforms::Transformer; - -/// A profile wrapper for `Processor` trait. -/// This wrapper will record the time cost of each processor. -/// But because of the limitation of `Processor` trait, -/// we can't get the number of rows processed by the processor. -pub struct ProcessorProfileWrapper { - inner: T, - prof_id: u32, - proc_profs: SharedProcessorProfiles, - - prof: ProcessorProfile, -} - -impl ProcessorProfileWrapper -where T: Processor + 'static -{ - pub fn create( - inner: T, - prof_id: u32, - proc_profs: SharedProcessorProfiles, - ) -> Box { - Box::new(Self { - inner, - prof_id, - proc_profs, - prof: ProcessorProfile::default(), - }) - } -} - -#[async_trait::async_trait] -impl Processor for ProcessorProfileWrapper -where T: Processor + 'static -{ - fn name(&self) -> String { - self.inner.name() - } - - fn as_any(&mut self) -> &mut dyn std::any::Any { - self - } - - fn event(&mut self) -> Result { - match self.inner.event()? { - Event::Finished => { - self.proc_profs - .lock() - .unwrap() - .update(self.prof_id, self.prof); - Ok(Event::Finished) - } - v => Ok(v), - } - } - - fn process(&mut self) -> Result<()> { - let instant = Instant::now(); - self.inner.process()?; - let elapsed = instant.elapsed(); - self.prof = self.prof - + ProcessorProfile { - cpu_time: elapsed, - ..Default::default() - }; - Ok(()) - } - - #[async_backtrace::framed] - async fn async_process(&mut self) -> Result<()> { - let instant = Instant::now(); - self.inner.async_process().await?; - let elapsed = instant.elapsed(); - self.prof = self.prof - + ProcessorProfile { - wait_time: elapsed, - ..Default::default() - }; - Ok(()) - } -} - -/// A profile wrapper for `Transform` trait. -/// This wrapper will record the time cost and the information -/// about the number of rows processed by the processor. 
-pub struct TransformProfileWrapper { - inner: T, - prof_id: u32, - proc_profs: SharedProcessorProfiles, - - prof: ProcessorProfile, -} - -impl TransformProfileWrapper -where T: Transform + 'static -{ - pub fn create( - inner: T, - input_port: Arc, - output_port: Arc, - prof_id: u32, - proc_profs: SharedProcessorProfiles, - ) -> Box { - Box::new(Transformer::create(input_port, output_port, Self { - inner, - prof_id, - proc_profs, - prof: ProcessorProfile::default(), - })) - } -} - -impl Transform for TransformProfileWrapper -where T: Transform + 'static -{ - const NAME: &'static str = "TransformProfileWrapper"; - - fn transform(&mut self, data: DataBlock) -> Result { - let input_rows = data.num_rows(); - let input_bytes = data.memory_size(); - - let instant = Instant::now(); - let res = self.inner.transform(data)?; - let elapsed = instant.elapsed(); - self.prof = self.prof - + ProcessorProfile { - cpu_time: elapsed, - wait_time: Default::default(), - input_rows, - input_bytes, - output_rows: res.num_rows(), - output_bytes: res.memory_size(), - }; - Ok(res) - } - - fn on_finish(&mut self) -> Result<()> { - self.proc_profs - .lock() - .unwrap() - .update(self.prof_id, self.prof); - Ok(()) - } -} - -/// A stub transform for collecting profile information -/// at some point of the pipeline. -/// For example, we can profiling the output data of a -/// processor by adding a [`ProfileStub`] after it. -/// [`ProfileStub`] will pass through all the data without -/// any modification. -#[allow(clippy::type_complexity)] -pub struct ProfileStub { - prof_id: u32, - proc_profs: SharedProcessorProfiles, - prof: ProcessorProfile, - - /// Callback function for processing the start event. - on_start: Box ProcessorProfile + Send + Sync + 'static>, - /// Callback function for processing the input data. - on_process: - Box ProcessorProfile + Send + Sync + 'static>, - /// Callback function for processing the finish event. - on_finish: Box ProcessorProfile + Send + Sync + 'static>, -} - -impl ProfileStub { - pub fn new(prof_id: u32, proc_profs: SharedProcessorProfiles) -> Self { - Self { - prof_id, - proc_profs, - prof: Default::default(), - on_start: Box::new(|_| Default::default()), - on_process: Box::new(|_, _| Default::default()), - on_finish: Box::new(|_| Default::default()), - } - } - - /// Create a new [`ProfileStub`] with `on_start` callback. - /// The previous callback will be called before the new one. - pub fn on_start( - self, - f: impl Fn(&ProcessorProfile) -> ProcessorProfile + Sync + Send + 'static, - ) -> Self { - Self { - on_start: Box::new(move |prof| f(&(self.on_start)(prof))), - ..self - } - } - - /// Create a new [`ProfileStub`] with `on_process` callback. - /// The previous callback will be called before the new one. - pub fn on_process( - self, - f: impl Fn(&DataBlock, &ProcessorProfile) -> ProcessorProfile + Sync + Send + 'static, - ) -> Self { - Self { - on_process: Box::new(move |data, prof| f(data, &(self.on_process)(data, prof))), - ..self - } - } - - /// Create a new [`ProfileStub`] with `on_finish` callback. - /// The previous callback will be called before the new one. - pub fn on_finish( - self, - f: impl Fn(&ProcessorProfile) -> ProcessorProfile + Sync + Send + 'static, - ) -> Self { - Self { - on_finish: Box::new(move |prof| f(&(self.on_finish)(prof))), - ..self - } - } - - /// Accumulate the number of output rows. 
- pub fn accumulate_output_rows(self) -> Self { - self.on_process(|data, prof| ProcessorProfile { - output_rows: prof.output_rows + data.num_rows(), - ..*prof - }) - } - - /// Accumulate the number of output bytes. - pub fn accumulate_output_bytes(self) -> Self { - self.on_process(|data, prof| ProcessorProfile { - output_bytes: prof.output_bytes + data.memory_size(), - ..*prof - }) - } -} - -impl Transform for ProfileStub { - const NAME: &'static str = "ProfileStub"; - - fn transform(&mut self, data: DataBlock) -> Result { - self.prof = self.prof + (self.on_process)(&data, &self.prof); - Ok(data) - } - - fn on_start(&mut self) -> Result<()> { - self.prof = self.prof + (self.on_start)(&self.prof); - Ok(()) - } - - fn on_finish(&mut self) -> Result<()> { - self.prof = self.prof + (self.on_finish)(&self.prof); - self.proc_profs - .lock() - .unwrap() - .update(self.prof_id, self.prof); - Ok(()) - } -} diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform.rs b/src/query/pipeline/transforms/src/processors/transforms/transform.rs index 91c8cde21e00..ae6ec7a0b785 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/transform.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/transform.rs @@ -21,7 +21,6 @@ use databend_common_exception::Result; use databend_common_expression::BlockMetaInfo; use databend_common_expression::BlockMetaInfoDowncast; use databend_common_expression::DataBlock; -use databend_common_pipeline_core::processors::profile::Profile; use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; @@ -45,8 +44,6 @@ pub trait Transform: Send { fn on_finish(&mut self) -> Result<()> { Ok(()) } - - fn record_profile(&self, _: &Profile) {} } pub struct Transformer { @@ -125,10 +122,6 @@ impl Processor for Transformer { Ok(()) } - - fn record_profile(&self, profile: &Profile) { - self.transform.record_profile(profile) - } } impl Transformer { diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform_multi_sort_merge.rs b/src/query/pipeline/transforms/src/processors/transforms/transform_multi_sort_merge.rs index 51f995edca21..8b227df8c583 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/transform_multi_sort_merge.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/transform_multi_sort_merge.rs @@ -38,14 +38,12 @@ use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::Pipe; use databend_common_pipeline_core::PipeItem; use databend_common_pipeline_core::Pipeline; -use databend_common_profile::SharedProcessorProfiles; use super::sort::HeapMerger; use super::sort::Rows; use super::sort::SimpleRows; use super::sort::SortedStream; use crate::processors::sort::utils::ORDER_COL_NAME; -use crate::processors::ProcessorProfileWrapper; pub fn try_add_multi_sort_merge( pipeline: &mut Pipeline, @@ -53,7 +51,6 @@ pub fn try_add_multi_sort_merge( block_size: usize, limit: Option, sort_columns_descriptions: Arc>, - prof_info: Option<(u32, SharedProcessorProfiles)>, remove_order_col: bool, ) -> Result<()> { debug_assert!(if !remove_order_col { @@ -75,7 +72,8 @@ pub fn try_add_multi_sort_merge( inputs_port.push(InputPort::create()); } let output_port = OutputPort::create(); - let processor = create_processor( + + let processor = ProcessorPtr::create(create_processor( inputs_port.clone(), output_port.clone(), schema, @@ -83,17 +81,7 @@ pub fn 
try_add_multi_sort_merge( limit, sort_columns_descriptions, remove_order_col, - )?; - - let processor = if let Some((plan_id, prof)) = &prof_info { - ProcessorPtr::create(ProcessorProfileWrapper::create( - processor, - *plan_id, - prof.clone(), - )) - } else { - ProcessorPtr::create(processor) - }; + )?); pipeline.add_pipe(Pipe::create(inputs_port.len(), 1, vec![PipeItem::create( processor, diff --git a/src/query/profile/Cargo.toml b/src/query/profile/Cargo.toml deleted file mode 100644 index ed81eeee1078..000000000000 --- a/src/query/profile/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "databend-common-profile" -version = { workspace = true } -authors = { workspace = true } -license = { workspace = true } -publish = { workspace = true } -edition = { workspace = true } - -[lib] -doctest = false -test = false - -[dependencies] -databend-common-base = { path = "../../common/base" } - -dashmap = { workspace = true } diff --git a/src/query/profile/src/mgr.rs b/src/query/profile/src/mgr.rs deleted file mode 100644 index 9f3e29f02af7..000000000000 --- a/src/query/profile/src/mgr.rs +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::VecDeque; -use std::sync::Arc; -use std::sync::Mutex; - -use dashmap::mapref::entry::Entry; -use dashmap::DashMap; -use databend_common_base::base::GlobalInstance; - -use crate::prof::QueryProfile; - -/// Default capacity of the LRU cache of query profiles. -const DEFAULT_QUERY_PROFILE_LIMIT: usize = 20; - -/// Manager of query profiling. -/// This is a singleton in every databend-query process. -pub struct QueryProfileManager { - /// The LRU cache of query profiles. - profiles: Lru, -} - -impl QueryProfileManager { - fn new(capacity: usize) -> Self { - QueryProfileManager { - profiles: Lru::new(capacity), - } - } - - pub fn init() { - GlobalInstance::set(Arc::new(Self::new(DEFAULT_QUERY_PROFILE_LIMIT))); - } - - pub fn instance() -> Arc { - GlobalInstance::get() - } - - /// Try to get the query profile by query ID. - pub fn get(&self, query_id: &str) -> Option> { - self.profiles.get(query_id) - } - - /// Inserts a query profile. - pub fn insert(&self, query_profile: Arc) { - self.profiles - .insert(query_profile.query_id.clone(), query_profile); - } - - /// Lists all query profiles. - pub fn list_all(&self) -> Vec> { - self.profiles.list_all() - } -} - -impl Default for QueryProfileManager { - fn default() -> Self { - QueryProfileManager::new(DEFAULT_QUERY_PROFILE_LIMIT) - } -} - -/// An LRU cache of query profiles. The expired query profiles -/// will be removed. -struct Lru { - /// The maximum number of query profiles to keep in memory. - /// If the number of query profiles exceeds this number, - /// the oldest one will be removed. - capacity: usize, - - /// The query profiles. - /// The key is the query ID. - /// The value is the query profile. - profiles: DashMap>, - - /// An LRU list of query IDs. 
- lru: Mutex>, -} - -impl Lru { - /// Creates a new LRU cache. - pub fn new(capacity: usize) -> Self { - Lru { - capacity, - profiles: DashMap::with_capacity(capacity), - lru: Mutex::new(VecDeque::with_capacity(capacity)), - } - } - - /// Gets the query profile by the query ID. - /// Notice that this method required to acquire the shared lock of the LRU list. - /// So don't call this method when the lock is already acquired. - pub fn get(&self, query_id: &str) -> Option> { - self.profiles.get(query_id).map(|v| v.value().clone()) - } - - /// Inserts a query profile. - /// This operation is thread-safe. - pub fn insert(&self, query_id: String, query_profile: Arc) { - // Lock the LRU list to ensure the consistency between the LRU list and the query profiles. - let mut lru = self.lru.lock().unwrap(); - - if let Entry::Occupied(mut prof) = self.profiles.entry(query_id.clone()) { - prof.insert(query_profile); - return; - } - - if self.profiles.len() >= self.capacity { - if let Some(query_id) = lru.pop_front() { - self.profiles.remove(&query_id); - } - } - - self.profiles.insert(query_id.clone(), query_profile); - lru.push_back(query_id); - } - - /// Lists all query profiles. - /// Notice that this method required to acquire the shared lock of the LRU list. - /// So don't call this method when the lock is already acquired. - pub fn list_all(&self) -> Vec> { - self.profiles.iter().map(|v| v.value().clone()).collect() - } -} diff --git a/src/query/profile/src/proc.rs b/src/query/profile/src/proc.rs deleted file mode 100644 index bd91dc88fabf..000000000000 --- a/src/query/profile/src/proc.rs +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashMap; -use std::fmt::Debug; -use std::sync::Arc; -use std::sync::Mutex; -use std::time::Duration; - -pub type SharedProcessorProfiles = Arc>>; - -/// Execution profile information of a `Processor`. -/// Can be merged with other `ProcessorProfile` using -/// `add` or `+` operator. 
-/// -/// # Example -/// ``` -/// let profile1 = ProcessorProfile::default(); -/// let profile2 = ProcessorProfile::default(); -/// let profile = profile1 + profile2; -/// ``` -#[derive(Default, Clone, Copy, Debug)] -pub struct ProcessorProfile { - /// The time spent to process in nanoseconds - pub cpu_time: Duration, - /// The time spent to wait in nanoseconds, usually used to - /// measure the time spent on waiting for I/O - pub wait_time: Duration, - /// Row count of the input data - pub input_rows: usize, - /// Byte size of the input data - pub input_bytes: usize, - /// Row count of the output data - pub output_rows: usize, - /// Byte size of the output data - pub output_bytes: usize, -} - -impl std::ops::Add for ProcessorProfile { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - Self { - cpu_time: self.cpu_time + rhs.cpu_time, - wait_time: self.wait_time + rhs.wait_time, - input_rows: self.input_rows + rhs.input_rows, - input_bytes: self.input_bytes + rhs.input_bytes, - output_rows: self.output_rows + rhs.output_rows, - output_bytes: self.output_bytes + rhs.output_bytes, - } - } -} - -#[derive(Default)] -pub struct ProcessorProfiles { - spans: HashMap, -} - -impl ProcessorProfiles -where K: std::hash::Hash + Eq + PartialEq + Clone + Debug -{ - pub fn update(&mut self, key: K, span: ProcessorProfile) { - let entry = self.spans.entry(key).or_default(); - *entry = *entry + span; - } - - pub fn iter(&self) -> impl Iterator { - self.spans.iter() - } - - pub fn get(&self, k: &K) -> Option<&ProcessorProfile> { - self.spans.get(k) - } -} diff --git a/src/query/profile/src/prof.rs b/src/query/profile/src/prof.rs deleted file mode 100644 index b42e7852076f..000000000000 --- a/src/query/profile/src/prof.rs +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::fmt::Display; -use std::fmt::Formatter; -use std::time::Duration; - -use crate::ProcessorProfile; - -#[derive(Debug, Clone)] -pub struct QueryProfile { - /// Query ID of the query profile - pub query_id: String, - - /// Flattened plan node profiles - pub operator_profiles: Vec, -} - -impl QueryProfile { - pub fn new(query_id: String, operator_profiles: Vec) -> Self { - QueryProfile { - query_id, - operator_profiles, - } - } -} - -#[derive(Debug, Clone)] -pub struct OperatorProfile { - /// ID of the plan node - pub id: u32, - - /// Type of the plan operator, e.g. 
`HashJoin` - pub operator_type: OperatorType, - - /// IDs of the children plan nodes - pub children: Vec, - - /// The execution information of the plan operator - pub execution_info: OperatorExecutionInfo, - - /// Attribute of the plan operator - pub attribute: OperatorAttribute, -} - -#[derive(Debug, Clone)] -pub enum OperatorType { - Join, - Aggregate, - AggregateExpand, - Filter, - ProjectSet, - EvalScalar, - Limit, - TableScan, - CteScan, - Sort, - UnionAll, - Project, - Window, - RowFetch, - Exchange, - Insert, - ConstantTableScan, - Udf, -} - -impl Display for OperatorType { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - OperatorType::Join => write!(f, "Join"), - OperatorType::Aggregate => write!(f, "Aggregate"), - OperatorType::AggregateExpand => write!(f, "AggregateExpand"), - OperatorType::Filter => write!(f, "Filter"), - OperatorType::ProjectSet => write!(f, "ProjectSet"), - OperatorType::EvalScalar => write!(f, "EvalScalar"), - OperatorType::Limit => write!(f, "Limit"), - OperatorType::TableScan => write!(f, "TableScan"), - OperatorType::Sort => write!(f, "Sort"), - OperatorType::UnionAll => write!(f, "UnionAll"), - OperatorType::Project => write!(f, "Project"), - OperatorType::Window => write!(f, "Window"), - OperatorType::RowFetch => write!(f, "RowFetch"), - OperatorType::Exchange => write!(f, "Exchange"), - OperatorType::Insert => write!(f, "Insert"), - OperatorType::CteScan => write!(f, "CteScan"), - OperatorType::ConstantTableScan => write!(f, "ConstantTableScan"), - OperatorType::Udf => write!(f, "Udf"), - } - } -} - -#[derive(Debug, Clone, Default)] -pub struct OperatorExecutionInfo { - pub process_time: Duration, - pub input_rows: usize, - pub input_bytes: usize, - pub output_rows: usize, - pub output_bytes: usize, -} - -impl From for OperatorExecutionInfo { - fn from(value: ProcessorProfile) -> Self { - (&value).into() - } -} - -impl From<&ProcessorProfile> for OperatorExecutionInfo { - fn from(value: &ProcessorProfile) -> Self { - OperatorExecutionInfo { - process_time: value.cpu_time, - input_rows: value.input_rows, - input_bytes: value.input_bytes, - output_rows: value.output_rows, - output_bytes: value.output_bytes, - } - } -} - -#[derive(Debug, Clone)] -pub enum OperatorAttribute { - Join(JoinAttribute), - Aggregate(AggregateAttribute), - AggregateExpand(AggregateExpandAttribute), - Filter(FilterAttribute), - EvalScalar(EvalScalarAttribute), - ProjectSet(ProjectSetAttribute), - Limit(LimitAttribute), - TableScan(TableScanAttribute), - Sort(SortAttribute), - Window(WindowAttribute), - Exchange(ExchangeAttribute), - CteScan(CteScanAttribute), - Udf(UdfAttribute), - Empty, -} - -#[derive(Debug, Clone)] -pub struct JoinAttribute { - pub join_type: String, - pub equi_conditions: String, - pub non_equi_conditions: String, -} - -#[derive(Debug, Clone)] -pub struct AggregateAttribute { - pub group_keys: String, - pub functions: String, -} - -#[derive(Debug, Clone)] -pub struct AggregateExpandAttribute { - pub group_keys: String, - pub aggr_exprs: String, -} - -#[derive(Debug, Clone)] -pub struct EvalScalarAttribute { - pub scalars: String, -} - -#[derive(Debug, Clone)] -pub struct ProjectSetAttribute { - pub functions: String, -} - -#[derive(Debug, Clone)] -pub struct FilterAttribute { - pub predicate: String, -} - -#[derive(Debug, Clone)] -pub struct LimitAttribute { - pub limit: usize, - pub offset: usize, -} - -#[derive(Debug, Clone)] -pub struct SortAttribute { - pub sort_keys: String, -} - -#[derive(Debug, Clone)] -pub struct 
TableScanAttribute { - pub qualified_name: String, -} - -#[derive(Debug, Clone)] -pub struct CteScanAttribute { - pub cte_idx: usize, -} - -#[derive(Debug, Clone)] -pub struct WindowAttribute { - pub functions: String, -} - -#[derive(Debug, Clone)] -pub struct ExchangeAttribute { - pub exchange_mode: String, -} - -#[derive(Debug, Clone)] -pub struct UdfAttribute { - pub scalars: String, -} diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index f2d59c499aa2..8b4e0cadd97e 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -61,7 +61,6 @@ databend-common-pipeline-core = { path = "../pipeline/core" } databend-common-pipeline-sinks = { path = "../pipeline/sinks" } databend-common-pipeline-sources = { path = "../pipeline/sources" } databend-common-pipeline-transforms = { path = "../pipeline/transforms" } -databend-common-profile = { path = "../profile" } databend-common-settings = { path = "../settings" } databend-common-sharing = { path = "../sharing" } databend-common-sql = { path = "../sql" } diff --git a/src/query/service/src/api/rpc/exchange/exchange_manager.rs b/src/query/service/src/api/rpc/exchange/exchange_manager.rs index 2a9512bd26ce..f6e7ca1e2a3e 100644 --- a/src/query/service/src/api/rpc/exchange/exchange_manager.rs +++ b/src/query/service/src/api/rpc/exchange/exchange_manager.rs @@ -31,7 +31,6 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_grpc::ConnectionFactory; use databend_common_pipeline_core::processors::profile::Profile; -use databend_common_profile::SharedProcessorProfiles; use databend_common_sql::executor::PhysicalPlan; use minitrace::prelude::*; use parking_lot::Mutex; @@ -231,9 +230,7 @@ impl DataExchangeManager { "Query {} not found in cluster.", packet.query_id ))), - Some(query_coordinator) => { - query_coordinator.prepare_pipeline(ctx, packet.enable_profiling, packet) - } + Some(query_coordinator) => query_coordinator.prepare_pipeline(ctx, packet), } } @@ -300,7 +297,6 @@ impl DataExchangeManager { pub async fn commit_actions( &self, ctx: Arc, - enable_profiling: bool, actions: QueryFragmentsActions, ) -> Result { let settings = ctx.get_settings(); @@ -327,7 +323,7 @@ impl DataExchangeManager { self.init_query_fragments_plan(&ctx, &local_query_fragments_plan_packet)?; // Get local pipeline of local task - let build_res = self.get_root_pipeline(ctx, enable_profiling, root_actions)?; + let build_res = self.get_root_pipeline(ctx, root_actions)?; actions .get_execute_partial_query_packets()? 
@@ -339,7 +335,6 @@ impl DataExchangeManager { fn get_root_pipeline( &self, ctx: Arc, - enable_profiling: bool, root_actions: &QueryFragmentActions, ) -> Result { let query_id = ctx.get_id(); @@ -353,12 +348,8 @@ impl DataExchangeManager { Some(query_coordinator) => { assert!(query_coordinator.fragment_exchanges.is_empty()); let injector = DefaultExchangeInjector::create(); - let mut build_res = query_coordinator.subscribe_fragment( - &ctx, - enable_profiling, - fragment_id, - injector, - )?; + let mut build_res = + query_coordinator.subscribe_fragment(&ctx, fragment_id, injector)?; let exchanges = std::mem::take(&mut query_coordinator.statistics_exchanges); let statistics_receiver = StatisticsReceiver::spawn_receiver(&ctx, exchanges)?; @@ -415,7 +406,6 @@ impl DataExchangeManager { &self, query_id: &str, fragment_id: usize, - enable_profiling: bool, injector: Arc, ) -> Result { let queries_coordinator_guard = self.queries_coordinator.lock(); @@ -431,12 +421,7 @@ impl DataExchangeManager { .query_ctx .clone(); - query_coordinator.subscribe_fragment( - &query_ctx, - enable_profiling, - fragment_id, - injector, - ) + query_coordinator.subscribe_fragment(&query_ctx, fragment_id, injector) } } } @@ -599,7 +584,6 @@ impl QueryCoordinator { pub fn prepare_pipeline( &mut self, ctx: &Arc, - enable_profiling: bool, packet: &QueryFragmentsPlanPacket, ) -> Result<()> { self.info = Some(QueryInfo { @@ -619,7 +603,7 @@ impl QueryCoordinator { for fragment in &packet.fragments { let fragment_id = fragment.fragment_id; if let Some(coordinator) = self.fragments_coordinator.get_mut(&fragment_id) { - coordinator.prepare_pipeline(ctx.clone(), enable_profiling)?; + coordinator.prepare_pipeline(ctx.clone())?; } } @@ -629,14 +613,13 @@ impl QueryCoordinator { pub fn subscribe_fragment( &mut self, ctx: &Arc, - enable_profiling: bool, fragment_id: usize, injector: Arc, ) -> Result { // Merge pipelines if exist locally pipeline if let Some(mut fragment_coordinator) = self.fragments_coordinator.remove(&fragment_id) { let info = self.info.as_ref().expect("QueryInfo is none"); - fragment_coordinator.prepare_pipeline(ctx.clone(), enable_profiling)?; + fragment_coordinator.prepare_pipeline(ctx.clone())?; if fragment_coordinator.pipeline_build_res.is_none() { return Err(ErrorCode::Internal( @@ -843,11 +826,7 @@ impl FragmentCoordinator { Err(ErrorCode::Internal("Cannot find data exchange.")) } - pub fn prepare_pipeline( - &mut self, - ctx: Arc, - enable_profiling: bool, - ) -> Result<()> { + pub fn prepare_pipeline(&mut self, ctx: Arc) -> Result<()> { if !self.initialized { self.initialized = true; @@ -857,8 +836,6 @@ impl FragmentCoordinator { pipeline_ctx.get_function_context()?, pipeline_ctx.get_settings(), pipeline_ctx, - enable_profiling, - SharedProcessorProfiles::default(), vec![], ); diff --git a/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs b/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs index 4de8c9941cc6..90c8ed4093ed 100644 --- a/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs +++ b/src/query/service/src/api/rpc/exchange/exchange_sink_writer.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; use std::sync::Arc; use databend_common_catalog::table_context::TableContext; @@ -26,6 +24,7 @@ use databend_common_pipeline_core::processors::profile::Profile; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_pipeline_core::PipeItem; use databend_common_pipeline_sinks::AsyncSink; use databend_common_pipeline_sinks::AsyncSinker; @@ -41,7 +40,6 @@ pub struct ExchangeWriterSink { source: String, destination: String, fragment: usize, - exchange_bytes: AtomicUsize, } impl ExchangeWriterSink { @@ -58,7 +56,6 @@ impl ExchangeWriterSink { source: source_id.to_string(), destination: destination_id.to_string(), fragment: fragment_id, - exchange_bytes: AtomicUsize::new(0), }) } } @@ -101,19 +98,12 @@ impl AsyncSink for ExchangeWriterSink { { metrics_inc_exchange_write_count(count); metrics_inc_exchange_write_bytes(bytes); - self.exchange_bytes.fetch_add(bytes, Ordering::Relaxed); + Profile::record_usize_profile(ProfileStatisticsName::ExchangeBytes, bytes); } Ok(false) } - fn record_profile(&self, profile: &Profile) { - profile.exchange_bytes.fetch_add( - self.exchange_bytes.swap(0, Ordering::Relaxed), - Ordering::Relaxed, - ); - } - fn details_status(&self) -> Option { #[derive(Debug)] #[allow(dead_code)] diff --git a/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs b/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs index 9599311c0fbf..00f645232d78 100644 --- a/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs +++ b/src/query/service/src/api/rpc/exchange/serde/exchange_serializer.rs @@ -14,8 +14,6 @@ use std::fmt::Debug; use std::fmt::Formatter; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; use std::sync::Arc; use databend_common_arrow::arrow::chunk::Chunk; @@ -32,10 +30,11 @@ use databend_common_expression::BlockMetaInfoPtr; use databend_common_expression::DataBlock; use databend_common_io::prelude::bincode_serialize_into_buf; use databend_common_io::prelude::BinaryWrite; -use databend_common_pipeline_core::processors::profile::Profile; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_pipeline_transforms::processors::BlockMetaTransform; use databend_common_pipeline_transforms::processors::BlockMetaTransformer; use databend_common_pipeline_transforms::processors::Transform; @@ -99,7 +98,6 @@ impl BlockMetaInfo for ExchangeSerializeMeta { pub struct TransformExchangeSerializer { options: WriteOptions, ipc_fields: Vec, - exchange_rows: AtomicUsize, } impl TransformExchangeSerializer { @@ -125,7 +123,6 @@ impl TransformExchangeSerializer { TransformExchangeSerializer { ipc_fields, options: WriteOptions { compression }, - exchange_rows: AtomicUsize::new(0), }, ))) } @@ -135,17 +132,9 @@ impl Transform for TransformExchangeSerializer { const NAME: &'static str = "ExchangeSerializerTransform"; fn transform(&mut self, data_block: DataBlock) -> Result { - self.exchange_rows - .fetch_add(data_block.num_rows(), Ordering::Relaxed); + 
Profile::record_usize_profile(ProfileStatisticsName::ExchangeRows, data_block.num_rows()); serialize_block(0, data_block, &self.ipc_fields, &self.options) } - - fn record_profile(&self, profile: &Profile) { - profile.exchange_rows.fetch_add( - self.exchange_rows.swap(0, Ordering::Relaxed), - Ordering::Relaxed, - ); - } } pub struct TransformScatterExchangeSerializer { diff --git a/src/query/service/src/api/rpc/packets/packet_executor.rs b/src/query/service/src/api/rpc/packets/packet_executor.rs index 77272cee6646..430a6630ecfd 100644 --- a/src/query/service/src/api/rpc/packets/packet_executor.rs +++ b/src/query/service/src/api/rpc/packets/packet_executor.rs @@ -38,8 +38,6 @@ pub struct QueryFragmentsPlanPacket { pub changed_settings: Arc, // We send nodes info for each node. This is a bad choice pub executors_info: HashMap>, - /// Enable profiling for this query - pub enable_profiling: bool, } impl QueryFragmentsPlanPacket { @@ -52,7 +50,6 @@ impl QueryFragmentsPlanPacket { executors_info: HashMap>, changed_settings: Arc, request_executor: String, - enable_profiling: bool, ) -> QueryFragmentsPlanPacket { QueryFragmentsPlanPacket { query_id, @@ -62,7 +59,6 @@ impl QueryFragmentsPlanPacket { executors_info, changed_settings, request_executor, - enable_profiling, } } } diff --git a/src/query/service/src/databases/system/system_database.rs b/src/query/service/src/databases/system/system_database.rs index 328b118121b1..04b686a5d6db 100644 --- a/src/query/service/src/databases/system/system_database.rs +++ b/src/query/service/src/databases/system/system_database.rs @@ -46,8 +46,6 @@ use databend_common_storages_system::ProcessesTable; use databend_common_storages_system::ProcessorProfileTable; use databend_common_storages_system::QueryCacheTable; use databend_common_storages_system::QueryLogTable; -use databend_common_storages_system::QueryProfileTable; -use databend_common_storages_system::QuerySummaryTable; use databend_common_storages_system::RolesTable; use databend_common_storages_system::SettingsTable; use databend_common_storages_system::StagesTable; @@ -117,12 +115,10 @@ impl SystemDatabase { TableFunctionsTable::create(sys_db_meta.next_table_id()), CachesTable::create(sys_db_meta.next_table_id()), IndexesTable::create(sys_db_meta.next_table_id()), - QueryProfileTable::create(sys_db_meta.next_table_id()), BackgroundTaskTable::create(sys_db_meta.next_table_id()), BackgroundJobTable::create(sys_db_meta.next_table_id()), BacktraceTable::create(sys_db_meta.next_table_id()), TempFilesTable::create(sys_db_meta.next_table_id()), - QuerySummaryTable::create(sys_db_meta.next_table_id()), TasksTable::create(sys_db_meta.next_table_id()), TaskHistoryTable::create(sys_db_meta.next_table_id()), ProcessorProfileTable::create(sys_db_meta.next_table_id()), diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 72061499d66d..3dbe19e6284d 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -25,7 +25,6 @@ use databend_common_config::GlobalConfig; use databend_common_config::InnerConfig; use databend_common_exception::Result; use databend_common_meta_app::schema::CatalogType; -use databend_common_profile::QueryProfileManager; use databend_common_sharing::ShareEndpointManager; use databend_common_storage::DataOperator; use databend_common_storage::ShareTableConfig; @@ -110,7 +109,6 @@ impl GlobalServices { .await?; RoleCacheManager::init()?; ShareEndpointManager::init()?; - QueryProfileManager::init(); 
DataOperator::init(&config.storage).await?; ShareTableConfig::init( diff --git a/src/query/service/src/interpreters/interpreter.rs b/src/query/service/src/interpreters/interpreter.rs index 6e8b2f981fa9..dbdd515fd560 100644 --- a/src/query/service/src/interpreters/interpreter.rs +++ b/src/query/service/src/interpreters/interpreter.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::sync::Arc; use std::time::SystemTime; @@ -19,7 +20,10 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::SendableDataBlockStream; +use databend_common_pipeline_core::get_statistics_desc; use databend_common_pipeline_core::processors::profile::PlanProfile; +use databend_common_pipeline_core::processors::ProfileDesc; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_pipeline_core::SourcePipeBuilder; use log::error; use log::info; @@ -91,12 +95,18 @@ pub trait Interpreter: Sync + Send { struct QueryProfiles { query_id: String, profiles: Vec, + statistics_desc: Arc>, } - info!(target: "databend::log::profile", "{}", serde_json::to_string(&QueryProfiles { - query_id: query_ctx.get_id(), - profiles: query_profiles, - })?); + info!( + target: "databend::log::profile", + "{}", + serde_json::to_string(&QueryProfiles { + query_id: query_ctx.get_id(), + profiles: query_profiles, + statistics_desc: get_statistics_desc(), + })? + ); } } diff --git a/src/query/service/src/interpreters/interpreter_catalog_show_create.rs b/src/query/service/src/interpreters/interpreter_catalog_show_create.rs index df5a8c81066a..e16b571ac897 100644 --- a/src/query/service/src/interpreters/interpreter_catalog_show_create.rs +++ b/src/query/service/src/interpreters/interpreter_catalog_show_create.rs @@ -72,18 +72,12 @@ impl Interpreter for ShowCreateCatalogInterpreter { let block = DataBlock::new( vec![ + BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(name))), BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(name.into_bytes())), - ), - BlockEntry::new( - DataType::String, - Value::Scalar(Scalar::String(catalog_type.into_bytes())), - ), - BlockEntry::new( - DataType::String, - Value::Scalar(Scalar::String(option.into_bytes())), + Value::Scalar(Scalar::String(catalog_type)), ), + BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(option))), ], 1, ); diff --git a/src/query/service/src/interpreters/interpreter_connection_desc.rs b/src/query/service/src/interpreters/interpreter_connection_desc.rs index 8f15a2f35cb4..19cf3ed569ce 100644 --- a/src/query/service/src/interpreters/interpreter_connection_desc.rs +++ b/src/query/service/src/interpreters/interpreter_connection_desc.rs @@ -54,11 +54,11 @@ impl Interpreter for DescConnectionInterpreter { .get_connection(&tenant, self.plan.name.as_str()) .await?; - let names = vec![connection.name.as_bytes().to_vec()]; - let types = vec![connection.storage_type.as_bytes().to_vec()]; + let names = vec![connection.name.clone()]; + let types = vec![connection.storage_type.clone()]; let conn = Connection::new(connection.storage_params).mask(); connection.storage_params = conn.conns; - let params = vec![connection.storage_params_display().as_bytes().to_vec()]; + let params = vec![connection.storage_params_display().clone()]; PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ 
StringType::from_data(names), diff --git a/src/query/service/src/interpreters/interpreter_connection_show.rs b/src/query/service/src/interpreters/interpreter_connection_show.rs index 4f5683daf83d..42b65606c62a 100644 --- a/src/query/service/src/interpreters/interpreter_connection_show.rs +++ b/src/query/service/src/interpreters/interpreter_connection_show.rs @@ -55,14 +55,11 @@ impl Interpreter for ShowConnectionsInterpreter { formats.sort_by(|a, b| a.name.cmp(&b.name)); - let names = formats - .iter() - .map(|x| x.name.as_bytes().to_vec()) - .collect::>(); + let names = formats.iter().map(|x| x.name.clone()).collect::>(); let types = formats .iter() - .map(|x| x.storage_type.as_bytes().to_vec()) + .map(|x| x.storage_type.clone()) .collect::>(); let options = formats @@ -70,7 +67,7 @@ impl Interpreter for ShowConnectionsInterpreter { .map(|x| { let conn = Connection::new(x.storage_params.clone()).mask(); x.storage_params = conn.conns; - x.storage_params_display().as_bytes().to_vec() + x.storage_params_display().clone() }) .collect::>(); diff --git a/src/query/service/src/interpreters/interpreter_copy_into_table.rs b/src/query/service/src/interpreters/interpreter_copy_into_table.rs index 5c2042f4da49..1fba025d4cb1 100644 --- a/src/query/service/src/interpreters/interpreter_copy_into_table.rs +++ b/src/query/service/src/interpreters/interpreter_copy_into_table.rs @@ -206,13 +206,13 @@ impl CopyIntoTableInterpreter { for entry in results { let status = entry.value(); if let Some(err) = &status.error { - files.push(entry.key().as_bytes().to_vec()); + files.push(entry.key().clone()); rows_loaded.push(status.num_rows_loaded as i32); errors_seen.push(err.num_errors as i32); - first_error.push(Some(err.first_error.error.to_string().as_bytes().to_vec())); + first_error.push(Some(err.first_error.error.to_string().clone())); first_error_line.push(Some(err.first_error.line as i32 + 1)); } else if return_all { - files.push(entry.key().as_bytes().to_vec()); + files.push(entry.key().clone()); rows_loaded.push(status.num_rows_loaded as i32); errors_seen.push(0); first_error.push(None); @@ -304,8 +304,7 @@ impl Interpreter for CopyIntoTableInterpreter { let (physical_plan, files, update_stream_meta) = self.build_physical_plan(&self.plan).await?; let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan, false) - .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; // Build commit insertion pipeline. 
{ diff --git a/src/query/service/src/interpreters/interpreter_data_mask_desc.rs b/src/query/service/src/interpreters/interpreter_data_mask_desc.rs index d5a3c6ba259b..491bed72cd66 100644 --- a/src/query/service/src/interpreters/interpreter_data_mask_desc.rs +++ b/src/query/service/src/interpreters/interpreter_data_mask_desc.rs @@ -71,8 +71,8 @@ impl Interpreter for DescDataMaskInterpreter { } }; - let name: Vec> = vec![self.plan.name.as_bytes().to_vec()]; - let create_on: Vec> = vec![policy.create_on.to_string().as_bytes().to_vec()]; + let name: Vec = vec![self.plan.name.clone()]; + let create_on: Vec = vec![policy.create_on.to_string().clone()]; let args = format!( "({})", policy @@ -83,12 +83,12 @@ impl Interpreter for DescDataMaskInterpreter { .collect::>() .join(",") ); - let signature: Vec> = vec![args.as_bytes().to_vec()]; - let return_type = vec![policy.return_type.as_bytes().to_vec()]; - let body = vec![policy.body.as_bytes().to_vec()]; + let signature: Vec = vec![args.clone()]; + let return_type = vec![policy.return_type.clone()]; + let body = vec![policy.body.clone()]; let comment = vec![match policy.comment { - Some(comment) => comment.as_bytes().to_vec(), - None => "".to_string().as_bytes().to_vec(), + Some(comment) => comment.clone(), + None => "".to_string().clone(), }]; let blocks = vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_database_show_create.rs b/src/query/service/src/interpreters/interpreter_database_show_create.rs index 7211abb0438a..7a5df3743619 100644 --- a/src/query/service/src/interpreters/interpreter_database_show_create.rs +++ b/src/query/service/src/interpreters/interpreter_database_show_create.rs @@ -74,11 +74,11 @@ impl Interpreter for ShowCreateDatabaseInterpreter { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(name.as_bytes().to_vec())), + Value::Scalar(Scalar::String(name.to_string())), ), BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(info.as_bytes().to_vec())), + Value::Scalar(Scalar::String(info.clone())), ), ], 1, diff --git a/src/query/service/src/interpreters/interpreter_delete.rs b/src/query/service/src/interpreters/interpreter_delete.rs index 15d6ca7ae3dc..ac9168212296 100644 --- a/src/query/service/src/interpreters/interpreter_delete.rs +++ b/src/query/service/src/interpreters/interpreter_delete.rs @@ -230,8 +230,7 @@ impl Interpreter for DeleteInterpreter { )?; build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan, false) - .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; } build_res.main_pipeline.add_lock_guard(lock_guard); diff --git a/src/query/service/src/interpreters/interpreter_explain.rs b/src/query/service/src/interpreters/interpreter_explain.rs index 1e2662543f21..dd4149fdfc2d 100644 --- a/src/query/service/src/interpreters/interpreter_explain.rs +++ b/src/query/service/src/interpreters/interpreter_explain.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
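// Editor's note, an illustrative sketch rather than part of this patch: the
// interpreter changes above all follow one migration, string scalars and columns
// are now built from `String`/`&str` instead of `Vec<u8>`. The import paths for
// StringType/DataBlock/FromData are the ones used in this diff; the remaining
// paths and the helper functions are assumed.

use databend_common_expression::types::DataType;
use databend_common_expression::types::StringType;
use databend_common_expression::BlockEntry;
use databend_common_expression::DataBlock;
use databend_common_expression::FromData;
use databend_common_expression::Scalar;
use databend_common_expression::Value;

fn one_row_sketch(name: String, comment: String) -> DataBlock {
    DataBlock::new(
        vec![
            // No more `.as_bytes().to_vec()`: Scalar::String takes an owned String.
            BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(name))),
            BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(comment))),
        ],
        1,
    )
}

fn string_column_sketch(lines: Vec<String>) -> DataBlock {
    // Columns are built directly from owned Strings (a Vec<&str> works the same way).
    DataBlock::new_from_columns(vec![StringType::from_data(lines)])
}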
+use std::collections::HashMap; use std::sync::Arc; use databend_common_ast::ast::ExplainKind; @@ -22,9 +23,7 @@ use databend_common_exception::Result; use databend_common_expression::types::StringType; use databend_common_expression::DataBlock; use databend_common_expression::FromData; -use databend_common_profile::QueryProfileManager; -use databend_common_profile::SharedProcessorProfiles; -use databend_common_sql::executor::ProfileHelper; +use databend_common_pipeline_core::processors::profile::PlanProfile; use databend_common_sql::optimizer::ColumnSet; use databend_common_sql::plans::UpdatePlan; use databend_common_sql::BindContext; @@ -260,7 +259,7 @@ impl ExplainInterpreter { } let result = plan - .format(metadata.clone(), SharedProcessorProfiles::default())? + .format(metadata.clone(), Default::default())? .format_pretty()?; let line_split_result: Vec<&str> = result.lines().collect(); let formatted_plan = StringType::from_data(line_split_result); @@ -283,7 +282,7 @@ impl ExplainInterpreter { // Format root pipeline let line_split_result = format!("{}", build_res.main_pipeline.display_indent()) .lines() - .map(|s| s.as_bytes().to_vec()) + .map(|l| l.to_string()) .collect::>(); let column = StringType::from_data(line_split_result); blocks.push(DataBlock::new_from_columns(vec![column])); @@ -291,7 +290,7 @@ impl ExplainInterpreter { for pipeline in build_res.sources_pipelines.iter() { let line_split_result = format!("\n{}", pipeline.display_indent()) .lines() - .map(|s| s.as_bytes().to_vec()) + .map(|l| l.to_string()) .collect::>(); let column = StringType::from_data(line_split_result); blocks.push(DataBlock::new_from_columns(vec![column])); @@ -313,14 +312,11 @@ impl ExplainInterpreter { let root_fragment = Fragmenter::try_create(ctx.clone())?.build_fragment(&plan)?; - let mut fragments_actions = QueryFragmentsActions::create(ctx.clone(), false); + let mut fragments_actions = QueryFragmentsActions::create(ctx.clone()); root_fragment.get_actions(ctx, &mut fragments_actions)?; let display_string = fragments_actions.display_indent(&metadata).to_string(); - let line_split_result = display_string - .lines() - .map(|s| s.as_bytes().to_vec()) - .collect::>(); + let line_split_result = display_string.lines().collect::>(); let formatted_plan = StringType::from_data(line_split_result); Ok(vec![DataBlock::new_from_columns(vec![formatted_plan])]) } @@ -331,7 +327,7 @@ impl ExplainInterpreter { let display_string = if let Some(plan) = interpreter.get_physical_plan().await? 
{ let root_fragment = Fragmenter::try_create(self.ctx.clone())?.build_fragment(&plan)?; - let mut fragments_actions = QueryFragmentsActions::create(self.ctx.clone(), false); + let mut fragments_actions = QueryFragmentsActions::create(self.ctx.clone()); root_fragment.get_actions(self.ctx.clone(), &mut fragments_actions)?; let ident = fragments_actions.display_indent(&update.metadata); @@ -339,10 +335,7 @@ impl ExplainInterpreter { } else { "Nothing to update".to_string() }; - let line_split_result = display_string - .lines() - .map(|s| s.as_bytes().to_vec()) - .collect::>(); + let line_split_result = display_string.lines().collect::>(); let formatted_plan = StringType::from_data(line_split_result); Ok(vec![DataBlock::new_from_columns(vec![formatted_plan])]) } @@ -357,46 +350,68 @@ impl ExplainInterpreter { ) -> Result> { let mut builder = PhysicalPlanBuilder::new(metadata.clone(), self.ctx.clone(), true); let plan = builder.build(s_expr, required).await?; - let mut build_res = build_query_pipeline(&self.ctx, &[], &plan, ignore_result).await?; + let build_res = build_query_pipeline(&self.ctx, &[], &plan, ignore_result).await?; - let prof_span_set = build_res.prof_span_set.clone(); + // Drain the data + let query_profiles = self.execute_and_get_profiles(build_res)?; + let result = plan + .format(metadata.clone(), query_profiles)? + .format_pretty()?; + let line_split_result: Vec<&str> = result.lines().collect(); + let formatted_plan = StringType::from_data(line_split_result); + Ok(vec![DataBlock::new_from_columns(vec![formatted_plan])]) + } + + fn execute_and_get_profiles( + &self, + mut build_res: PipelineBuildResult, + ) -> Result> { let settings = self.ctx.get_settings(); let query_id = self.ctx.get_id(); build_res.set_max_threads(settings.get_max_threads()? as usize); let settings = ExecutorSettings::try_create(&settings, query_id.clone())?; - // Drain the data - if build_res.main_pipeline.is_complete_pipeline()? { - let mut pipelines = build_res.sources_pipelines; - pipelines.push(build_res.main_pipeline); - - let complete_executor = PipelineCompleteExecutor::from_pipelines(pipelines, settings)?; - complete_executor.execute()?; - } else { - let mut pulling_executor = - PipelinePullingExecutor::from_pipelines(build_res, settings)?; - pulling_executor.start(); - while (pulling_executor.pull_data()?).is_some() {} + match build_res.main_pipeline.is_complete_pipeline()? { + true => { + let mut pipelines = build_res.sources_pipelines; + pipelines.push(build_res.main_pipeline); + + let executor = PipelineCompleteExecutor::from_pipelines(pipelines, settings)?; + executor.execute()?; + self.ctx.add_query_profiles( + &executor + .get_inner() + .get_profiles() + .iter() + .filter(|x| x.plan_id.is_some()) + .map(|x| PlanProfile::create(x)) + .collect::>(), + ); + } + false => { + let mut executor = PipelinePullingExecutor::from_pipelines(build_res, settings)?; + executor.start(); + while (executor.pull_data()?).is_some() {} + self.ctx.add_query_profiles( + &executor + .get_inner() + .get_profiles() + .iter() + .filter(|x| x.plan_id.is_some()) + .map(|x| PlanProfile::create(x)) + .collect::>(), + ); + } } - let profile = ProfileHelper::build_query_profile( - &query_id, - metadata, - &plan, - &prof_span_set.lock().unwrap(), - )?; - - // Record the query profile - let prof_mgr = QueryProfileManager::instance(); - prof_mgr.insert(Arc::new(profile)); - - let result = plan - .format(metadata.clone(), prof_span_set)? 
- .format_pretty()?; - let line_split_result: Vec<&str> = result.lines().collect(); - let formatted_plan = StringType::from_data(line_split_result); - Ok(vec![DataBlock::new_from_columns(vec![formatted_plan])]) + Ok(self + .ctx + .get_query_profiles() + .into_iter() + .filter(|x| x.id.is_some()) + .map(|x| (x.id.unwrap(), x)) + .collect::>()) } async fn explain_query( diff --git a/src/query/service/src/interpreters/interpreter_file_format_show.rs b/src/query/service/src/interpreters/interpreter_file_format_show.rs index dace4ee3430b..93edb23c74d7 100644 --- a/src/query/service/src/interpreters/interpreter_file_format_show.rs +++ b/src/query/service/src/interpreters/interpreter_file_format_show.rs @@ -54,14 +54,11 @@ impl Interpreter for ShowFileFormatsInterpreter { formats.sort_by(|a, b| a.name.cmp(&b.name)); - let names = formats - .iter() - .map(|x| x.name.as_bytes().to_vec()) - .collect::>(); + let names = formats.iter().map(|x| x.name.clone()).collect::>(); let options = formats .iter() - .map(|x| x.file_format_params.to_string().as_bytes().to_vec()) + .map(|x| x.file_format_params.to_string()) .collect::>(); PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_index_refresh.rs b/src/query/service/src/interpreters/interpreter_index_refresh.rs index 4058364f67e4..3edbef5be492 100644 --- a/src/query/service/src/interpreters/interpreter_index_refresh.rs +++ b/src/query/service/src/interpreters/interpreter_index_refresh.rs @@ -291,7 +291,7 @@ impl Interpreter for RefreshIndexInterpreter { query_plan = replace_read_source.replace(&query_plan)?; let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &query_plan, false).await?; + build_query_pipeline_without_render_result_set(&self.ctx, &query_plan).await?; let input_schema = query_plan.output_schema()?; diff --git a/src/query/service/src/interpreters/interpreter_insert.rs b/src/query/service/src/interpreters/interpreter_insert.rs index 9bd6b8dd4e14..ad60dc2b4404 100644 --- a/src/query/service/src/interpreters/interpreter_insert.rs +++ b/src/query/service/src/interpreters/interpreter_insert.rs @@ -230,12 +230,9 @@ impl Interpreter for InsertInterpreter { } }; - let mut build_res = build_query_pipeline_without_render_result_set( - &self.ctx, - &insert_select_plan, - false, - ) - .await?; + let mut build_res = + build_query_pipeline_without_render_result_set(&self.ctx, &insert_select_plan) + .await?; table.commit_insertion( self.ctx.clone(), diff --git a/src/query/service/src/interpreters/interpreter_merge_into.rs b/src/query/service/src/interpreters/interpreter_merge_into.rs index 930a24c5cdc5..88ac30ca6222 100644 --- a/src/query/service/src/interpreters/interpreter_merge_into.rs +++ b/src/query/service/src/interpreters/interpreter_merge_into.rs @@ -92,8 +92,7 @@ impl Interpreter for MergeIntoInterpreter { async fn execute2(&self) -> Result { let (physical_plan, _) = self.build_physical_plan().await?; let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan, false) - .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; // Add table lock before execution. // todo!(@zhyass) :But for now the lock maybe exist problem, let's open this after fix it. 
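In the EXPLAIN ANALYZE path above, the interpreter now drains the pipeline itself (a complete executor for sink-only pipelines, a pulling executor otherwise), collects `PlanProfile`s from the executor, records them on the query context, and finally indexes them by plan id for the plan formatter. A rough sketch of that last "filter, key by plan id" step, using simplified stand-in types (the real `PlanProfile`, executors, and context are Databend internals):

```rust
use std::collections::HashMap;

// Simplified stand-in for Databend's PlanProfile; the real type also carries
// timing and I/O statistics alongside the owning plan id.
#[derive(Clone, Debug)]
struct PlanProfile {
    id: Option<u32>,
    cpu_nanos: u64,
}

// Stand-in for "profiles reported by the executor after the pipeline is drained".
fn profiles_from_executor() -> Vec<PlanProfile> {
    vec![
        PlanProfile { id: Some(1), cpu_nanos: 1_200 },
        PlanProfile { id: None, cpu_nanos: 50 }, // processor not attached to a plan node
        PlanProfile { id: Some(2), cpu_nanos: 4_800 },
    ]
}

fn main() {
    // Keep only profiles that belong to a plan node and key them by plan id,
    // mirroring the filter/map/collect in execute_and_get_profiles above.
    let by_plan: HashMap<u32, PlanProfile> = profiles_from_executor()
        .into_iter()
        .filter(|p| p.id.is_some())
        .map(|p| (p.id.unwrap(), p))
        .collect();

    assert_eq!(by_plan.len(), 2);
    assert_eq!(by_plan[&2].cpu_nanos, 4_800);
}
```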
diff --git a/src/query/service/src/interpreters/interpreter_network_policies_show.rs b/src/query/service/src/interpreters/interpreter_network_policies_show.rs index d92d0d1be1c9..f38be2b417f4 100644 --- a/src/query/service/src/interpreters/interpreter_network_policies_show.rs +++ b/src/query/service/src/interpreters/interpreter_network_policies_show.rs @@ -53,10 +53,10 @@ impl Interpreter for ShowNetworkPoliciesInterpreter { let mut blocked_ip_lists = Vec::with_capacity(network_policies.len()); let mut comments = Vec::with_capacity(network_policies.len()); for network_policy in network_policies { - names.push(network_policy.name.as_bytes().to_vec()); - allowed_ip_lists.push(network_policy.allowed_ip_list.join(",").as_bytes().to_vec()); - blocked_ip_lists.push(network_policy.blocked_ip_list.join(",").as_bytes().to_vec()); - comments.push(network_policy.comment.as_bytes().to_vec()); + names.push(network_policy.name.clone()); + allowed_ip_lists.push(network_policy.allowed_ip_list.join(",").clone()); + blocked_ip_lists.push(network_policy.blocked_ip_list.join(",").clone()); + comments.push(network_policy.comment.clone()); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_network_policy_desc.rs b/src/query/service/src/interpreters/interpreter_network_policy_desc.rs index 94d2fb1a0300..080b9e20a2ea 100644 --- a/src/query/service/src/interpreters/interpreter_network_policy_desc.rs +++ b/src/query/service/src/interpreters/interpreter_network_policy_desc.rs @@ -53,10 +53,10 @@ impl Interpreter for DescNetworkPolicyInterpreter { .get_network_policy(&tenant, self.plan.name.as_str()) .await?; - let names = vec![network_policy.name.as_bytes().to_vec()]; - let allowed_ip_lists = vec![network_policy.allowed_ip_list.join(",").as_bytes().to_vec()]; - let blocked_ip_lists = vec![network_policy.blocked_ip_list.join(",").as_bytes().to_vec()]; - let comments = vec![network_policy.comment.as_bytes().to_vec()]; + let names = vec![network_policy.name.clone()]; + let allowed_ip_lists = vec![network_policy.allowed_ip_list.join(",").clone()]; + let blocked_ip_lists = vec![network_policy.blocked_ip_list.join(",").clone()]; + let comments = vec![network_policy.comment.clone()]; PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ StringType::from_data(names), diff --git a/src/query/service/src/interpreters/interpreter_password_policy_desc.rs b/src/query/service/src/interpreters/interpreter_password_policy_desc.rs index 8aafd73e6aeb..cdc9759a3793 100644 --- a/src/query/service/src/interpreters/interpreter_password_policy_desc.rs +++ b/src/query/service/src/interpreters/interpreter_password_policy_desc.rs @@ -64,19 +64,19 @@ impl Interpreter for DescPasswordPolicyInterpreter { .await?; let properties = vec![ - "NAME".as_bytes().to_vec(), - "COMMENT".as_bytes().to_vec(), - "PASSWORD_MIN_LENGTH".as_bytes().to_vec(), - "PASSWORD_MAX_LENGTH".as_bytes().to_vec(), - "PASSWORD_MIN_UPPER_CASE_CHARS".as_bytes().to_vec(), - "PASSWORD_MIN_LOWER_CASE_CHARS".as_bytes().to_vec(), - "PASSWORD_MIN_NUMERIC_CHARS".as_bytes().to_vec(), - "PASSWORD_MIN_SPECIAL_CHARS".as_bytes().to_vec(), - "PASSWORD_MIN_AGE_DAYS".as_bytes().to_vec(), - "PASSWORD_MAX_AGE_DAYS".as_bytes().to_vec(), - "PASSWORD_MAX_RETRIES".as_bytes().to_vec(), - "PASSWORD_LOCKOUT_TIME_MINS".as_bytes().to_vec(), - "PASSWORD_HISTORY".as_bytes().to_vec(), + "NAME".to_string(), + "COMMENT".to_string(), + "PASSWORD_MIN_LENGTH".to_string(), + "PASSWORD_MAX_LENGTH".to_string(), + 
"PASSWORD_MIN_UPPER_CASE_CHARS".to_string(), + "PASSWORD_MIN_LOWER_CASE_CHARS".to_string(), + "PASSWORD_MIN_NUMERIC_CHARS".to_string(), + "PASSWORD_MIN_SPECIAL_CHARS".to_string(), + "PASSWORD_MIN_AGE_DAYS".to_string(), + "PASSWORD_MAX_AGE_DAYS".to_string(), + "PASSWORD_MAX_RETRIES".to_string(), + "PASSWORD_LOCKOUT_TIME_MINS".to_string(), + "PASSWORD_HISTORY".to_string(), ]; let min_length = format!("{}", password_policy.min_length); @@ -92,19 +92,19 @@ impl Interpreter for DescPasswordPolicyInterpreter { let history = format!("{}", password_policy.history); let values = vec![ - password_policy.name.as_bytes().to_vec(), - password_policy.comment.as_bytes().to_vec(), - min_length.as_bytes().to_vec(), - max_length.as_bytes().to_vec(), - min_upper_case_chars.as_bytes().to_vec(), - min_lower_case_chars.as_bytes().to_vec(), - min_numeric_chars.as_bytes().to_vec(), - min_special_chars.as_bytes().to_vec(), - min_age_days.as_bytes().to_vec(), - max_age_days.as_bytes().to_vec(), - max_retries.as_bytes().to_vec(), - lockout_time_mins.as_bytes().to_vec(), - history.as_bytes().to_vec(), + password_policy.name.clone(), + password_policy.comment.clone(), + min_length.clone(), + max_length.clone(), + min_upper_case_chars.clone(), + min_lower_case_chars.clone(), + min_numeric_chars.clone(), + min_special_chars.clone(), + min_age_days.clone(), + max_age_days.clone(), + max_retries.clone(), + lockout_time_mins.clone(), + history.clone(), ]; let defaults = vec![ @@ -124,19 +124,19 @@ impl Interpreter for DescPasswordPolicyInterpreter { ]; let descriptions = vec![ - "Name of password policy.".as_bytes().to_vec(), - "Comment of password policy.".as_bytes().to_vec(), - "Minimum length of new password.".as_bytes().to_vec(), - "Maximum length of new password.".as_bytes().to_vec(), - "Minimum number of uppercase characters in new password.".as_bytes().to_vec(), - "Minimum number of lowercase characters in new password.".as_bytes().to_vec(), - "Minimum number of numeric characters in new password.".as_bytes().to_vec(), - "Minimum number of special characters in new password.".as_bytes().to_vec(), - "Period after a password is changed during which a password cannot be changed again, in days.".as_bytes().to_vec(), - "Period after which password must be changed, in days.".as_bytes().to_vec(), - "Number of attempts users have to enter the correct password before their account is locked.".as_bytes().to_vec(), - "Period of time for which users will be locked after entering their password incorrectly many times (specified by MAX_RETRIES), in minutes.".as_bytes().to_vec(), - "Number of most recent passwords that may not be repeated by the user.".as_bytes().to_vec(), + "Name of password policy.".to_string(), + "Comment of password policy.".to_string(), + "Minimum length of new password.".to_string(), + "Maximum length of new password.".to_string(), + "Minimum number of uppercase characters in new password.".to_string(), + "Minimum number of lowercase characters in new password.".to_string(), + "Minimum number of numeric characters in new password.".to_string(), + "Minimum number of special characters in new password.".to_string(), + "Period after a password is changed during which a password cannot be changed again, in days.".to_string(), + "Period after which password must be changed, in days.".to_string(), + "Number of attempts users have to enter the correct password before their account is locked.".to_string(), + "Period of time for which users will be locked after entering their password incorrectly many times 
(specified by MAX_RETRIES), in minutes.".to_string(), + "Number of most recent passwords that may not be repeated by the user.".to_string(), ]; PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_presign.rs b/src/query/service/src/interpreters/interpreter_presign.rs index 82cacd65c501..8ef714a32118 100644 --- a/src/query/service/src/interpreters/interpreter_presign.rs +++ b/src/query/service/src/interpreters/interpreter_presign.rs @@ -95,9 +95,7 @@ impl Interpreter for PresignInterpreter { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String( - presigned_req.method().as_str().as_bytes().to_vec(), - )), + Value::Scalar(Scalar::String(presigned_req.method().as_str().to_string())), ), BlockEntry::new( DataType::Variant, @@ -105,9 +103,7 @@ impl Interpreter for PresignInterpreter { ), BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String( - presigned_req.uri().to_string().as_bytes().to_vec(), - )), + Value::Scalar(Scalar::String(presigned_req.uri().to_string())), ), ], 1, diff --git a/src/query/service/src/interpreters/interpreter_replace.rs b/src/query/service/src/interpreters/interpreter_replace.rs index 6834a4e1d0df..71f740e9fb20 100644 --- a/src/query/service/src/interpreters/interpreter_replace.rs +++ b/src/query/service/src/interpreters/interpreter_replace.rs @@ -86,8 +86,7 @@ impl Interpreter for ReplaceInterpreter { // replace let (physical_plan, purge_info) = self.build_physical_plan().await?; let mut pipeline = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan, false) - .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; // purge if let Some((files, stage_info)) = purge_info { diff --git a/src/query/service/src/interpreters/interpreter_role_show.rs b/src/query/service/src/interpreters/interpreter_role_show.rs index e39322148dbe..c6023caf851d 100644 --- a/src/query/service/src/interpreters/interpreter_role_show.rs +++ b/src/query/service/src/interpreters/interpreter_role_show.rs @@ -64,10 +64,7 @@ impl Interpreter for ShowRolesInterpreter { .cloned() .unwrap_or_default(); - let names = roles - .iter() - .map(|x| x.name.as_bytes().to_vec()) - .collect::>(); + let names = roles.iter().map(|x| x.name.clone()).collect::>(); let inherited_roles: Vec = roles .iter() .map(|x| x.grants.roles().len() as u64) diff --git a/src/query/service/src/interpreters/interpreter_select.rs b/src/query/service/src/interpreters/interpreter_select.rs index 4c1493ea61f5..ff0d3a1250fc 100644 --- a/src/query/service/src/interpreters/interpreter_select.rs +++ b/src/query/service/src/interpreters/interpreter_select.rs @@ -26,7 +26,6 @@ use databend_common_pipeline_core::Pipe; use databend_common_pipeline_core::PipeItem; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_transforms::processors::TransformDummy; -use databend_common_profile::SharedProcessorProfiles; use databend_common_sql::executor::physical_plans::FragmentKind; use databend_common_sql::executor::PhysicalPlan; use databend_common_sql::parse_result_scan_args; @@ -233,7 +232,7 @@ impl Interpreter for SelectInterpreter { // 0. Need to build physical plan first to get the partitions. let physical_plan = self.build_physical_plan().await?; let query_plan = physical_plan - .format(self.metadata.clone(), SharedProcessorProfiles::default())? + .format(self.metadata.clone(), Default::default())? 
.format_pretty()?; info!( "Query id: {}, query plan: \n{}", diff --git a/src/query/service/src/interpreters/interpreter_share_desc.rs b/src/query/service/src/interpreters/interpreter_share_desc.rs index 815410d9660e..32ea39406a5f 100644 --- a/src/query/service/src/interpreters/interpreter_share_desc.rs +++ b/src/query/service/src/interpreters/interpreter_share_desc.rs @@ -61,21 +61,21 @@ impl Interpreter for DescShareInterpreter { return Ok(PipelineBuildResult::create()); } - let mut names: Vec> = vec![]; - let mut kinds: Vec> = vec![]; - let mut shared_owns: Vec> = vec![]; + let mut names: Vec = vec![]; + let mut kinds: Vec = vec![]; + let mut shared_owns: Vec = vec![]; for entry in resp.objects.iter() { match &entry.object { ShareGrantObjectName::Database(db) => { - kinds.push("DATABASE".to_string().as_bytes().to_vec()); - names.push(db.clone().as_bytes().to_vec()); + kinds.push("DATABASE".to_string()); + names.push(db.clone()); } ShareGrantObjectName::Table(db, table_name) => { - kinds.push("TABLE".to_string().as_bytes().to_vec()); - names.push(format!("{}.{}", db, table_name).as_bytes().to_vec()); + kinds.push("TABLE".to_string()); + names.push(format!("{}.{}", db, table_name)); } } - shared_owns.push(entry.grant_on.to_string().as_bytes().to_vec()); + shared_owns.push(entry.grant_on.to_string()); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_share_endpoint_show.rs b/src/query/service/src/interpreters/interpreter_share_endpoint_show.rs index 1664feb9de28..25bbbcb18f46 100644 --- a/src/query/service/src/interpreters/interpreter_share_endpoint_show.rs +++ b/src/query/service/src/interpreters/interpreter_share_endpoint_show.rs @@ -49,19 +49,19 @@ impl Interpreter for ShowShareEndpointInterpreter { .get_share_endpoint(self.plan.clone().into()) .await?; - let mut endpoints: Vec> = vec![]; - let mut urls: Vec> = vec![]; - let mut to_tenants: Vec> = vec![]; - let mut args: Vec> = vec![]; - let mut comments: Vec> = vec![]; - let mut created_on_vec: Vec> = vec![]; + let mut endpoints: Vec = vec![]; + let mut urls: Vec = vec![]; + let mut to_tenants: Vec = vec![]; + let mut args: Vec = vec![]; + let mut comments: Vec = vec![]; + let mut created_on_vec: Vec = vec![]; for (endpoint, meta) in resp.share_endpoint_meta_vec { - endpoints.push(endpoint.endpoint.clone().as_bytes().to_vec()); - urls.push(meta.url.clone().as_bytes().to_vec()); - to_tenants.push(meta.tenant.clone().as_bytes().to_vec()); - args.push(format!("{:?}", meta.args).as_bytes().to_vec()); - comments.push(meta.comment.unwrap_or_default().as_bytes().to_vec()); - created_on_vec.push(meta.create_on.to_string().as_bytes().to_vec()); + endpoints.push(endpoint.endpoint.clone()); + urls.push(meta.url.clone()); + to_tenants.push(meta.tenant.clone()); + args.push(format!("{:?}", meta.args)); + comments.push(meta.comment.unwrap_or_default()); + created_on_vec.push(meta.create_on.to_string()); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_share_show.rs b/src/query/service/src/interpreters/interpreter_share_show.rs index 18f18c59a039..91bea064e650 100644 --- a/src/query/service/src/interpreters/interpreter_share_show.rs +++ b/src/query/service/src/interpreters/interpreter_share_show.rs @@ -48,40 +48,26 @@ impl Interpreter for ShowSharesInterpreter { async fn execute2(&self) -> Result { let meta_api = UserApiProvider::instance().get_meta_store_client(); let tenant = 
self.ctx.get_tenant(); - let mut names: Vec> = vec![]; - let mut kinds: Vec> = vec![]; - let mut created_owns: Vec> = vec![]; - let mut database_names: Vec> = vec![]; - let mut from: Vec> = vec![]; - let mut to: Vec> = vec![]; - let mut comments: Vec> = vec![]; + let mut names: Vec = vec![]; + let mut kinds: Vec = vec![]; + let mut created_owns: Vec = vec![]; + let mut database_names: Vec = vec![]; + let mut from: Vec = vec![]; + let mut to: Vec = vec![]; + let mut comments: Vec = vec![]; // query all share endpoint for other tenant inbound shares let share_specs = ShareEndpointManager::instance() .get_inbound_shares(&tenant, None, None) .await?; for (from_tenant, share_spec) in share_specs { - names.push(share_spec.name.clone().as_bytes().to_vec()); - kinds.push("INBOUND".to_string().as_bytes().to_vec()); - created_owns.push( - share_spec - .share_on - .unwrap_or_default() - .to_string() - .as_bytes() - .to_vec(), - ); - database_names.push( - share_spec - .database - .unwrap_or_default() - .name - .as_bytes() - .to_vec(), - ); - from.push(from_tenant.as_bytes().to_vec()); - to.push(tenant.clone().as_bytes().to_vec()); - comments.push(share_spec.comment.unwrap_or_default().as_bytes().to_vec()); + names.push(share_spec.name.clone()); + kinds.push("INBOUND".to_string()); + created_owns.push(share_spec.share_on.unwrap_or_default().to_string()); + database_names.push(share_spec.database.unwrap_or_default().name); + from.push(from_tenant); + to.push(tenant.clone()); + comments.push(share_spec.comment.unwrap_or_default()); } let req = ShowSharesReq { @@ -90,19 +76,17 @@ impl Interpreter for ShowSharesInterpreter { let resp = meta_api.show_shares(req).await?; for entry in resp.outbound_accounts { - names.push(entry.share_name.share_name.clone().as_bytes().to_vec()); - kinds.push("OUTBOUND".to_string().as_bytes().to_vec()); - created_owns.push(entry.create_on.to_string().as_bytes().to_vec()); - database_names.push(entry.database_name.unwrap_or_default().as_bytes().to_vec()); - from.push(entry.share_name.tenant.clone().as_bytes().to_vec()); + names.push(entry.share_name.share_name.clone()); + kinds.push("OUTBOUND".to_string()); + created_owns.push(entry.create_on.to_string()); + database_names.push(entry.database_name.unwrap_or_default()); + from.push(entry.share_name.tenant.clone()); to.push( entry .accounts - .map_or("".to_string().as_bytes().to_vec(), |accounts| { - accounts.join(",").as_bytes().to_vec() - }), + .map_or("".to_string(), |accounts| accounts.join(",")), ); - comments.push(entry.comment.unwrap_or_default().as_bytes().to_vec()); + comments.push(entry.comment.unwrap_or_default()); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_share_show_grant_tenants.rs b/src/query/service/src/interpreters/interpreter_share_show_grant_tenants.rs index dc585d3cb3dc..d3c5b979ee3f 100644 --- a/src/query/service/src/interpreters/interpreter_share_show_grant_tenants.rs +++ b/src/query/service/src/interpreters/interpreter_share_show_grant_tenants.rs @@ -61,12 +61,12 @@ impl Interpreter for ShowGrantTenantsOfShareInterpreter { return Ok(PipelineBuildResult::create()); } - let mut granted_owns: Vec> = vec![]; - let mut accounts: Vec> = vec![]; + let mut granted_owns: Vec = vec![]; + let mut accounts: Vec = vec![]; for account in resp.accounts { - granted_owns.push(account.grant_on.to_string().as_bytes().to_vec()); - accounts.push(account.account.clone().as_bytes().to_vec()); + 
granted_owns.push(account.grant_on.to_string()); + accounts.push(account.account.clone()); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_show_grants.rs b/src/query/service/src/interpreters/interpreter_show_grants.rs index 5f05122010e6..f3800577f432 100644 --- a/src/query/service/src/interpreters/interpreter_show_grants.rs +++ b/src/query/service/src/interpreters/interpreter_show_grants.rs @@ -81,7 +81,7 @@ impl Interpreter for ShowGrantsInterpreter { .fold(grant_set, |a, b| a | b) .entries(); - let mut grant_list: Vec> = Vec::new(); + let mut grant_list: Vec = Vec::new(); for grant_entry in grant_entries { let object = grant_entry.object(); match object { @@ -95,13 +95,10 @@ impl Interpreter for ShowGrantsInterpreter { let catalog = self.ctx.get_catalog(catalog_name).await?; let db_name = catalog.get_db_name_by_id(*db_id).await?; let table_name = catalog.get_table_name_by_id(*table_id).await?; - grant_list.push( - format!( - "GRANT {} ON '{}'.'{}'.'{}' TO {}", - &privileges_str, catalog_name, db_name, table_name, identity - ) - .into(), - ); + grant_list.push(format!( + "GRANT {} ON '{}'.'{}'.'{}' TO {}", + &privileges_str, catalog_name, db_name, table_name, identity + )); } GrantObject::DatabaseById(catalog_name, db_id) => { let privileges_str = if grant_entry.has_all_available_privileges() { @@ -112,21 +109,13 @@ impl Interpreter for ShowGrantsInterpreter { }; let catalog = self.ctx.get_catalog(catalog_name).await?; let db_name = catalog.get_db_name_by_id(*db_id).await?; - grant_list.push( - format!( - "GRANT {} ON '{}'.'{}'.* TO {}", - &privileges_str, catalog_name, db_name, identity - ) - .as_bytes() - .to_vec(), - ); + grant_list.push(format!( + "GRANT {} ON '{}'.'{}'.* TO {}", + &privileges_str, catalog_name, db_name, identity + )); } _ => { - grant_list.push( - format!("{} TO {}", grant_entry, identity) - .as_bytes() - .to_vec(), - ); + grant_list.push(format!("{} TO {}", grant_entry, identity)); } } } diff --git a/src/query/service/src/interpreters/interpreter_show_object_grant_privileges.rs b/src/query/service/src/interpreters/interpreter_show_object_grant_privileges.rs index 403a6a503be5..4fb3e6215a28 100644 --- a/src/query/service/src/interpreters/interpreter_show_object_grant_privileges.rs +++ b/src/query/service/src/interpreters/interpreter_show_object_grant_privileges.rs @@ -56,14 +56,14 @@ impl Interpreter for ShowObjectGrantPrivilegesInterpreter { if resp.privileges.is_empty() { return Ok(PipelineBuildResult::create()); } - let mut share_names: Vec> = vec![]; - let mut privileges: Vec> = vec![]; - let mut created_owns: Vec> = vec![]; + let mut share_names: Vec = vec![]; + let mut privileges: Vec = vec![]; + let mut created_owns: Vec = vec![]; for privilege in resp.privileges { - share_names.push(privilege.share_name.as_bytes().to_vec()); - privileges.push(privilege.privileges.to_string().as_bytes().to_vec()); - created_owns.push(privilege.grant_on.to_string().as_bytes().to_vec()); + share_names.push(privilege.share_name); + privileges.push(privilege.privileges.to_string()); + created_owns.push(privilege.grant_on.to_string()); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_table_describe.rs b/src/query/service/src/interpreters/interpreter_table_describe.rs index 1efd4285b148..71a01056f3be 100644 --- a/src/query/service/src/interpreters/interpreter_table_describe.rs +++ 
b/src/query/service/src/interpreters/interpreter_table_describe.rs @@ -71,30 +71,30 @@ impl Interpreter for DescribeTableInterpreter { Ok(table.schema()) }?; - let mut names: Vec> = vec![]; - let mut types: Vec> = vec![]; - let mut nulls: Vec> = vec![]; - let mut default_exprs: Vec> = vec![]; - let mut extras: Vec> = vec![]; + let mut names: Vec = vec![]; + let mut types: Vec = vec![]; + let mut nulls: Vec = vec![]; + let mut default_exprs: Vec = vec![]; + let mut extras: Vec = vec![]; for field in schema.fields().iter() { - names.push(field.name().to_string().as_bytes().to_vec()); + names.push(field.name().to_string()); let non_null_type = field.data_type().remove_recursive_nullable(); - types.push(non_null_type.sql_name().as_bytes().to_vec()); + types.push(non_null_type.sql_name()); nulls.push(if field.is_nullable() { - "YES".to_string().as_bytes().to_vec() + "YES".to_string() } else { - "NO".to_string().as_bytes().to_vec() + "NO".to_string() }); match field.default_expr() { Some(expr) => { - default_exprs.push(expr.as_bytes().to_vec()); + default_exprs.push(expr.clone()); } None => { let value = Scalar::default_value(&field.data_type().into()); - default_exprs.push(value.to_string().as_bytes().to_vec()); + default_exprs.push(value.to_string()); } } let extra = match field.computed_expr() { @@ -102,7 +102,7 @@ impl Interpreter for DescribeTableInterpreter { Some(ComputedExpr::Stored(expr)) => format!("STORED COMPUTED COLUMN `{}`", expr), _ => "".to_string(), }; - extras.push(extra.as_bytes().to_vec()); + extras.push(extra); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index e9ee907e213b..06fd45622bfc 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -428,7 +428,7 @@ impl ModifyTableColumnInterpreter { cast_needed: true, })); let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &insert_plan, false).await?; + build_query_pipeline_without_render_result_set(&self.ctx, &insert_plan).await?; // 6. 
commit new meta schema and snapshots new_table.commit_insertion( diff --git a/src/query/service/src/interpreters/interpreter_table_optimize.rs b/src/query/service/src/interpreters/interpreter_table_optimize.rs index 687c951b7299..1748769ffc16 100644 --- a/src/query/service/src/interpreters/interpreter_table_optimize.rs +++ b/src/query/service/src/interpreters/interpreter_table_optimize.rs @@ -188,8 +188,7 @@ impl OptimizeTableInterpreter { )?; let build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan, false) - .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; build_res.main_pipeline } else { Pipeline::create() @@ -237,12 +236,9 @@ impl OptimizeTableInterpreter { mutator.removed_segment_summary, )?; - build_res = build_query_pipeline_without_render_result_set( - &self.ctx, - &physical_plan, - false, - ) - .await?; + build_res = + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan) + .await?; let ctx = self.ctx.clone(); let plan = self.plan.clone(); diff --git a/src/query/service/src/interpreters/interpreter_table_recluster.rs b/src/query/service/src/interpreters/interpreter_table_recluster.rs index c7dc851a710e..accbc8171839 100644 --- a/src/query/service/src/interpreters/interpreter_table_recluster.rs +++ b/src/query/service/src/interpreters/interpreter_table_recluster.rs @@ -166,8 +166,7 @@ impl Interpreter for ReclusterTableInterpreter { )?; let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan, false) - .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; assert!(build_res.main_pipeline.is_complete_pipeline()?); build_res.set_max_threads(max_threads); diff --git a/src/query/service/src/interpreters/interpreter_table_show_create.rs b/src/query/service/src/interpreters/interpreter_table_show_create.rs index 908d08282132..fde21dd3cf26 100644 --- a/src/query/service/src/interpreters/interpreter_table_show_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_show_create.rs @@ -179,11 +179,11 @@ impl ShowCreateTableInterpreter { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(name.as_bytes().to_vec())), + Value::Scalar(Scalar::String(name.to_string())), ), BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(table_create_sql.into_bytes())), + Value::Scalar(Scalar::String(table_create_sql)), ), ], 1, @@ -204,11 +204,11 @@ impl ShowCreateTableInterpreter { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(name.as_bytes().to_vec())), + Value::Scalar(Scalar::String(name.to_string())), ), BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(view_create_sql.into_bytes())), + Value::Scalar(Scalar::String(view_create_sql)), ), ], 1, @@ -240,12 +240,9 @@ impl ShowCreateTableInterpreter { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(stream_table.name().as_bytes().to_vec())), - ), - BlockEntry::new( - DataType::String, - Value::Scalar(Scalar::String(create_sql.into_bytes())), + Value::Scalar(Scalar::String(stream_table.name().to_string())), ), + BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(create_sql))), ], 1, ); @@ -277,12 +274,9 @@ impl ShowCreateTableInterpreter { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(name.as_bytes().to_vec())), - ), - BlockEntry::new( - DataType::String, - Value::Scalar(Scalar::String(ddl.into_bytes())), + 
Value::Scalar(Scalar::String(name.to_string())), ), + BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(ddl))), ], 1, ); diff --git a/src/query/service/src/interpreters/interpreter_table_vacuum.rs b/src/query/service/src/interpreters/interpreter_table_vacuum.rs index 243739fb08b6..b409586dd1f8 100644 --- a/src/query/service/src/interpreters/interpreter_table_vacuum.rs +++ b/src/query/service/src/interpreters/interpreter_table_vacuum.rs @@ -86,16 +86,9 @@ impl Interpreter for VacuumTableInterpreter { match purge_files_opt { None => return Ok(PipelineBuildResult::create()), - Some(purge_files) => { - let mut files: Vec> = Vec::with_capacity(purge_files.len()); - for file in purge_files.into_iter() { - files.push(file.as_bytes().to_vec()); - } - - PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ - StringType::from_data(files), - ])]) - } + Some(purge_files) => PipelineBuildResult::from_blocks(vec![ + DataBlock::new_from_columns(vec![StringType::from_data(purge_files)]), + ]), } } } diff --git a/src/query/service/src/interpreters/interpreter_task_alter.rs b/src/query/service/src/interpreters/interpreter_task_alter.rs index ccdaf4be222f..8104c9661230 100644 --- a/src/query/service/src/interpreters/interpreter_task_alter.rs +++ b/src/query/service/src/interpreters/interpreter_task_alter.rs @@ -68,6 +68,8 @@ impl AlterTaskInterpreter { when_condition: None, add_after: vec![], remove_after: vec![], + set_session_parameters: false, + session_parameters: Default::default(), }; match plan.alter_options { AlterTaskOptions::Resume => { @@ -81,6 +83,7 @@ impl AlterTaskInterpreter { comments, warehouse, suspend_task_after_num_failures, + session_parameters, } => { req.alter_task_type = AlterTaskType::Set as i32; req.schedule_options = schedule.map(make_schedule_options); @@ -91,6 +94,10 @@ impl AlterTaskInterpreter { }); req.suspend_task_after_num_failures = suspend_task_after_num_failures.map(|i| i as i32); + if let Some(session_parameters) = session_parameters { + req.set_session_parameters = true; + req.session_parameters = session_parameters; + } } AlterTaskOptions::Unset { .. 
} => { todo!() diff --git a/src/query/service/src/interpreters/interpreter_task_create.rs b/src/query/service/src/interpreters/interpreter_task_create.rs index 93ec5a276430..603274378485 100644 --- a/src/query/service/src/interpreters/interpreter_task_create.rs +++ b/src/query/service/src/interpreters/interpreter_task_create.rs @@ -63,6 +63,7 @@ impl CreateTaskInterpreter { if_not_exist: plan.if_not_exists, after: plan.after, when_condition: plan.when_condition, + session_parameters: plan.session_parameters, } } } diff --git a/src/query/service/src/interpreters/interpreter_update.rs b/src/query/service/src/interpreters/interpreter_update.rs index a8ef7f8f1682..11cdacd158f2 100644 --- a/src/query/service/src/interpreters/interpreter_update.rs +++ b/src/query/service/src/interpreters/interpreter_update.rs @@ -108,8 +108,7 @@ impl Interpreter for UpdateInterpreter { let mut build_res = PipelineBuildResult::create(); if let Some(physical_plan) = physical_plan { build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan, false) - .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; { let hook_operator = HookOperator::create( self.ctx.clone(), diff --git a/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs b/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs index 91e192f1945c..526a0fad20db 100644 --- a/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs +++ b/src/query/service/src/interpreters/interpreter_vacuum_drop_tables.rs @@ -138,12 +138,12 @@ impl Interpreter for VacuumDropTablesInterpreter { if let Some(limit) = self.plan.option.limit { len = min(len, limit); } - let mut tables: Vec> = Vec::with_capacity(len); - let mut files: Vec> = Vec::with_capacity(len); + let mut tables: Vec = Vec::with_capacity(len); + let mut files: Vec = Vec::with_capacity(len); let purge_files = &purge_files[0..len]; for file in purge_files.iter() { - tables.push(file.0.to_string().as_bytes().to_vec()); - files.push(file.1.to_string().as_bytes().to_vec()); + tables.push(file.0.to_string()); + files.push(file.1.to_string()); } PipelineBuildResult::from_blocks(vec![DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/lib.rs b/src/query/service/src/lib.rs index b9152a509b7b..64fdf4828f08 100644 --- a/src/query/service/src/lib.rs +++ b/src/query/service/src/lib.rs @@ -35,6 +35,7 @@ #![feature(let_chains)] #![feature(try_blocks)] #![feature(lazy_cell)] +#![feature(variant_count)] #![allow(clippy::diverging_sub_expression)] #![allow(clippy::arc_with_non_send_sync)] diff --git a/src/query/service/src/local/executor.rs b/src/query/service/src/local/executor.rs index fa3d986bc0d1..cc38ac6e5375 100644 --- a/src/query/service/src/local/executor.rs +++ b/src/query/service/src/local/executor.rs @@ -117,7 +117,7 @@ impl SessionExecutor { .convert_to_full_column(&DataType::String, num_rows); let value = StringType::try_downcast_column(&col).unwrap(); for r in value.iter() { - keywords.push(unsafe { String::from_utf8_unchecked(r.to_vec()) }); + keywords.push(r.to_string()); } } } diff --git a/src/query/service/src/pipelines/builders/builder_aggregate.rs b/src/query/service/src/pipelines/builders/builder_aggregate.rs index d67fc5fff89e..d79e905ea637 100644 --- a/src/query/service/src/pipelines/builders/builder_aggregate.rs +++ b/src/query/service/src/pipelines/builders/builder_aggregate.rs @@ -25,9 +25,6 @@ use databend_common_expression::HashMethodKind; use 
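The task interpreter hunks above thread the new `session_parameters` map through `CreateTaskRequest` and `AlterTaskRequest`. Because proto3 map fields have no presence, `AlterTaskRequest` pairs the map with an explicit `set_session_parameters` flag so the service can tell "leave unchanged" apart from "set to empty". A small sketch of that presence-flag pattern; the struct below is a hypothetical mirror of the request fields involved, not the generated protobuf type.

```rust
use std::collections::BTreeMap;

// Hypothetical mirror of the AlterTaskRequest fields involved here.
#[derive(Default, Debug)]
struct AlterTaskRequest {
    set_session_parameters: bool,
    session_parameters: BTreeMap<String, String>,
}

// Option<BTreeMap> from the plan maps onto flag + map in the request:
// None -> leave session parameters untouched, Some -> overwrite them
// (possibly with an empty map).
fn apply_session_parameters(
    req: &mut AlterTaskRequest,
    session_parameters: Option<BTreeMap<String, String>>,
) {
    if let Some(params) = session_parameters {
        req.set_session_parameters = true;
        req.session_parameters = params;
    }
}

fn main() {
    let mut req = AlterTaskRequest::default();
    let mut params = BTreeMap::new();
    params.insert("timezone".to_string(), "UTC".to_string());

    apply_session_parameters(&mut req, Some(params));
    assert!(req.set_session_parameters);
    assert_eq!(req.session_parameters["timezone"], "UTC");

    let mut untouched = AlterTaskRequest::default();
    apply_session_parameters(&mut untouched, None);
    assert!(!untouched.set_session_parameters);
}
```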
databend_common_functions::aggregates::AggregateFunctionFactory; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::query_spill_prefix; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; -use databend_common_pipeline_transforms::processors::ProfileStub; -use databend_common_pipeline_transforms::processors::Transformer; use databend_common_sql::executor::physical_plans::AggregateExpand; use databend_common_sql::executor::physical_plans::AggregateFinal; use databend_common_sql::executor::physical_plans::AggregateFunctionDesc; @@ -110,17 +107,9 @@ impl PipelineBuilder { if params.group_columns.is_empty() { return self.main_pipeline.add_transform(|input, output| { - let transform = PartialSingleStateAggregator::try_create(input, output, ¶ms)?; - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - aggregate.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create( + PartialSingleStateAggregator::try_create(input, output, ¶ms)?, + )) }); } @@ -132,76 +121,60 @@ impl PipelineBuilder { let method = DataBlock::choose_hash_method(&sample_block, group_cols, efficiently_memory)?; self.main_pipeline.add_transform(|input, output| { - let transform = match params.aggregate_functions.is_empty() { - true => with_mappedhash_method!(|T| match method.clone() { - HashMethodKind::T(method) => TransformPartialGroupBy::try_create( - self.ctx.clone(), - method, - input, - output, - params.clone() - ), - }), - false => with_mappedhash_method!(|T| match method.clone() { - HashMethodKind::T(method) => TransformPartialAggregate::try_create( - self.ctx.clone(), - method, - input, - output, - params.clone() - ), - }), - }?; - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - aggregate.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } - })?; - - // If cluster mode, spill write will be completed in exchange serialize, because we need scatter the block data first - if self.ctx.get_cluster().is_empty() { - let operator = DataOperator::instance().operator(); - let location_prefix = query_spill_prefix(&self.ctx.get_tenant()); - self.main_pipeline.add_transform(|input, output| { - let transform = match params.aggregate_functions.is_empty() { + Ok(ProcessorPtr::create( + match params.aggregate_functions.is_empty() { true => with_mappedhash_method!(|T| match method.clone() { - HashMethodKind::T(method) => TransformGroupBySpillWriter::create( + HashMethodKind::T(method) => TransformPartialGroupBy::try_create( self.ctx.clone(), + method, input, output, - method, - operator.clone(), - location_prefix.clone() + params.clone() ), }), false => with_mappedhash_method!(|T| match method.clone() { - HashMethodKind::T(method) => TransformAggregateSpillWriter::create( + HashMethodKind::T(method) => TransformPartialAggregate::try_create( self.ctx.clone(), + method, input, output, - method, - operator.clone(), - params.clone(), - location_prefix.clone() + params.clone() ), }), - }; + }?, + )) + })?; - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - aggregate.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + // If cluster mode, spill write will be completed in exchange serialize, because we need scatter the block data first + if self.ctx.get_cluster().is_empty() { 
+ let operator = DataOperator::instance().operator(); + let location_prefix = query_spill_prefix(&self.ctx.get_tenant()); + self.main_pipeline.add_transform(|input, output| { + Ok(ProcessorPtr::create( + match params.aggregate_functions.is_empty() { + true => with_mappedhash_method!(|T| match method.clone() { + HashMethodKind::T(method) => TransformGroupBySpillWriter::create( + self.ctx.clone(), + input, + output, + method, + operator.clone(), + location_prefix.clone() + ), + }), + false => with_mappedhash_method!(|T| match method.clone() { + HashMethodKind::T(method) => TransformAggregateSpillWriter::create( + self.ctx.clone(), + input, + output, + method, + operator.clone(), + params.clone(), + location_prefix.clone() + ), + }), + }, + )) })?; } @@ -231,32 +204,11 @@ impl PipelineBuilder { self.build_pipeline(&aggregate.input)?; self.main_pipeline.try_resize(1)?; self.main_pipeline.add_transform(|input, output| { - let transform = FinalSingleStateAggregator::try_create(input, output, ¶ms)?; - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - aggregate.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create( + FinalSingleStateAggregator::try_create(input, output, ¶ms)?, + )) })?; - // Append a profile stub to record the output rows and bytes - if self.enable_profiling { - self.main_pipeline.add_transform(|input, output| { - Ok(ProcessorPtr::create(Transformer::create( - input, - output, - ProfileStub::new(aggregate.plan_id, self.proc_profs.clone()) - .accumulate_output_rows() - .accumulate_output_bytes(), - ))) - })?; - } - return Ok(()); } @@ -283,14 +235,7 @@ impl PipelineBuilder { self.build_pipeline(&aggregate.input)?; self.exchange_injector = old_inject; - build_partition_bucket::<_, ()>( - v, - &mut self.main_pipeline, - params.clone(), - self.enable_profiling, - aggregate.plan_id, - self.proc_profs.clone(), - ) + build_partition_bucket::<_, ()>(v, &mut self.main_pipeline, params.clone()) } }), false => with_hash_method!(|T| match method { @@ -305,14 +250,7 @@ impl PipelineBuilder { } self.build_pipeline(&aggregate.input)?; self.exchange_injector = old_inject; - build_partition_bucket::<_, usize>( - v, - &mut self.main_pipeline, - params.clone(), - self.enable_profiling, - aggregate.plan_id, - self.proc_profs.clone(), - ) + build_partition_bucket::<_, usize>(v, &mut self.main_pipeline, params.clone()) } }), } diff --git a/src/query/service/src/pipelines/builders/builder_exchange.rs b/src/query/service/src/pipelines/builders/builder_exchange.rs index 66d13b358d6a..25b93c469383 100644 --- a/src/query/service/src/pipelines/builders/builder_exchange.rs +++ b/src/query/service/src/pipelines/builders/builder_exchange.rs @@ -24,7 +24,6 @@ impl PipelineBuilder { let build_res = exchange_manager.get_fragment_source( &exchange_source.query_id, exchange_source.source_fragment_id, - self.enable_profiling, self.exchange_injector.clone(), )?; // add sharing data diff --git a/src/query/service/src/pipelines/builders/builder_filter.rs b/src/query/service/src/pipelines/builders/builder_filter.rs index 3768544d3c1b..e70fc1deaa39 100644 --- a/src/query/service/src/pipelines/builders/builder_filter.rs +++ b/src/query/service/src/pipelines/builders/builder_filter.rs @@ -19,7 +19,6 @@ use databend_common_expression::type_check::check_function; use databend_common_expression::types::DataType; use databend_common_functions::BUILTIN_FUNCTIONS; use 
databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; use databend_common_sql::executor::physical_plans::Filter; use crate::pipelines::processors::transforms::TransformFilter; @@ -47,7 +46,7 @@ impl PipelineBuilder { let max_block_size = self.settings.get_max_block_size()? as usize; let (select_expr, has_or) = build_select_expr(&predicate).into(); self.main_pipeline.add_transform(|input, output| { - let transform = TransformFilter::create( + Ok(ProcessorPtr::create(TransformFilter::create( input, output, select_expr.clone(), @@ -55,17 +54,7 @@ impl PipelineBuilder { filter.projections.clone(), self.func_ctx.clone(), max_block_size, - ); - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - filter.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + ))) })?; Ok(()) diff --git a/src/query/service/src/pipelines/builders/builder_join.rs b/src/query/service/src/pipelines/builders/builder_join.rs index 3db9373560dc..77ce87887b63 100644 --- a/src/query/service/src/pipelines/builders/builder_join.rs +++ b/src/query/service/src/pipelines/builders/builder_join.rs @@ -18,9 +18,6 @@ use databend_common_base::base::tokio::sync::Barrier; use databend_common_exception::Result; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_sinks::Sinker; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; -use databend_common_pipeline_transforms::processors::ProfileStub; -use databend_common_pipeline_transforms::processors::Transformer; use databend_common_sql::executor::physical_plans::HashJoin; use databend_common_sql::executor::physical_plans::MaterializedCte; use databend_common_sql::executor::physical_plans::RangeJoin; @@ -50,17 +47,6 @@ impl PipelineBuilder { let state = Arc::new(RangeJoinState::new(self.ctx.clone(), range_join)); self.expand_right_side_pipeline(range_join, state.clone())?; self.build_left_side(range_join, state)?; - if self.enable_profiling { - self.main_pipeline.add_transform(|input, output| { - Ok(ProcessorPtr::create(Transformer::create( - input, - output, - ProfileStub::new(range_join.plan_id, self.proc_profs.clone()) - .accumulate_output_rows() - .accumulate_output_bytes(), - ))) - })?; - } Ok(()) } @@ -73,16 +59,11 @@ impl PipelineBuilder { let max_threads = self.settings.get_max_threads()? 
as usize; self.main_pipeline.try_resize(max_threads)?; self.main_pipeline.add_transform(|input, output| { - let transform = TransformRangeJoinLeft::create(input, output, state.clone()); - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - range_join.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create(TransformRangeJoinLeft::create( + input, + output, + state.clone(), + ))) })?; Ok(()) } @@ -97,26 +78,17 @@ impl PipelineBuilder { self.func_ctx.clone(), self.settings.clone(), right_side_context, - self.enable_profiling, - self.proc_profs.clone(), self.main_pipeline.get_scopes(), ); right_side_builder.cte_state = self.cte_state.clone(); let mut right_res = right_side_builder.finalize(&range_join.right)?; right_res.main_pipeline.add_sink(|input| { - let transform = Sinker::::create( - input, - TransformRangeJoinRight::create(state.clone()), - ); - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - range_join.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create( + Sinker::::create( + input, + TransformRangeJoinRight::create(state.clone()), + ), + )) })?; self.pipelines.push(right_res.main_pipeline.finalize()); self.pipelines.extend(right_res.sources_pipelines); @@ -124,14 +96,12 @@ impl PipelineBuilder { } pub(crate) fn build_join(&mut self, join: &HashJoin) -> Result<()> { - let id = join.probe.get_table_index(); // for merge into target table as build side. let (merge_into_build_table_index, merge_into_is_distributed) = self.merge_into_get_optimization_flag(join); let state = self.build_join_state( join, - id, merge_into_build_table_index, merge_into_is_distributed, )?; @@ -142,7 +112,6 @@ impl PipelineBuilder { fn build_join_state( &mut self, join: &HashJoin, - id: IndexType, merge_into_target_table_index: IndexType, merge_into_is_distributed: bool, ) -> Result> { @@ -152,7 +121,6 @@ impl PipelineBuilder { &join.build_projections, HashJoinDesc::create(join)?, &join.probe_to_build, - id, merge_into_target_table_index, merge_into_is_distributed, ) @@ -169,8 +137,6 @@ impl PipelineBuilder { self.func_ctx.clone(), self.settings.clone(), build_side_context, - self.enable_profiling, - self.proc_profs.clone(), self.main_pipeline.get_scopes(), ); build_side_builder.cte_state = self.cte_state.clone(); @@ -201,18 +167,12 @@ impl PipelineBuilder { } else { None }; - let transform = - TransformHashJoinBuild::try_create(input, build_state.clone(), spill_state)?; - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - hash_join_plan.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create(TransformHashJoinBuild::try_create( + input, + build_state.clone(), + spill_state, + )?)) }; // for distributed merge into when source as build side. 
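The pipeline-builder hunks above (aggregate, filter, range join, hash join) all apply the same simplification: with profiling now gathered centrally from the executor, the conditional `ProcessorProfileWrapper`/`ProfileStub` branches disappear and each `add_transform` closure simply returns `ProcessorPtr::create(transform)`. A schematic before/after with toy types; the real `ProcessorPtr`, transforms, and profiling plumbing are Databend internals, so this is only an illustration of the shape of the change.

```rust
use std::sync::Arc;

// Toy stand-ins for the pipeline types involved.
trait Processor {
    fn name(&self) -> &'static str;
}

struct TransformFilter;
impl Processor for TransformFilter {
    fn name(&self) -> &'static str {
        "TransformFilter"
    }
}

struct ProcessorPtr(Arc<dyn Processor>);
impl ProcessorPtr {
    fn create(p: impl Processor + 'static) -> Self {
        Self(Arc::new(p))
    }
}

// Old shape (sketch): every builder branched on `enable_profiling` and wrapped
// the transform in a profiling adapter keyed by plan_id:
//
//     let transform = TransformFilter::create(...);
//     if enable_profiling {
//         Ok(ProcessorPtr::create(ProcessorProfileWrapper::create(transform, plan_id, profs)))
//     } else {
//         Ok(ProcessorPtr::create(transform))
//     }

// New shape: profiling is attached elsewhere, so the closure body collapses
// to a single constructor call.
fn build_filter_processor() -> ProcessorPtr {
    ProcessorPtr::create(TransformFilter)
}

fn main() {
    let p = build_filter_processor();
    assert_eq!(p.0.name(), "TransformFilter");
}
```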
if hash_join_plan.need_hold_hash_table { @@ -257,7 +217,8 @@ impl PipelineBuilder { } else { None }; - let transform = TransformHashJoinProbe::create( + + Ok(ProcessorPtr::create(TransformHashJoinProbe::create( input, output, join.projections.clone(), @@ -268,32 +229,9 @@ impl PipelineBuilder { &join.join_type, !join.non_equi_conditions.is_empty(), has_string_column, - )?; - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - join.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + )?)) })?; - if self.enable_profiling { - // Add a stub after the probe processor to accumulate the output rows. - self.main_pipeline.add_transform(|input, output| { - Ok(ProcessorPtr::create(Transformer::create( - input, - output, - ProfileStub::new(join.plan_id, self.proc_profs.clone()) - .accumulate_output_rows() - .accumulate_output_bytes(), - ))) - })?; - } - if join.need_hold_hash_table { let mut projected_probe_fields = vec![]; for (i, field) in probe_state.probe_schema.fields().iter().enumerate() { @@ -332,8 +270,6 @@ impl PipelineBuilder { self.func_ctx.clone(), self.settings.clone(), left_side_ctx, - self.enable_profiling, - self.proc_profs.clone(), self.main_pipeline.get_scopes(), ); left_side_builder.cte_state = self.cte_state.clone(); diff --git a/src/query/service/src/pipelines/builders/builder_limit.rs b/src/query/service/src/pipelines/builders/builder_limit.rs index 7b150531fadc..e509274cc82c 100644 --- a/src/query/service/src/pipelines/builders/builder_limit.rs +++ b/src/query/service/src/pipelines/builders/builder_limit.rs @@ -14,7 +14,6 @@ use databend_common_exception::Result; use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; use databend_common_sql::executor::physical_plans::Limit; use crate::pipelines::processors::TransformLimit; @@ -27,18 +26,12 @@ impl PipelineBuilder { if limit.limit.is_some() || limit.offset != 0 { self.main_pipeline.try_resize(1)?; return self.main_pipeline.add_transform(|input, output| { - let transform = - TransformLimit::try_create(limit.limit, limit.offset, input, output)?; - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - limit.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create(TransformLimit::try_create( + limit.limit, + limit.offset, + input, + output, + )?)) }); } Ok(()) diff --git a/src/query/service/src/pipelines/builders/builder_project.rs b/src/query/service/src/pipelines/builders/builder_project.rs index b5baacc8569b..ef8433182d1d 100644 --- a/src/query/service/src/pipelines/builders/builder_project.rs +++ b/src/query/service/src/pipelines/builders/builder_project.rs @@ -19,7 +19,6 @@ use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_sinks::EmptySink; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; use databend_common_sql::evaluator::BlockOperator; use databend_common_sql::evaluator::CompoundBlockOperator; use databend_common_sql::executor::physical_plans::Project; @@ -90,24 +89,14 @@ impl PipelineBuilder { let max_block_size = self.settings.get_max_block_size()? 
as usize; self.main_pipeline.add_transform(|input, output| { - let transform = TransformSRF::try_create( + Ok(ProcessorPtr::create(TransformSRF::try_create( input, output, self.func_ctx.clone(), project_set.projections.clone(), srf_exprs.clone(), max_block_size, - ); - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - project_set.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + ))) }) } } diff --git a/src/query/service/src/pipelines/builders/builder_scalar.rs b/src/query/service/src/pipelines/builders/builder_scalar.rs index a7bd3113a04b..7cbe9b62002d 100644 --- a/src/query/service/src/pipelines/builders/builder_scalar.rs +++ b/src/query/service/src/pipelines/builders/builder_scalar.rs @@ -15,7 +15,6 @@ use databend_common_exception::Result; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_transforms::processors::TransformProfileWrapper; use databend_common_pipeline_transforms::processors::Transformer; use databend_common_sql::evaluator::BlockOperator; use databend_common_sql::evaluator::CompoundBlockOperator; @@ -46,25 +45,15 @@ impl PipelineBuilder { let num_input_columns = input_schema.num_fields(); self.main_pipeline.add_transform(|input, output| { - let transform = CompoundBlockOperator::new( - vec![op.clone()], - self.func_ctx.clone(), - num_input_columns, - ); - - if self.enable_profiling { - Ok(ProcessorPtr::create(TransformProfileWrapper::create( - transform, - input, - output, - eval_scalar.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(Transformer::create( - input, output, transform, - ))) - } + Ok(ProcessorPtr::create(Transformer::create( + input, + output, + CompoundBlockOperator::new( + vec![op.clone()], + self.func_ctx.clone(), + num_input_columns, + ), + ))) })?; Ok(()) diff --git a/src/query/service/src/pipelines/builders/builder_scan.rs b/src/query/service/src/pipelines/builders/builder_scan.rs index 3c4d87a93c2c..c1432a2ded6c 100644 --- a/src/query/service/src/pipelines/builders/builder_scan.rs +++ b/src/query/service/src/pipelines/builders/builder_scan.rs @@ -12,17 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::Arc; -use std::sync::Mutex; -use std::time::Instant; - use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_sources::OneBlockSource; -use databend_common_pipeline_transforms::processors::ProfileStub; -use databend_common_pipeline_transforms::processors::Transformer; use databend_common_sql::evaluator::BlockOperator; use databend_common_sql::evaluator::CompoundBlockOperator; use databend_common_sql::executor::physical_plans::ConstantTableScan; @@ -44,31 +38,6 @@ impl PipelineBuilder { true, )?; - if self.enable_profiling { - self.main_pipeline.add_transform(|input, output| { - // shared timer between `on_start` and `on_finish` - let start_timer = Arc::new(Mutex::new(Instant::now())); - let finish_timer = Arc::new(Mutex::new(Instant::now())); - Ok(ProcessorPtr::create(Transformer::create( - input, - output, - ProfileStub::new(scan.plan_id, self.proc_profs.clone()) - .on_start(move |v| { - *start_timer.lock().unwrap() = Instant::now(); - *v - }) - .on_finish(move |prof| { - let elapsed = finish_timer.lock().unwrap().elapsed(); - let mut prof = *prof; - prof.wait_time = elapsed; - prof - }) - .accumulate_output_bytes() - .accumulate_output_rows(), - ))) - })?; - } - // Fill internal columns if needed. if let Some(internal_columns) = &scan.internal_column { self.main_pipeline.add_transform(|input, output| { diff --git a/src/query/service/src/pipelines/builders/builder_sort.rs b/src/query/service/src/pipelines/builders/builder_sort.rs index 68edb97bf100..9990f1f4ca99 100644 --- a/src/query/service/src/pipelines/builders/builder_sort.rs +++ b/src/query/service/src/pipelines/builders/builder_sort.rs @@ -22,10 +22,8 @@ use databend_common_pipeline_core::query_spill_prefix; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_transforms::processors::sort::utils::add_order_field; use databend_common_pipeline_transforms::processors::try_add_multi_sort_merge; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; use databend_common_pipeline_transforms::processors::TransformSortMergeBuilder; use databend_common_pipeline_transforms::processors::TransformSortPartial; -use databend_common_profile::SharedProcessorProfiles; use databend_common_sql::evaluator::BlockOperator; use databend_common_sql::evaluator::CompoundBlockOperator; use databend_common_sql::executor::physical_plans::Sort; @@ -89,20 +87,13 @@ impl PipelineBuilder { }) .collect::>>()?; - self.build_sort_pipeline( - plan_schema, - sort_desc, - sort.plan_id, - sort.limit, - sort.after_exchange, - ) + self.build_sort_pipeline(plan_schema, sort_desc, sort.limit, sort.after_exchange) } pub(crate) fn build_sort_pipeline( &mut self, plan_schema: DataSchemaRef, sort_desc: Vec, - plan_id: u32, limit: Option, after_exchange: Option, ) -> Result<()> { @@ -114,18 +105,12 @@ impl PipelineBuilder { if self.main_pipeline.output_len() == 1 || max_threads == 1 { self.main_pipeline.try_resize(max_threads)?; } - let prof_info = if self.enable_profiling { - Some((plan_id, self.proc_profs.clone())) - } else { - None - }; let mut builder = SortPipelineBuilder::create(self.ctx.clone(), plan_schema.clone(), sort_desc.clone()) .with_partial_block_size(block_size) .with_final_block_size(block_size) - .with_limit(limit) - .with_prof_info(prof_info.clone()); + .with_limit(limit); match after_exchange { Some(true) => { @@ -140,7 +125,6 @@ 
impl PipelineBuilder { block_size, limit, sort_desc, - prof_info, true, ) } else { @@ -171,7 +155,6 @@ pub struct SortPipelineBuilder { limit: Option, partial_block_size: usize, final_block_size: usize, - prof_info: Option<(u32, SharedProcessorProfiles)>, remove_order_col_at_last: bool, } @@ -188,7 +171,6 @@ impl SortPipelineBuilder { limit: None, partial_block_size: 0, final_block_size: 0, - prof_info: None, remove_order_col_at_last: false, } } @@ -208,11 +190,6 @@ impl SortPipelineBuilder { self } - pub fn with_prof_info(mut self, prof_info: Option<(u32, SharedProcessorProfiles)>) -> Self { - self.prof_info = prof_info; - self - } - pub fn remove_order_col_at_last(mut self) -> Self { self.remove_order_col_at_last = true; self @@ -221,21 +198,12 @@ impl SortPipelineBuilder { pub fn build_full_sort_pipeline(self, pipeline: &mut Pipeline) -> Result<()> { // Partial sort pipeline.add_transform(|input, output| { - let transform = TransformSortPartial::try_create( + Ok(ProcessorPtr::create(TransformSortPartial::try_create( input, output, self.limit, self.sort_desc.clone(), - )?; - if let Some((plan_id, prof)) = &self.prof_info { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - *plan_id, - prof.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + )?)) })?; self.build_merge_sort_pipeline(pipeline, false) @@ -310,16 +278,7 @@ impl SortPipelineBuilder { .with_max_memory_usage(max_memory_usage) .with_spilling_bytes_threshold_per_core(bytes_limit_per_proc); - let transform = builder.build()?; - if let Some((plan_id, prof)) = &self.prof_info { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - *plan_id, - prof.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create(builder.build()?)) })?; if may_spill { @@ -329,7 +288,7 @@ impl SortPipelineBuilder { let op = DataOperator::instance().operator(); let spiller = Spiller::create(self.ctx.clone(), op, config.clone(), SpillerType::OrderBy); - let transform = create_transform_sort_spill( + Ok(ProcessorPtr::create(create_transform_sort_spill( input, output, schema.clone(), @@ -337,16 +296,7 @@ impl SortPipelineBuilder { self.limit, spiller, output_order_col, - ); - if let Some((plan_id, prof)) = &self.prof_info { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - *plan_id, - prof.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + ))) })?; } @@ -358,7 +308,6 @@ impl SortPipelineBuilder { self.final_block_size, self.limit, self.sort_desc, - self.prof_info.clone(), self.remove_order_col_at_last, )?; } diff --git a/src/query/service/src/pipelines/builders/builder_udf.rs b/src/query/service/src/pipelines/builders/builder_udf.rs index e82e17799537..9511ca35b687 100644 --- a/src/query/service/src/pipelines/builders/builder_udf.rs +++ b/src/query/service/src/pipelines/builders/builder_udf.rs @@ -14,7 +14,6 @@ use databend_common_exception::Result; use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; use databend_common_sql::executor::physical_plans::Udf; use crate::pipelines::processors::transforms::TransformUdf; @@ -25,21 +24,12 @@ impl PipelineBuilder { self.build_pipeline(&udf.input)?; self.main_pipeline.add_transform(|input, output| { - let transform = TransformUdf::try_create( + Ok(ProcessorPtr::create(TransformUdf::try_create( self.func_ctx.clone(), udf.udf_funcs.clone(), input, output, - )?; - if self.enable_profiling { - 
Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - udf.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + )?)) }) } } diff --git a/src/query/service/src/pipelines/builders/builder_union_all.rs b/src/query/service/src/pipelines/builders/builder_union_all.rs index 7b21330757a3..7ed40bc87e80 100644 --- a/src/query/service/src/pipelines/builders/builder_union_all.rs +++ b/src/query/service/src/pipelines/builders/builder_union_all.rs @@ -17,7 +17,6 @@ use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_sinks::UnionReceiveSink; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; use databend_common_sql::executor::physical_plans::UnionAll; use databend_common_sql::executor::PhysicalPlan; @@ -28,43 +27,27 @@ use crate::sessions::QueryContext; impl PipelineBuilder { pub fn build_union_all(&mut self, union_all: &UnionAll) -> Result<()> { self.build_pipeline(&union_all.left)?; - let union_all_receiver = self.expand_union_all(&union_all.right, union_all)?; + let union_all_receiver = self.expand_union_all(&union_all.right)?; self.main_pipeline .add_transform(|transform_input_port, transform_output_port| { - let transform = TransformMergeBlock::try_create( + Ok(ProcessorPtr::create(TransformMergeBlock::try_create( transform_input_port, transform_output_port, union_all.left.output_schema()?, union_all.right.output_schema()?, union_all.pairs.clone(), union_all_receiver.clone(), - )?; - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - union_all.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + )?)) })?; Ok(()) } - fn expand_union_all( - &mut self, - input: &PhysicalPlan, - union_plan: &UnionAll, - ) -> Result> { + fn expand_union_all(&mut self, input: &PhysicalPlan) -> Result> { let union_ctx = QueryContext::create_from(self.ctx.clone()); let mut pipeline_builder = PipelineBuilder::create( self.func_ctx.clone(), self.settings.clone(), union_ctx, - self.enable_profiling, - self.proc_profs.clone(), self.main_pipeline.get_scopes(), ); pipeline_builder.cte_state = self.cte_state.clone(); @@ -76,18 +59,11 @@ impl PipelineBuilder { let (tx, rx) = async_channel::unbounded(); build_res.main_pipeline.add_sink(|input_port| { - let transform = - UnionReceiveSink::create(Some(tx.clone()), input_port, self.ctx.clone()); - - if self.enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - union_plan.plan_id, - self.proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create(UnionReceiveSink::create( + Some(tx.clone()), + input_port, + self.ctx.clone(), + ))) })?; self.pipelines.push(build_res.main_pipeline.finalize()); diff --git a/src/query/service/src/pipelines/builders/builder_window.rs b/src/query/service/src/pipelines/builders/builder_window.rs index 82af7ab49986..41043661a364 100644 --- a/src/query/service/src/pipelines/builders/builder_window.rs +++ b/src/query/service/src/pipelines/builders/builder_window.rs @@ -70,7 +70,7 @@ impl PipelineBuilder { sort_desc.extend(order_by.clone()); - self.build_sort_pipeline(input_schema.clone(), sort_desc, window.plan_id, None, None)?; + self.build_sort_pipeline(input_schema.clone(), sort_desc, window.limit, None)?; } // `TransformWindow` is a pipeline breaker. 
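Note: the sort hunks above drop prof_info from SortPipelineBuilder, so building a sort pipeline is now a plain fluent chain. A rough usage sketch based on the builder methods visible in this diff (ctx, plan_schema, sort_desc, block_size and limit are assumed to come from the surrounding function, as in build_sort_pipeline):

// Sketch: profiling is no longer threaded through the sort builder.
let builder =
    SortPipelineBuilder::create(self.ctx.clone(), plan_schema.clone(), sort_desc.clone())
        .with_partial_block_size(block_size) // block size for the partial sort stage
        .with_final_block_size(block_size)   // block size for the final merge stage
        .with_limit(limit);                  // optional top-n limit
builder.build_full_sort_pipeline(&mut self.main_pipeline)?;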
self.main_pipeline.try_resize(1)?; diff --git a/src/query/service/src/pipelines/executor/executor_graph.rs b/src/query/service/src/pipelines/executor/executor_graph.rs index 134fb505cc1e..e2d416fba33e 100644 --- a/src/query/service/src/pipelines/executor/executor_graph.rs +++ b/src/query/service/src/pipelines/executor/executor_graph.rs @@ -23,8 +23,8 @@ use databend_common_base::runtime::TrackedFuture; use databend_common_base::runtime::TrySpawn; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_pipeline_core::processors::profile::Profile; use databend_common_pipeline_core::processors::EventCause; +use databend_common_pipeline_core::processors::Profile; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_core::PlanScope; use log::debug; @@ -207,6 +207,12 @@ impl ExecutingGraph { let output_trigger = graph[source_node].create_trigger(edge_index); graph[source_node].outputs_port[source_port].set_trigger(output_trigger); + if graph[source_node].profile.plan_id.is_some() + && graph[source_node].profile.plan_id != graph[target_node].profile.plan_id + { + graph[source_node].outputs_port[source_port].record_profile(); + } + connect( &graph[target_node].inputs_port[target_port], &graph[source_node].outputs_port[source_port], @@ -276,6 +282,8 @@ impl ExecutingGraph { if let Some(schedule_index) = need_schedule_nodes.pop_front() { let node = &locker.graph[schedule_index]; + Profile::track_profile(&node.profile); + if state_guard_cache.is_none() { state_guard_cache = Some(node.state.lock().unwrap()); } @@ -386,6 +394,7 @@ impl ScheduleQueue { unsafe { workers_condvar.inc_active_async_worker(); let weak_executor = Arc::downgrade(executor); + let node_profile = executor.graph.get_node_profile(proc.id()).clone(); let process_future = proc.async_process(); executor.async_runtime.spawn( query_id.as_ref().clone(), @@ -396,6 +405,7 @@ impl ScheduleQueue { global_queue, workers_condvar, weak_executor, + node_profile, process_future, )) .in_span(Span::enter_with_local_parent(std::any::type_name::< @@ -443,8 +453,8 @@ impl RunningGraph { Ok(schedule_queue) } - pub(crate) fn get_node(&self, pid: NodeIndex) -> &Node { - &self.0.graph[pid] + pub(crate) fn get_node_profile(&self, pid: NodeIndex) -> &Arc { + &self.0.graph[pid].profile } pub fn get_proc_profiles(&self) -> Vec> { diff --git a/src/query/service/src/pipelines/executor/executor_settings.rs b/src/query/service/src/pipelines/executor/executor_settings.rs index 1275791cc92f..b45bc8465582 100644 --- a/src/query/service/src/pipelines/executor/executor_settings.rs +++ b/src/query/service/src/pipelines/executor/executor_settings.rs @@ -21,16 +21,13 @@ use databend_common_settings::Settings; #[derive(Clone)] pub struct ExecutorSettings { pub query_id: Arc, - pub enable_profiling: bool, pub max_execute_time_in_seconds: Duration, } impl ExecutorSettings { pub fn try_create(settings: &Settings, query_id: String) -> Result { - let enable_profiling = settings.get_enable_query_profiling()?; let max_execute_time_in_seconds = settings.get_max_execute_time_in_seconds()?; Ok(ExecutorSettings { - enable_profiling, query_id: Arc::new(query_id), max_execute_time_in_seconds: Duration::from_secs(max_execute_time_in_seconds), }) diff --git a/src/query/service/src/pipelines/executor/executor_tasks.rs b/src/query/service/src/pipelines/executor/executor_tasks.rs index 3168cce975a1..cc70fcd199ee 100644 --- a/src/query/service/src/pipelines/executor/executor_tasks.rs +++ 
b/src/query/service/src/pipelines/executor/executor_tasks.rs @@ -16,7 +16,6 @@ use std::collections::VecDeque; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; use std::sync::Arc; -use std::time::Duration; use databend_common_exception::Result; use parking_lot::Mutex; @@ -198,22 +197,11 @@ pub struct CompletedAsyncTask { pub id: NodeIndex, pub worker_id: usize, pub res: Result<()>, - pub elapsed: Option, } impl CompletedAsyncTask { - pub fn create( - id: NodeIndex, - worker_id: usize, - res: Result<()>, - elapsed: Option, - ) -> Self { - CompletedAsyncTask { - id, - worker_id, - res, - elapsed, - } + pub fn create(id: NodeIndex, worker_id: usize, res: Result<()>) -> Self { + CompletedAsyncTask { id, worker_id, res } } } diff --git a/src/query/service/src/pipelines/executor/executor_worker_context.rs b/src/query/service/src/pipelines/executor/executor_worker_context.rs index deb86134fa3a..9b430dc302e3 100644 --- a/src/query/service/src/pipelines/executor/executor_worker_context.rs +++ b/src/query/service/src/pipelines/executor/executor_worker_context.rs @@ -14,15 +14,18 @@ use std::fmt::Debug; use std::fmt::Formatter; +use std::intrinsics::assume; use std::sync::Arc; -use std::time::Duration; use std::time::Instant; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use petgraph::prelude::NodeIndex; use crate::pipelines::executor::CompletedAsyncTask; +use crate::pipelines::executor::RunningGraph; use crate::pipelines::executor::WorkersCondvar; use crate::pipelines::processors::ProcessorPtr; @@ -70,35 +73,33 @@ impl ExecutorWorkerContext { } /// # Safety - pub unsafe fn execute_task( - &mut self, - ) -> Result<(NodeIndex, bool, Option)> { + pub unsafe fn execute_task(&mut self, graph: &RunningGraph) -> Result { match std::mem::replace(&mut self.task, ExecutorTask::None) { ExecutorTask::None => Err(ErrorCode::Internal("Execute none task.")), - ExecutorTask::Sync(processor) => self.execute_sync_task::(processor), + ExecutorTask::Sync(processor) => self.execute_sync_task(processor, graph), ExecutorTask::AsyncCompleted(task) => match task.res { - Ok(_) => Ok((task.id, true, task.elapsed)), + Ok(_) => Ok(task.id), Err(cause) => Err(cause), }, } } /// # Safety - unsafe fn execute_sync_task( + unsafe fn execute_sync_task( &mut self, proc: ProcessorPtr, - ) -> Result<(NodeIndex, bool, Option)> { - match ENABLE_PROFILING { - true => { - let instant = Instant::now(); - proc.process()?; - Ok((proc.id(), false, Some(instant.elapsed()))) - } - false => { - proc.process()?; - Ok((proc.id(), false, None)) - } - } + graph: &RunningGraph, + ) -> Result { + Profile::track_profile(graph.get_node_profile(proc.id())); + + let instant = Instant::now(); + + proc.process()?; + + let nanos = instant.elapsed().as_nanos(); + assume(nanos < 18446744073709551615_u128); + Profile::record_usize_profile(ProfileStatisticsName::CpuTime, nanos as usize); + Ok(proc.id()) } pub fn get_workers_condvar(&self) -> &Arc { diff --git a/src/query/service/src/pipelines/executor/pipeline_executor.rs b/src/query/service/src/pipelines/executor/pipeline_executor.rs index 7d8c077d5a87..027a865bbb17 100644 --- a/src/query/service/src/pipelines/executor/pipeline_executor.rs +++ b/src/query/service/src/pipelines/executor/pipeline_executor.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
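Note: the executor_worker_context.rs hunk above replaces the ENABLE_PROFILING const generic with unconditional, thread-local profile tracking: before running a processor the worker activates the node's Profile, and afterwards it records the elapsed nanoseconds as CpuTime. A condensed sketch of that flow, mirroring execute_sync_task with the assume hint and extra plumbing trimmed (types and methods as introduced in this diff):

// Sketch: attribute synchronous CPU time to the currently scheduled node.
unsafe fn run_sync(proc: ProcessorPtr, graph: &RunningGraph) -> Result<NodeIndex> {
    // Make this node's profile the thread-local target for recorded statistics.
    Profile::track_profile(graph.get_node_profile(proc.id()));

    let instant = Instant::now();
    proc.process()?;

    // Elapsed time is recorded as a plain usize statistic on the tracked profile.
    Profile::record_usize_profile(
        ProfileStatisticsName::CpuTime,
        instant.elapsed().as_nanos() as usize,
    );
    Ok(proc.id())
}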
-use std::intrinsics::assume; -use std::sync::atomic::Ordering; use std::sync::Arc; use std::time::Instant; @@ -27,7 +25,7 @@ use databend_common_base::runtime::TrySpawn; use databend_common_base::GLOBAL_TASK; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_pipeline_core::processors::profile::Profile; +use databend_common_pipeline_core::processors::Profile; use databend_common_pipeline_core::LockGuard; use databend_common_pipeline_core::Pipeline; use futures::future::select; @@ -357,14 +355,8 @@ impl PipelineExecutor { thread_join_handles.push(Thread::named_spawn(Some(name), move || unsafe { let _g = span.set_local_parent(); let this_clone = this.clone(); - let enable_profiling = this.settings.enable_profiling; let try_result = catch_unwind(move || -> Result<()> { - let res = match enable_profiling { - true => this_clone.execute_single_thread::(thread_num), - false => this_clone.execute_single_thread::(thread_num), - }; - - match res { + match this_clone.execute_single_thread(thread_num) { Ok(_) => Ok(()), Err(cause) => { if log::max_level() == LevelFilter::Trace { @@ -393,10 +385,7 @@ impl PipelineExecutor { /// # Safety /// /// Method is thread unsafe and require thread safe call - pub unsafe fn execute_single_thread( - self: &Arc, - thread_num: usize, - ) -> Result<()> { + pub unsafe fn execute_single_thread(self: &Arc, thread_num: usize) -> Result<()> { let workers_condvar = self.workers_condvar.clone(); let mut context = ExecutorWorkerContext::create( thread_num, @@ -411,28 +400,7 @@ impl PipelineExecutor { } while !self.global_tasks_queue.is_finished() && context.has_task() { - let (executed_pid, is_async, elapsed) = - context.execute_task::()?; - - if ENABLE_PROFILING { - let node = self.graph.get_node(executed_pid); - if let Some(elapsed) = elapsed { - let nanos = elapsed.as_nanos(); - assume(nanos < 18446744073709551615_u128); - - if is_async { - node.profile - .wait_time - .fetch_add(nanos as u64, Ordering::Relaxed); - } else { - node.profile - .cpu_time - .fetch_add(nanos as u64, Ordering::Relaxed); - } - } - - node.processor.record_profile(&node.profile); - } + let executed_pid = context.execute_task(&self.graph)?; // Not scheduled graph if pipeline is finished. if !self.global_tasks_queue.is_finished() { diff --git a/src/query/service/src/pipelines/executor/processor_async_task.rs b/src/query/service/src/pipelines/executor/processor_async_task.rs index 11bd17b0f801..696acd10cfc0 100644 --- a/src/query/service/src/pipelines/executor/processor_async_task.rs +++ b/src/query/service/src/pipelines/executor/processor_async_task.rs @@ -13,6 +13,7 @@ // limitations under the License. 
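Note: the ProcessorAsyncTask changes that follow split the same statistics per poll: time elapsed since the previous poll returned is charged to WaitTime, and time spent inside poll itself to CpuTime, using the elapsed_nanos helper added at the end of that file. A simplified, hypothetical sketch of that bookkeeping (account_poll is an illustrative name, not a function in this diff; the real poll also tracks the profile, catches unwinds, and only updates last_nanos on Poll::Pending):

// Sketch: per-poll wait/CPU accounting for an async processor task.
fn account_poll(task: &mut ProcessorAsyncTask, do_poll: impl FnOnce()) {
    let before_poll_nanos = elapsed_nanos(task.instant);
    // Everything since the previous poll returned counts as wait time.
    Profile::record_usize_profile(
        ProfileStatisticsName::WaitTime,
        before_poll_nanos - task.last_nanos,
    );

    do_poll();

    let after_poll_nanos = elapsed_nanos(task.instant);
    // The poll itself counts as CPU time; remember where it ended for the next poll.
    Profile::record_usize_profile(
        ProfileStatisticsName::CpuTime,
        after_poll_nanos - before_poll_nanos,
    );
    task.last_nanos = after_poll_nanos;
}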
use std::future::Future; +use std::intrinsics::assume; use std::pin::Pin; use std::sync::Arc; use std::sync::Weak; @@ -26,6 +27,8 @@ use databend_common_base::runtime::catch_unwind; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use futures_util::future::BoxFuture; use futures_util::future::Either; use futures_util::FutureExt; @@ -43,7 +46,10 @@ pub struct ProcessorAsyncTask { processor_id: NodeIndex, queue: Arc, workers_condvar: Arc, - inner: BoxFuture<'static, (Duration, Result<()>)>, + profile: Arc, + instant: Instant, + last_nanos: usize, + inner: BoxFuture<'static, Result<()>>, } impl ProcessorAsyncTask { @@ -54,6 +60,7 @@ impl ProcessorAsyncTask { queue: Arc, workers_condvar: Arc, weak_executor: Weak, + profile: Arc, inner: Inner, ) -> ProcessorAsyncTask { let finished_notify = queue.get_finished_notify(); @@ -111,17 +118,21 @@ impl ProcessorAsyncTask { }; } Either::Right((res, _)) => { - return (start.elapsed(), res); + return res; } } } }; + let instant = Instant::now(); ProcessorAsyncTask { worker_id, processor_id, queue, workers_condvar, + profile, + last_nanos: instant.elapsed().as_nanos() as usize, + instant, inner: inner.boxed(), } } @@ -135,26 +146,40 @@ impl Future for ProcessorAsyncTask { return Poll::Ready(()); } + Profile::track_profile(&self.profile); + + let last_nanos = self.last_nanos; + let last_instant = self.instant; let inner = self.inner.as_mut(); - match catch_unwind(move || inner.poll(cx)) { - Ok(Poll::Pending) => Poll::Pending, - Ok(Poll::Ready((elapsed, res))) => { + let before_poll_nanos = elapsed_nanos(last_instant); + let wait_nanos = before_poll_nanos - last_nanos; + Profile::record_usize_profile(ProfileStatisticsName::WaitTime, wait_nanos); + + let poll_res = catch_unwind(move || inner.poll(cx)); + + let after_poll_nanos = elapsed_nanos(last_instant); + Profile::record_usize_profile( + ProfileStatisticsName::CpuTime, + after_poll_nanos - before_poll_nanos, + ); + + match poll_res { + Ok(Poll::Pending) => { + self.last_nanos = after_poll_nanos; + Poll::Pending + } + Ok(Poll::Ready(res)) => { self.queue.completed_async_task( self.workers_condvar.clone(), - CompletedAsyncTask::create( - self.processor_id, - self.worker_id, - res, - Some(elapsed), - ), + CompletedAsyncTask::create(self.processor_id, self.worker_id, res), ); Poll::Ready(()) } Err(cause) => { self.queue.completed_async_task( self.workers_condvar.clone(), - CompletedAsyncTask::create(self.processor_id, self.worker_id, Err(cause), None), + CompletedAsyncTask::create(self.processor_id, self.worker_id, Err(cause)), ); Poll::Ready(()) @@ -162,3 +187,11 @@ impl Future for ProcessorAsyncTask { } } } + +fn elapsed_nanos(instant: Instant) -> usize { + let nanos = (Instant::now() - instant).as_nanos(); + unsafe { + assume(nanos < 18446744073709551615_u128); + } + nanos as usize +} diff --git a/src/query/service/src/pipelines/pipeline_build_res.rs b/src/query/service/src/pipelines/pipeline_build_res.rs index b08c0b0635dc..ad2fb9ca72ff 100644 --- a/src/query/service/src/pipelines/pipeline_build_res.rs +++ b/src/query/service/src/pipelines/pipeline_build_res.rs @@ -21,7 +21,6 @@ use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_core::SourcePipeBuilder; use databend_common_pipeline_sources::OneBlockSource; 
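Note: the aggregate spill writer/reader hunks further below add the same Profile statistics alongside the existing metrics calls, so spill activity now shows up in the per-node profile as well as in global metrics. The recurring pattern, restated with comments (instant and write_bytes are the values those sites already compute for metrics; the read path mirrors it with SpillReadCount/Bytes/Time):

// Sketch: record spill-write statistics on the currently tracked profile,
// mirroring the metrics_inc_* calls that already exist at these call sites.
Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1);
Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, write_bytes);
Profile::record_usize_profile(
    ProfileStatisticsName::SpillWriteTime,
    instant.elapsed().as_millis() as usize,
);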
-use databend_common_profile::SharedProcessorProfiles; use crate::api::DefaultExchangeInjector; use crate::api::ExchangeInjector; @@ -38,10 +37,6 @@ pub struct PipelineBuildResult { // Containing some sub queries pipelines, must be complete pipeline pub sources_pipelines: Vec, - /// Set of profiling spans for the query. - /// Will be empty if profiling is disabled. - pub prof_span_set: SharedProcessorProfiles, - pub exchange_injector: Arc, /// for local fragment data sharing pub builder_data: PipelineBuilderData, @@ -52,7 +47,6 @@ impl PipelineBuildResult { PipelineBuildResult { main_pipeline: Pipeline::create(), sources_pipelines: vec![], - prof_span_set: SharedProcessorProfiles::default(), exchange_injector: DefaultExchangeInjector::create(), builder_data: PipelineBuilderData { input_join_state: None, @@ -75,7 +69,6 @@ impl PipelineBuildResult { Ok(PipelineBuildResult { main_pipeline, sources_pipelines: vec![], - prof_span_set: SharedProcessorProfiles::default(), exchange_injector: DefaultExchangeInjector::create(), builder_data: PipelineBuilderData { input_join_state: None, diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index d5ca7c0af540..075426da8960 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -19,10 +19,10 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataField; use databend_common_expression::FunctionContext; +use databend_common_pipeline_core::processors::profile::ProfileLabel; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_core::PlanScope; use databend_common_pipeline_core::PlanScopeGuard; -use databend_common_profile::SharedProcessorProfiles; use databend_common_settings::Settings; use databend_common_sql::executor::PhysicalPlan; use databend_common_sql::IndexType; @@ -50,8 +50,6 @@ pub struct PipelineBuilder { // Cte -> state, each cte has it's own state pub cte_state: HashMap>, - pub(crate) enable_profiling: bool, - pub(crate) proc_profs: SharedProcessorProfiles, pub(crate) exchange_injector: Arc, } @@ -60,18 +58,14 @@ impl PipelineBuilder { func_ctx: FunctionContext, settings: Arc, ctx: Arc, - enable_profiling: bool, - prof_span_set: SharedProcessorProfiles, scopes: Vec, ) -> PipelineBuilder { PipelineBuilder { - enable_profiling, ctx, func_ctx, settings, pipelines: vec![], main_pipeline: Pipeline::with_scopes(scopes), - proc_profs: prof_span_set, exchange_injector: DefaultExchangeInjector::create(), cte_state: HashMap::new(), merge_into_probe_data_fields: None, @@ -93,7 +87,6 @@ impl PipelineBuilder { Ok(PipelineBuildResult { main_pipeline: self.main_pipeline, sources_pipelines: self.pipelines, - prof_span_set: self.proc_profs, exchange_injector: self.exchange_injector, builder_data: PipelineBuilderData { input_join_state: self.join_state, @@ -102,18 +95,30 @@ impl PipelineBuilder { }) } - pub(crate) fn add_plan_scope(&mut self, plan: &PhysicalPlan) -> Option { + pub(crate) fn add_plan_scope(&mut self, plan: &PhysicalPlan) -> Result> { match plan { - PhysicalPlan::EvalScalar(v) if v.exprs.is_empty() => None, + PhysicalPlan::EvalScalar(v) if v.exprs.is_empty() => Ok(None), _ => { - let scope = PlanScope::create(plan.get_id(), plan.name()); - Some(self.main_pipeline.add_plan_scope(scope)) + let desc = plan.get_desc()?; + let plan_labels = plan.get_labels()?; + let mut profile_labels = Vec::with_capacity(plan_labels.len()); + for 
(name, value) in plan_labels { + profile_labels.push(ProfileLabel::create(name, value)); + } + + let scope = PlanScope::create( + plan.get_id(), + plan.name(), + Arc::new(desc), + Arc::new(profile_labels), + ); + Ok(Some(self.main_pipeline.add_plan_scope(scope))) } } } pub(crate) fn build_pipeline(&mut self, plan: &PhysicalPlan) -> Result<()> { - let _guard = self.add_plan_scope(plan); + let _guard = self.add_plan_scope(plan)?; match plan { PhysicalPlan::TableScan(scan) => self.build_table_scan(scan), PhysicalPlan::CteScan(scan) => self.build_cte_scan(scan), diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_spill_writer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_spill_writer.rs index c53da0393099..cfb2be722203 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_spill_writer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_spill_writer.rs @@ -30,6 +30,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use futures_util::future::BoxFuture; use log::info; use opendal::Operator; @@ -258,6 +260,13 @@ pub fn spilling_aggregate_payload( metrics_inc_aggregate_spill_write_count(); metrics_inc_aggregate_spill_write_bytes(write_bytes as u64); metrics_inc_aggregate_spill_write_milliseconds(instant.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1); + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, write_bytes); + Profile::record_usize_profile( + ProfileStatisticsName::SpillWriteTime, + instant.elapsed().as_millis() as usize, + ); } { diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs index 183d12e8629f..823721b4666e 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs @@ -39,6 +39,8 @@ use databend_common_metrics::transform::*; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_pipeline_transforms::processors::BlockMetaTransform; use databend_common_pipeline_transforms::processors::BlockMetaTransformer; use databend_common_settings::FlightCompression; @@ -261,6 +263,13 @@ fn spilling_aggregate_payload( metrics_inc_aggregate_spill_write_count(); metrics_inc_aggregate_spill_write_bytes(write_bytes as u64); metrics_inc_aggregate_spill_write_milliseconds(instant.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1); + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, write_bytes); + 
Profile::record_usize_profile( + ProfileStatisticsName::SpillWriteTime, + instant.elapsed().as_millis() as usize, + ); } { diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_group_by_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_group_by_serializer.rs index 53c51c5a005f..afc4fa7a333c 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_group_by_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_group_by_serializer.rs @@ -44,6 +44,8 @@ use databend_common_metrics::transform::*; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_pipeline_transforms::processors::BlockMetaTransform; use databend_common_pipeline_transforms::processors::BlockMetaTransformer; use databend_common_pipeline_transforms::processors::UnknownMode; @@ -313,6 +315,13 @@ fn spilling_group_by_payload( metrics_inc_group_by_spill_write_count(); metrics_inc_group_by_spill_write_bytes(write_bytes as u64); metrics_inc_group_by_spill_write_milliseconds(instant.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1); + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, write_bytes); + Profile::record_usize_profile( + ProfileStatisticsName::SpillWriteTime, + instant.elapsed().as_millis() as usize, + ); } { diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_group_by_spill_writer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_group_by_spill_writer.rs index 78905671f23e..1ddb32550494 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_group_by_spill_writer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_group_by_spill_writer.rs @@ -30,6 +30,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use futures_util::future::BoxFuture; use log::info; use opendal::Operator; @@ -251,6 +253,13 @@ pub fn spilling_group_by_payload( metrics_inc_group_by_spill_write_count(); metrics_inc_group_by_spill_write_bytes(write_bytes as u64); metrics_inc_group_by_spill_write_milliseconds(instant.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1); + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, write_bytes); + Profile::record_usize_profile( + ProfileStatisticsName::SpillWriteTime, + instant.elapsed().as_millis() as usize, + ); } { diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_spill_reader.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_spill_reader.rs index eb52e34c3d97..f0cfc0a1b1ab 100644 --- 
a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_spill_reader.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_spill_reader.rs @@ -29,6 +29,8 @@ use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use itertools::Itertools; use log::info; use opendal::Operator; @@ -212,6 +214,19 @@ impl Processor metrics_inc_aggregate_spill_read_milliseconds( instant.elapsed().as_millis() as u64, ); + + Profile::record_usize_profile( + ProfileStatisticsName::SpillReadCount, + 1, + ); + Profile::record_usize_profile( + ProfileStatisticsName::SpillReadBytes, + data.len(), + ); + Profile::record_usize_profile( + ProfileStatisticsName::SpillReadTime, + instant.elapsed().as_millis() as usize, + ); } info!( diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_partition_bucket.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_partition_bucket.rs index 550dc19610f9..8f1b414a7997 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_partition_bucket.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_partition_bucket.rs @@ -33,10 +33,6 @@ use databend_common_pipeline_core::processors::ProcessorPtr; use databend_common_pipeline_core::Pipe; use databend_common_pipeline_core::PipeItem; use databend_common_pipeline_core::Pipeline; -use databend_common_pipeline_transforms::processors::ProcessorProfileWrapper; -use databend_common_pipeline_transforms::processors::ProfileStub; -use databend_common_pipeline_transforms::processors::Transformer; -use databend_common_profile::SharedProcessorProfiles; use databend_common_storage::DataOperator; use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; @@ -443,9 +439,6 @@ pub fn build_partition_bucket, - enable_profiling: bool, - prof_id: u32, - proc_profs: SharedProcessorProfiles, ) -> Result<()> { let input_nums = pipeline.output_len(); let transform = TransformPartitionBucket::::create(method.clone(), input_nums)?; @@ -471,36 +464,23 @@ pub fn build_partition_bucket { - TransformFinalGroupBy::try_create(input, output, method.clone(), params.clone())? - } - false => { - TransformFinalAggregate::try_create(input, output, method.clone(), params.clone())? 
- } - }; - if enable_profiling { - Ok(ProcessorPtr::create(ProcessorProfileWrapper::create( - transform, - prof_id, - proc_profs.clone(), - ))) - } else { - Ok(ProcessorPtr::create(transform)) - } + Ok(ProcessorPtr::create( + match params.aggregate_functions.is_empty() { + true => TransformFinalGroupBy::try_create( + input, + output, + method.clone(), + params.clone(), + )?, + false => TransformFinalAggregate::try_create( + input, + output, + method.clone(), + params.clone(), + )?, + }, + )) })?; - // Append a profile stub to record the output rows and bytes - if enable_profiling { - pipeline.add_transform(|input, output| { - Ok(ProcessorPtr::create(Transformer::create( - input, - output, - ProfileStub::new(prof_id, proc_profs.clone()) - .accumulate_output_rows() - .accumulate_output_bytes(), - ))) - })?; - } Ok(()) } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs index 3f7ea0c3ef1a..21244bb01fb5 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs @@ -18,6 +18,7 @@ use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_sql::executor::physical_plans::HashJoin; +use databend_common_sql::IndexType; use parking_lot::RwLock; use crate::sql::plans::JoinType; @@ -35,6 +36,7 @@ pub struct HashJoinDesc { pub(crate) build_keys: Vec, pub(crate) probe_keys: Vec, pub(crate) join_type: JoinType, + pub(crate) single_to_inner: Option, /// when we have non-equal conditions for hash join, /// for example `a = b and c = d and e > f`, we will use `and_filters` /// to wrap `e > f` as a other_predicate to do next step's check. @@ -42,9 +44,11 @@ pub struct HashJoinDesc { pub(crate) marker_join_desc: MarkJoinDesc, /// Whether the Join are derived from correlated subquery. pub(crate) from_correlated_subquery: bool, - pub(crate) probe_keys_rt: Vec>>, + pub(crate) probe_keys_rt: Vec, IndexType)>>, // Under cluster, mark if the join is broadcast join. 
pub broadcast: bool, + // If enable bloom runtime filter + pub enable_bloom_runtime_filter: bool, } impl HashJoinDesc { @@ -62,10 +66,14 @@ impl HashJoinDesc { .map(|k| k.as_expr(&BUILTIN_FUNCTIONS)) .collect(); - let probe_keys_rt: Vec>> = join + let probe_keys_rt: Vec, IndexType)>> = join .probe_keys_rt .iter() - .map(|k| k.as_ref().map(|v| v.as_expr(&BUILTIN_FUNCTIONS))) + .map(|probe_key_rt| { + probe_key_rt + .as_ref() + .map(|(expr, idx)| (expr.as_expr(&BUILTIN_FUNCTIONS), *idx)) + }) .collect(); Ok(HashJoinDesc { @@ -80,6 +88,8 @@ impl HashJoinDesc { from_correlated_subquery: join.from_correlated_subquery, probe_keys_rt, broadcast: join.broadcast, + single_to_inner: join.single_to_inner.clone(), + enable_bloom_runtime_filter: join.enable_bloom_runtime_filter, }) } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs index 34c3d2122e4b..39eda78b1fc3 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_build_state.rs @@ -152,7 +152,9 @@ impl HashJoinBuildState { if !is_cluster || is_broadcast_join { enable_inlist_runtime_filter = true; enable_min_max_runtime_filter = true; - if ctx.get_settings().get_runtime_filter()? { + if ctx.get_settings().get_bloom_runtime_filter()? + && hash_join_state.hash_join_desc.enable_bloom_runtime_filter + { enable_bloom_runtime_filter = true; } } @@ -211,7 +213,11 @@ impl HashJoinBuildState { // Add `data_block` for build table to `row_space` pub(crate) fn add_build_block(&self, data_block: DataBlock) -> Result<()> { - let block_outer_scan_map = if self.hash_join_state.need_outer_scan() { + let block_outer_scan_map = if self.hash_join_state.need_outer_scan() + || matches!( + self.hash_join_state.hash_join_desc.single_to_inner, + Some(JoinType::RightSingle) + ) { vec![false; data_block.num_rows()] } else { vec![] @@ -227,7 +233,12 @@ impl HashJoinBuildState { // Acquire lock in current scope let _lock = self.mutex.lock(); let build_state = unsafe { &mut *self.hash_join_state.build_state.get() }; - if self.hash_join_state.need_outer_scan() { + if self.hash_join_state.need_outer_scan() + || matches!( + self.hash_join_state.hash_join_desc.single_to_inner, + Some(JoinType::RightSingle) + ) + { build_state.outer_scan_map.push(block_outer_scan_map); } if self.hash_join_state.need_mark_scan() { @@ -280,24 +291,7 @@ impl HashJoinBuildState { .clone() }; - let mut runtime_filter = RuntimeFilterInfo::default(); - if self.enable_inlist_runtime_filter && build_num_rows < INLIST_RUNTIME_FILTER_THRESHOLD - { - self.inlist_runtime_filter(&mut runtime_filter, &build_chunks)?; - } - // If enable bloom runtime filter, collect hashes for build keys - if self.enable_bloom_runtime_filter { - self.bloom_runtime_filter(&self.func_ctx, &build_chunks, &mut runtime_filter)?; - } - - if self.enable_min_max_runtime_filter { - self.min_max_runtime_filter(&self.func_ctx, &build_chunks, &mut runtime_filter)?; - } - - if !runtime_filter.is_empty() { - self.ctx - .set_runtime_filter((self.hash_join_state.table_index, runtime_filter)); - } + self.add_runtime_filter(&build_chunks, build_num_rows)?; if self.hash_join_state.hash_join_desc.join_type == JoinType::Cross { return Ok(()); @@ -797,58 +791,88 @@ impl HashJoinBuildState { Ok(()) } - fn bloom_runtime_filter( - &self, - func_ctx: &FunctionContext, - data_blocks: 
&[DataBlock], - runtime_filter: &mut RuntimeFilterInfo, - ) -> Result<()> { - for (build_key, probe_key) in self + fn add_runtime_filter(&self, build_chunks: &[DataBlock], build_num_rows: usize) -> Result<()> { + for (build_key, probe_key, table_index) in self .hash_join_state .hash_join_desc .build_keys .iter() .zip(self.hash_join_state.hash_join_desc.probe_keys_rt.iter()) + .filter_map(|(b, p)| p.as_ref().map(|(p, index)| (b, p, index))) { - if !build_key.data_type().remove_nullable().is_numeric() - && !build_key.data_type().remove_nullable().is_string() + let mut runtime_filter = RuntimeFilterInfo::default(); + if self.enable_inlist_runtime_filter && build_num_rows < INLIST_RUNTIME_FILTER_THRESHOLD { - return Ok(()); + self.inlist_runtime_filter( + &mut runtime_filter, + build_chunks, + build_key, + probe_key, + )?; } - if let Some(Expr::ColumnRef { id, .. }) = probe_key { - let mut columns = Vec::with_capacity(data_blocks.len()); - for block in data_blocks.iter() { - if block.num_columns() == 0 { - continue; - } - let evaluator = Evaluator::new(block, func_ctx, &BUILTIN_FUNCTIONS); - let column = evaluator - .run(build_key)? - .convert_to_full_column(build_key.data_type(), block.num_rows()); - columns.push(column); - } - if columns.is_empty() { - return Ok(()); - } - let build_key_column = Column::concat_columns(columns.into_iter())?; - // Generate bloom filter using build column - let data_type = build_key.data_type().clone(); - let num_rows = build_key_column.len(); - let method = DataBlock::choose_hash_method_with_types(&[data_type.clone()], false)?; - let mut hashes = HashSet::with_capacity(num_rows); - hash_by_method( - &method, - &[(build_key_column, data_type)], - num_rows, - &mut hashes, + if self.enable_bloom_runtime_filter { + self.bloom_runtime_filter(build_chunks, &mut runtime_filter, build_key, probe_key)?; + } + if self.enable_min_max_runtime_filter { + self.min_max_runtime_filter( + build_chunks, + &mut runtime_filter, + build_key, + probe_key, )?; - let mut hashes_vec = Vec::with_capacity(num_rows); - hashes.into_iter().for_each(|hash| { - hashes_vec.push(hash); - }); - let filter = BinaryFuse16::try_from(&hashes_vec)?; - runtime_filter.add_bloom((id.to_string(), filter)); } + if !runtime_filter.is_empty() { + self.ctx.set_runtime_filter((*table_index, runtime_filter)); + } + } + Ok(()) + } + + fn bloom_runtime_filter( + &self, + data_blocks: &[DataBlock], + runtime_filter: &mut RuntimeFilterInfo, + build_key: &Expr, + probe_key: &Expr, + ) -> Result<()> { + if !build_key.data_type().remove_nullable().is_numeric() + && !build_key.data_type().remove_nullable().is_string() + { + return Ok(()); + } + if let Expr::ColumnRef { id, .. } = probe_key { + let mut columns = Vec::with_capacity(data_blocks.len()); + for block in data_blocks.iter() { + if block.num_columns() == 0 { + continue; + } + let evaluator = Evaluator::new(block, &self.func_ctx, &BUILTIN_FUNCTIONS); + let column = evaluator + .run(build_key)? 
+ .convert_to_full_column(build_key.data_type(), block.num_rows()); + columns.push(column); + } + if columns.is_empty() { + return Ok(()); + } + let build_key_column = Column::concat_columns(columns.into_iter())?; + // Generate bloom filter using build column + let data_type = build_key.data_type().clone(); + let num_rows = build_key_column.len(); + let method = DataBlock::choose_hash_method_with_types(&[data_type.clone()], false)?; + let mut hashes = HashSet::with_capacity(num_rows); + hash_by_method( + &method, + &[(build_key_column, data_type)], + num_rows, + &mut hashes, + )?; + let mut hashes_vec = Vec::with_capacity(num_rows); + hashes.into_iter().for_each(|hash| { + hashes_vec.push(hash); + }); + let filter = BinaryFuse16::try_from(&hashes_vec)?; + runtime_filter.add_bloom((id.to_string(), filter)); } Ok(()) } @@ -857,22 +881,14 @@ impl HashJoinBuildState { &self, runtime_filter: &mut RuntimeFilterInfo, data_blocks: &[DataBlock], + build_key: &Expr, + probe_key: &Expr, ) -> Result<()> { - for (build_key, probe_key) in self - .hash_join_state - .hash_join_desc - .build_keys - .iter() - .zip(self.hash_join_state.hash_join_desc.probe_keys_rt.iter()) + if let Some(distinct_build_column) = + dedup_build_key_column(&self.func_ctx, data_blocks, build_key)? { - if let Some(distinct_build_column) = - dedup_build_key_column(&self.func_ctx, data_blocks, build_key)? - { - if let Some(probe_key) = probe_key { - if let Some(filter) = inlist_filter(probe_key, distinct_build_column.clone())? { - runtime_filter.add_inlist(filter); - } - } + if let Some(filter) = inlist_filter(probe_key, distinct_build_column.clone())? { + runtime_filter.add_inlist(filter); } } Ok(()) @@ -880,107 +896,99 @@ impl HashJoinBuildState { fn min_max_runtime_filter( &self, - func_ctx: &FunctionContext, data_blocks: &[DataBlock], runtime_filter: &mut RuntimeFilterInfo, + build_key: &Expr, + probe_key: &Expr, ) -> Result<()> { - for (build_key, probe_key) in self - .hash_join_state - .hash_join_desc - .build_keys - .iter() - .zip(self.hash_join_state.hash_join_desc.probe_keys_rt.iter()) - .filter_map(|(b, p)| p.as_ref().map(|p| (b, p))) + if !build_key.data_type().remove_nullable().is_numeric() + && !build_key.data_type().remove_nullable().is_string() { - if !build_key.data_type().remove_nullable().is_numeric() - && !build_key.data_type().remove_nullable().is_string() - { + return Ok(()); + } + if let Expr::ColumnRef { .. } = probe_key { + let mut columns = Vec::with_capacity(data_blocks.len()); + for block in data_blocks.iter() { + if block.num_columns() == 0 { + continue; + } + let evaluator = Evaluator::new(block, &self.func_ctx, &BUILTIN_FUNCTIONS); + let column = evaluator + .run(build_key)? + .convert_to_full_column(build_key.data_type(), block.num_rows()); + columns.push(column); + } + if columns.is_empty() { + return Ok(()); + } + let build_key_column = Column::concat_columns(columns.into_iter())?; + if build_key_column.len() == 0 { return Ok(()); } - if let Expr::ColumnRef { .. 
} = probe_key { - let mut columns = Vec::with_capacity(data_blocks.len()); - for block in data_blocks.iter() { - if block.num_columns() == 0 { - continue; + // Generate min max filter using build column + let min_max = build_key_column.remove_nullable().domain(); + let min_max_filter = match min_max { + Domain::Number(domain) => match domain { + NumberDomain::UInt8(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? } - let evaluator = Evaluator::new(block, func_ctx, &BUILTIN_FUNCTIONS); - let column = evaluator - .run(build_key)? - .convert_to_full_column(build_key.data_type(), block.num_rows()); - columns.push(column); - } - if columns.is_empty() { - return Ok(()); - } - let build_key_column = Column::concat_columns(columns.into_iter())?; - if build_key_column.len() == 0 { - return Ok(()); - } - // Generate min max filter using build column - let min_max = build_key_column.remove_nullable().domain(); - let min_max_filter = match min_max { - Domain::Number(domain) => match domain { - NumberDomain::UInt8(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::UInt16(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::UInt32(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::UInt64(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::Int8(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::Int16(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::Int32(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::Int64(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::Float32(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? - } - NumberDomain::Float64(simple_domain) => { - let min = Scalar::Number(NumberScalar::from(simple_domain.min)); - let max = Scalar::Number(NumberScalar::from(simple_domain.max)); - min_max_filter(min, max, probe_key)? 
- } - }, - Domain::String(domain) => { - let min = Scalar::String(domain.min); - let max = Scalar::String(domain.max.unwrap()); + NumberDomain::UInt16(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); min_max_filter(min, max, probe_key)? } - _ => unreachable!(), - }; - if let Some(min_max_filter) = min_max_filter { - runtime_filter.add_min_max(min_max_filter); + NumberDomain::UInt32(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + NumberDomain::UInt64(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + NumberDomain::Int8(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + NumberDomain::Int16(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + NumberDomain::Int32(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + NumberDomain::Int64(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + NumberDomain::Float32(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + NumberDomain::Float64(simple_domain) => { + let min = Scalar::Number(NumberScalar::from(simple_domain.min)); + let max = Scalar::Number(NumberScalar::from(simple_domain.max)); + min_max_filter(min, max, probe_key)? + } + }, + Domain::String(domain) => { + let min = Scalar::String(domain.min); + let max = Scalar::String(domain.max.unwrap()); + min_max_filter(min, max, probe_key)? 
} + _ => unreachable!(), + }; + if let Some(min_max_filter) = min_max_filter { + runtime_filter.add_min_max(min_max_filter); } } Ok(()) diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs index 534386ad8655..bfb74cf90240 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_probe_state.rs @@ -176,7 +176,7 @@ impl HashJoinProbeState { pub fn probe_join( &self, - input: DataBlock, + mut input: DataBlock, probe_state: &mut ProbeState, ) -> Result> { let input_num_rows = input.num_rows(); @@ -259,7 +259,9 @@ impl HashJoinProbeState { *ty = ty.remove_nullable(); } - let input = input.project(&self.probe_projections); + if self.hash_join_state.hash_join_desc.join_type != JoinType::LeftMark { + input = input.project(&self.probe_projections); + } probe_state.generation_state.is_probe_projected = input.num_columns() > 0; if self.hash_join_state.fast_return.load(Ordering::Relaxed) @@ -289,7 +291,7 @@ impl HashJoinProbeState { // Probe: // (1) INNER / RIGHT / RIGHT SINGLE / RIGHT SEMI / RIGHT ANTI / RIGHT MARK / LEFT SEMI / LEFT MARK - // prefer_early_filtering is true => early_filtering_probe_with_selection + // prefer_early_filtering is true => early_filtering_matched_probe // prefer_early_filtering is false => probe // (2) LEFT / LEFT SINGLE / LEFT ANTI / FULL // prefer_early_filtering is true => early_filtering_probe @@ -306,36 +308,42 @@ impl HashJoinProbeState { .build_keys_accessor_and_hashes(keys_state, &mut probe_state.hashes)?; // Perform a round of hash table probe. - if Self::check_for_selection(&self.hash_join_state.hash_join_desc.join_type) { - probe_state.selection_count = if prefer_early_filtering { + probe_state.probe_with_selection = prefer_early_filtering; + probe_state.selection_count = if !Self::need_unmatched_selection( + &self.hash_join_state.hash_join_desc.join_type, + probe_state.with_conjunction, + ) { + if prefer_early_filtering { // Early filtering, use selection to get better performance. - probe_state.probe_with_selection = true; - - table.hash_table.early_filtering_probe_with_selection( + table.hash_table.early_filtering_matched_probe( &mut probe_state.hashes, valids, &mut probe_state.selection, ) } else { // If don't do early filtering, don't use selection. - probe_state.probe_with_selection = false; - table.hash_table.probe(&mut probe_state.hashes, valids) - }; - probe_state.num_keys_hash_matched += probe_state.selection_count as u64; + } } else { - // For left join, left single join, full join and left anti join, don't use selection. - probe_state.probe_with_selection = false; - - let count = if prefer_early_filtering { - table - .hash_table - .early_filtering_probe(&mut probe_state.hashes, valids) + if prefer_early_filtering { + // Early filtering, use matched selection and unmatched selection to get better performance. + let unmatched_selection = + probe_state.probe_unmatched_indexes.as_mut().unwrap(); + let (matched_count, unmatched_count) = + table.hash_table.early_filtering_probe( + &mut probe_state.hashes, + valids, + &mut probe_state.selection, + unmatched_selection, + ); + probe_state.probe_unmatched_indexes_count = unmatched_count; + matched_count } else { + // If don't do early filtering, don't use selection. 
table.hash_table.probe(&mut probe_state.hashes, valids) - }; - probe_state.num_keys_hash_matched += count as u64; - } + } + }; + probe_state.num_keys_hash_matched += probe_state.selection_count as u64; // Continue to probe hash table and process data blocks. self.result_blocks(&input, keys, &table.hash_table, probe_state) @@ -367,19 +375,12 @@ impl HashJoinProbeState { ) } - /// Checks if a join type can use selection. - pub fn check_for_selection(join_type: &JoinType) -> bool { + /// Checks if the join type need to use unmatched selection. + pub fn need_unmatched_selection(join_type: &JoinType, with_conjunction: bool) -> bool { matches!( join_type, - JoinType::Inner - | JoinType::Right - | JoinType::RightSingle - | JoinType::RightSemi - | JoinType::RightAnti - | JoinType::RightMark - | JoinType::LeftSemi - | JoinType::LeftMark - ) + JoinType::Left | JoinType::LeftSingle | JoinType::Full | JoinType::LeftAnti + ) && !with_conjunction } pub fn probe_attach(&self) -> Result { diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state.rs index 706de842b75b..47268c34e97f 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/hash_join_state.rs @@ -120,9 +120,6 @@ pub struct HashJoinState { pub(crate) partition_id: AtomicI8, pub(crate) enable_spill: bool, - /// If the join node generate runtime filters, the scan node will use it to do prune. - pub(crate) table_index: IndexType, - pub(crate) merge_into_state: Option>, } @@ -133,7 +130,6 @@ impl HashJoinState { build_projections: &ColumnSet, hash_join_desc: HashJoinDesc, probe_to_build: &[(usize, (bool, bool))], - table_index: IndexType, merge_into_target_table_index: IndexType, merge_into_is_distributed: bool, ) -> Result> { @@ -170,7 +166,6 @@ impl HashJoinState { _continue_build_dummy_receiver, partition_id: AtomicI8::new(-2), enable_spill, - table_index, merge_into_state: MergeIntoState::try_create_merge_into_state( merge_into_target_table_index, merge_into_is_distributed, diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs index 0b6245fc1311..9885931245a1 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/inner_join.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; use databend_common_exception::ErrorCode; @@ -33,7 +34,12 @@ use crate::pipelines::processors::transforms::hash_join::HashJoinProbeState; use crate::pipelines::processors::transforms::hash_join::ProbeState; impl HashJoinProbeState { - pub(crate) fn inner_join<'a, H: HashJoinHashtableLike>( + pub(crate) fn inner_join< + 'a, + H: HashJoinHashtableLike, + const FROM_LEFT_SINGLE: bool, + const FROM_RIGHT_SINGLE: bool, + >( &self, input: &DataBlock, keys: Box<(dyn KeyAccessor)>, @@ -52,7 +58,18 @@ impl HashJoinProbeState { let pointers = probe_state.hashes.as_slice(); // Build states. 
- let build_state = unsafe { &*self.hash_join_state.build_state.get() }; + let build_state = unsafe { &mut *self.hash_join_state.build_state.get() }; + let outer_scan_map = &mut build_state.outer_scan_map; + let mut right_single_scan_map = if FROM_RIGHT_SINGLE { + outer_scan_map + .iter_mut() + .map(|sp| unsafe { + std::mem::transmute::<*mut bool, *mut AtomicBool>(sp.as_mut_ptr()) + }) + .collect::>() + } else { + vec![] + }; // Results. let mut matched_idx = 0; @@ -66,12 +83,18 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; } + if FROM_LEFT_SINGLE && match_count > 1 { + return Err(ErrorCode::Internal( + "Scalar subquery can't return more than one row", + )); + } + // Fill `probe_indexes`. for _ in 0..match_count { unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; @@ -79,26 +102,25 @@ impl HashJoinProbeState { } while matched_idx == max_block_size { - result_blocks.push(self.process_inner_join_block( + result_blocks.push(self.process_inner_join_block::( matched_idx, input, probe_indexes, build_indexes, &mut probe_state.generation_state, &build_state.generation_state, + &mut right_single_scan_map, )?); - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( - key, - incomplete_ptr, - build_indexes_ptr, - matched_idx, - max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; - matched_idx += 1; - } + (matched_idx, incomplete_ptr) = self + .fill_probe_and_build_indexes::<_, FROM_LEFT_SINGLE>( + hash_table, + key, + incomplete_ptr, + *idx, + probe_indexes, + build_indexes_ptr, + max_block_size, + )?; } } } else { @@ -107,12 +129,18 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(idx) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; } + if FROM_LEFT_SINGLE && match_count > 1 { + return Err(ErrorCode::Internal( + "Scalar subquery can't return more than one row", + )); + } + // Fill `probe_indexes`. 
for _ in 0..match_count { unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; @@ -120,38 +148,38 @@ impl HashJoinProbeState { } while matched_idx == max_block_size { - result_blocks.push(self.process_inner_join_block( + result_blocks.push(self.process_inner_join_block::( matched_idx, input, probe_indexes, build_indexes, &mut probe_state.generation_state, &build_state.generation_state, + &mut right_single_scan_map, )?); - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( - key, - incomplete_ptr, - build_indexes_ptr, - matched_idx, - max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; - } + (matched_idx, incomplete_ptr) = self + .fill_probe_and_build_indexes::<_, FROM_LEFT_SINGLE>( + hash_table, + key, + incomplete_ptr, + idx as u32, + probe_indexes, + build_indexes_ptr, + max_block_size, + )?; } } } if matched_idx > 0 { - result_blocks.push(self.process_inner_join_block( + result_blocks.push(self.process_inner_join_block::( matched_idx, input, probe_indexes, build_indexes, &mut probe_state.generation_state, &build_state.generation_state, + &mut right_single_scan_map, )?); } @@ -188,7 +216,7 @@ impl HashJoinProbeState { } #[inline] - fn process_inner_join_block( + fn process_inner_join_block( &self, matched_idx: usize, input: &DataBlock, @@ -196,6 +224,7 @@ impl HashJoinProbeState { build_indexes: &[RowPtr], probe_state: &mut ProbeBlockGenerationState, build_state: &BuildBlockGenerationState, + right_single_scan_map: &mut [*mut AtomicBool], ) -> Result { if self.hash_join_state.interrupt.load(Ordering::Relaxed) { return Err(ErrorCode::AbortedQuery( @@ -242,6 +271,53 @@ impl HashJoinProbeState { result_block.add_column(entry); } } + + if FROM_RIGHT_SINGLE { + self.update_right_single_scan_map( + &build_indexes[0..matched_idx], + right_single_scan_map, + None, + )?; + } + Ok(result_block) } + + #[inline(always)] + #[allow(clippy::too_many_arguments)] + pub(crate) fn fill_probe_and_build_indexes< + 'a, + H: HashJoinHashtableLike, + const FROM_LEFT_SINGLE: bool, + >( + &self, + hash_table: &H, + key: &H::Key, + incomplete_ptr: u64, + idx: u32, + probe_indexes: &mut [u32], + build_indexes_ptr: *mut RowPtr, + max_block_size: usize, + ) -> Result<(usize, u64)> + where + H::Key: 'a, + { + let (match_count, ptr) = + hash_table.next_probe(key, incomplete_ptr, build_indexes_ptr, 0, max_block_size); + if match_count == 0 { + return Ok((0, 0)); + } + + if FROM_LEFT_SINGLE { + return Err(ErrorCode::Internal( + "Scalar subquery can't return more than one row", + )); + } + + for i in 0..match_count { + unsafe { *probe_indexes.get_unchecked_mut(i) = idx }; + } + + Ok((match_count, ptr)) + } } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs index caea33882c97..9a6e709a7307 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_anti_join.rs @@ -39,23 +39,37 @@ impl HashJoinProbeState { H::Key: 'a, { // Probe states. - let mutable_indexes = &mut probe_state.mutable_indexes; - let probe_indexes = &mut mutable_indexes.probe_indexes; let pointers = probe_state.hashes.as_slice(); - // Results. - let mut matched_idx = 0; - let mut result_blocks = vec![]; - // Probe hash table and generate data blocks. 
- for idx in 0..input.num_rows() { - let key = unsafe { keys.key_unchecked(idx) }; - let ptr = unsafe { *pointers.get_unchecked(idx) }; - if !hash_table.next_contains(key, ptr) { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; + let (probe_indexes, count) = if probe_state.probe_with_selection { + // Safe to unwrap. + let probe_unmatched_indexes = probe_state.probe_unmatched_indexes.as_mut().unwrap(); + let mut unmatched_idx = probe_state.probe_unmatched_indexes_count; + let selection = &probe_state.selection.as_slice()[0..probe_state.selection_count]; + for idx in selection.iter() { + let key = unsafe { keys.key_unchecked(*idx as usize) }; + let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; + if !hash_table.next_contains(key, ptr) { + unsafe { *probe_unmatched_indexes.get_unchecked_mut(unmatched_idx) = *idx }; + unmatched_idx += 1; + } } - } + (probe_unmatched_indexes, unmatched_idx) + } else { + let mutable_indexes = &mut probe_state.mutable_indexes; + let probe_indexes = &mut mutable_indexes.probe_indexes; + let mut unmatched_idx = 0; + for idx in 0..input.num_rows() { + let key = unsafe { keys.key_unchecked(idx) }; + let ptr = unsafe { *pointers.get_unchecked(idx) }; + if !hash_table.next_contains(key, ptr) { + unsafe { *probe_indexes.get_unchecked_mut(unmatched_idx) = idx as u32 }; + unmatched_idx += 1; + } + } + (probe_indexes, unmatched_idx) + }; if self.hash_join_state.interrupt.load(Ordering::Relaxed) { return Err(ErrorCode::AbortedQuery( @@ -63,15 +77,16 @@ impl HashJoinProbeState { )); } - if matched_idx > 0 { - result_blocks.push(DataBlock::take( - input, - &probe_indexes[0..matched_idx], - &mut probe_state.generation_state.string_items_buf, - )?); + let result_block = DataBlock::take( + input, + &probe_indexes[0..count], + &mut probe_state.generation_state.string_items_buf, + )?; + if result_block.is_empty() { + Ok(vec![]) + } else { + Ok(vec![result_block]) } - - Ok(result_blocks) } pub(crate) fn left_anti_join_with_conjunct<'a, H: HashJoinHashtableLike>( @@ -109,47 +124,88 @@ impl HashJoinProbeState { let mut result_blocks = vec![]; // Probe hash table and generate data blocks. - for idx in 0..input.num_rows() { - let key = unsafe { keys.key_unchecked(idx) }; - let ptr = unsafe { *pointers.get_unchecked(idx) }; + if probe_state.probe_with_selection { + let selection = &probe_state.selection.as_slice()[0..probe_state.selection_count]; + for idx in selection.iter() { + let key = unsafe { keys.key_unchecked(*idx as usize) }; + let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; - // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = - hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); + // Probe hash table and fill `build_indexes`. + let (match_count, mut incomplete_ptr) = + hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); - if match_count == 0 { - continue; - } + if match_count == 0 { + continue; + } + + // Fill `probe_indexes`. + for _ in 0..match_count { + unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; + matched_idx += 1; + } - // Fill `probe_indexes`. 
- for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; + while matched_idx == max_block_size { + self.process_left_anti_join_block( + matched_idx, + input, + probe_indexes, + build_indexes, + &mut probe_state.generation_state, + &build_state.generation_state, + other_predicate, + &mut row_state, + )?; + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, + key, + incomplete_ptr, + *idx, + probe_indexes, + build_indexes_ptr, + max_block_size, + )?; + } } + } else { + for idx in 0..input.num_rows() { + let key = unsafe { keys.key_unchecked(idx) }; + let ptr = unsafe { *pointers.get_unchecked(idx) }; + + // Probe hash table and fill `build_indexes`. + let (match_count, mut incomplete_ptr) = + hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); + + if match_count == 0 { + continue; + } - while matched_idx == max_block_size { - self.process_left_anti_join_block( - matched_idx, - input, - probe_indexes, - build_indexes, - &mut probe_state.generation_state, - &build_state.generation_state, - other_predicate, - &mut row_state, - )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( - key, - incomplete_ptr, - build_indexes_ptr, - matched_idx, - max_block_size, - ); + // Fill `probe_indexes`. for _ in 0..match_count { unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; matched_idx += 1; } + + while matched_idx == max_block_size { + self.process_left_anti_join_block( + matched_idx, + input, + probe_indexes, + build_indexes, + &mut probe_state.generation_state, + &build_state.generation_state, + other_predicate, + &mut row_state, + )?; + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, + key, + incomplete_ptr, + idx as u32, + probe_indexes, + build_indexes_ptr, + max_block_size, + )?; + } } } @@ -167,17 +223,17 @@ impl HashJoinProbeState { } // Find all unmatched indexes and generate the result `DataBlock`. - matched_idx = 0; + let mut unmatched_idx = 0; for (i, state) in row_state.iter().enumerate() { if !*state { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = i as u32 }; - matched_idx += 1; + unsafe { *probe_indexes.get_unchecked_mut(unmatched_idx) = i as u32 }; + unmatched_idx += 1; } } - if matched_idx > 0 { + if unmatched_idx > 0 { result_blocks.push(DataBlock::take( input, - &probe_indexes[0..matched_idx], + &probe_indexes[0..unmatched_idx], &mut probe_state.generation_state.string_items_buf, )?); } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs index bdf26a1dc984..1335e4fd8877 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_join.rs @@ -33,7 +33,7 @@ use crate::pipelines::processors::transforms::hash_join::ProbeState; use crate::sql::plans::JoinType; impl HashJoinProbeState { - pub(crate) fn left_join<'a, H: HashJoinHashtableLike>( + pub(crate) fn left_join<'a, H: HashJoinHashtableLike, const LEFT_SINGLE: bool>( &self, input: &DataBlock, keys: Box<(dyn KeyAccessor)>, @@ -63,69 +63,114 @@ impl HashJoinProbeState { let mut unmatched_idx = 0; let mut result_blocks = vec![]; - // Probe hash table and generate data blocks. 
- for idx in 0..input_rows { - let key = unsafe { keys.key_unchecked(idx) }; - let ptr = unsafe { *pointers.get_unchecked(idx) }; - - // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = - hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); - - let mut total_probe_matched = 0; - if match_count > 0 { - total_probe_matched += match_count; - if self.hash_join_state.hash_join_desc.join_type == JoinType::LeftSingle - && total_probe_matched > 1 - { - return Err(ErrorCode::Internal( - "Scalar subquery can't return more than one row", - )); + if probe_state.probe_with_selection { + unmatched_idx = probe_state.probe_unmatched_indexes_count; + let selection = &probe_state.selection.as_slice()[0..probe_state.selection_count]; + for idx in selection.iter() { + let key = unsafe { keys.key_unchecked(*idx as usize) }; + let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; + // Probe hash table and fill `build_indexes`. + let (match_count, mut incomplete_ptr) = + hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); + + if match_count > 0 { + if LEFT_SINGLE && match_count > 1 { + return Err(ErrorCode::Internal( + "Scalar subquery can't return more than one row", + )); + } + + for _ in 0..match_count { + unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; + matched_idx += 1; + } + } else { + unsafe { *probe_unmatched_indexes.get_unchecked_mut(unmatched_idx) = *idx }; + unmatched_idx += 1; } - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; + + while matched_idx == max_block_size { + self.process_left_or_full_join_block( + matched_idx, + input, + probe_indexes, + build_indexes, + &mut probe_state.generation_state, + &build_state.generation_state, + outer_scan_map, + &mut result_blocks, + None, + None, + None, + )?; + (matched_idx, incomplete_ptr) = self.fill_left_outer_states::<_, LEFT_SINGLE>( + hash_table, + key, + incomplete_ptr, + *idx, + probe_indexes, + build_indexes_ptr, + max_block_size, + false, + None, + None, + )?; } - } else { - unsafe { *probe_unmatched_indexes.get_unchecked_mut(unmatched_idx) = idx as u32 }; - unmatched_idx += 1; } + } else { + // Probe hash table and generate data blocks. + for idx in 0..input_rows { + let key = unsafe { keys.key_unchecked(idx) }; + let ptr = unsafe { *pointers.get_unchecked(idx) }; + + // Probe hash table and fill `build_indexes`. 
+ let (match_count, mut incomplete_ptr) = + hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); - while matched_idx == max_block_size { - self.process_left_or_full_join_block( - matched_idx, - input, - probe_indexes, - build_indexes, - &mut probe_state.generation_state, - &build_state.generation_state, - outer_scan_map, - &mut result_blocks, - None, - None, - None, - )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( - key, - incomplete_ptr, - build_indexes_ptr, - matched_idx, - max_block_size, - ); if match_count > 0 { - total_probe_matched += match_count; - if self.hash_join_state.hash_join_desc.join_type == JoinType::LeftSingle - && total_probe_matched > 1 - { + if LEFT_SINGLE && match_count > 1 { return Err(ErrorCode::Internal( "Scalar subquery can't return more than one row", )); } + for _ in 0..match_count { unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; matched_idx += 1; } + } else { + unsafe { + *probe_unmatched_indexes.get_unchecked_mut(unmatched_idx) = idx as u32 + }; + unmatched_idx += 1; + } + + while matched_idx == max_block_size { + self.process_left_or_full_join_block( + matched_idx, + input, + probe_indexes, + build_indexes, + &mut probe_state.generation_state, + &build_state.generation_state, + outer_scan_map, + &mut result_blocks, + None, + None, + None, + )?; + (matched_idx, incomplete_ptr) = self.fill_left_outer_states::<_, LEFT_SINGLE>( + hash_table, + key, + incomplete_ptr, + idx as u32, + probe_indexes, + build_indexes_ptr, + max_block_size, + false, + None, + None, + )?; } } } @@ -159,7 +204,7 @@ impl HashJoinProbeState { Ok(result_blocks) } - pub(crate) fn left_join_with_conjunct<'a, H: HashJoinHashtableLike>( + pub(crate) fn left_join_with_conjunct<'a, H: HashJoinHashtableLike, const LEFT_SINGLE: bool>( &self, input: &DataBlock, keys: Box<(dyn KeyAccessor)>, @@ -200,62 +245,72 @@ impl HashJoinProbeState { let mut result_blocks = vec![]; // Probe hash table and generate data blocks. - for idx in 0..input_rows { - let key = unsafe { keys.key_unchecked(idx) }; - let ptr = unsafe { *pointers.get_unchecked(idx) }; - - // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = - hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); - // `total_probe_matched` is used to record the matched rows count for current `idx` row from probe_block - let mut total_probe_matched = 0; - if match_count > 0 { - total_probe_matched += match_count; - if self.hash_join_state.hash_join_desc.join_type == JoinType::LeftSingle - && total_probe_matched > 1 - { - return Err(ErrorCode::Internal( - "Scalar subquery can't return more than one row", - )); - } + if probe_state.probe_with_selection { + let selection = &probe_state.selection.as_slice()[0..probe_state.selection_count]; + for idx in selection.iter() { + let key = unsafe { keys.key_unchecked(*idx as usize) }; + let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; - unsafe { - *row_state.get_unchecked_mut(idx) += match_count; - for _ in 0..match_count { - *row_state_indexes.get_unchecked_mut(matched_idx) = idx; - *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32; - matched_idx += 1; + // Probe hash table and fill `build_indexes`. 
+ let (match_count, mut incomplete_ptr) = + hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); + + if match_count > 0 { + if LEFT_SINGLE && match_count > 1 { + return Err(ErrorCode::Internal( + "Scalar subquery can't return more than one row", + )); + } + + unsafe { + *row_state.get_unchecked_mut(*idx as usize) += match_count; + for _ in 0..match_count { + *row_state_indexes.get_unchecked_mut(matched_idx) = *idx as usize; + *probe_indexes.get_unchecked_mut(matched_idx) = *idx; + matched_idx += 1; + } } } + + while matched_idx == max_block_size { + self.process_left_or_full_join_block( + matched_idx, + input, + probe_indexes, + build_indexes, + &mut probe_state.generation_state, + &build_state.generation_state, + outer_scan_map, + &mut result_blocks, + Some(other_predicate), + Some(row_state), + Some(row_state_indexes), + )?; + (matched_idx, incomplete_ptr) = self.fill_left_outer_states::<_, LEFT_SINGLE>( + hash_table, + key, + incomplete_ptr, + *idx, + probe_indexes, + build_indexes_ptr, + max_block_size, + true, + Some(row_state), + Some(row_state_indexes), + )?; + } } + } else { + for idx in 0..input_rows { + let key = unsafe { keys.key_unchecked(idx) }; + let ptr = unsafe { *pointers.get_unchecked(idx) }; + + // Probe hash table and fill `build_indexes`. + let (match_count, mut incomplete_ptr) = + hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); - while matched_idx == max_block_size { - self.process_left_or_full_join_block( - matched_idx, - input, - probe_indexes, - build_indexes, - &mut probe_state.generation_state, - &build_state.generation_state, - outer_scan_map, - &mut result_blocks, - Some(other_predicate), - Some(row_state), - Some(row_state_indexes), - )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( - key, - incomplete_ptr, - build_indexes_ptr, - matched_idx, - max_block_size, - ); if match_count > 0 { - total_probe_matched += match_count; - if self.hash_join_state.hash_join_desc.join_type == JoinType::LeftSingle - && total_probe_matched > 1 - { + if LEFT_SINGLE && match_count > 1 { return Err(ErrorCode::Internal( "Scalar subquery can't return more than one row", )); @@ -270,6 +325,34 @@ impl HashJoinProbeState { } } } + + while matched_idx == max_block_size { + self.process_left_or_full_join_block( + matched_idx, + input, + probe_indexes, + build_indexes, + &mut probe_state.generation_state, + &build_state.generation_state, + outer_scan_map, + &mut result_blocks, + Some(other_predicate), + Some(row_state), + Some(row_state_indexes), + )?; + (matched_idx, incomplete_ptr) = self.fill_left_outer_states::<_, LEFT_SINGLE>( + hash_table, + key, + incomplete_ptr, + idx as u32, + probe_indexes, + build_indexes_ptr, + max_block_size, + true, + Some(row_state), + Some(row_state_indexes), + )?; + } } } @@ -538,4 +621,53 @@ impl HashJoinProbeState { Ok(()) } + + #[inline(always)] + #[allow(clippy::too_many_arguments)] + fn fill_left_outer_states<'a, H: HashJoinHashtableLike, const LEFT_SINGLE: bool>( + &self, + hash_table: &H, + key: &H::Key, + incomplete_ptr: u64, + idx: u32, + probe_indexes: &mut [u32], + build_indexes_ptr: *mut RowPtr, + max_block_size: usize, + with_conjunct: bool, + row_state: Option<&mut Vec>, + row_state_indexes: Option<&mut Vec>, + ) -> Result<(usize, u64)> + where + H::Key: 'a, + { + let (match_count, ptr) = + hash_table.next_probe(key, incomplete_ptr, build_indexes_ptr, 0, max_block_size); + if match_count == 0 { + return Ok((0, 0)); + } + + if LEFT_SINGLE { + return 
Err(ErrorCode::Internal( + "Scalar subquery can't return more than one row", + )); + } + + if !with_conjunct { + for i in 0..match_count { + unsafe { *probe_indexes.get_unchecked_mut(i) = idx }; + } + } else { + let row_state = row_state.unwrap(); + let row_state_indexes = row_state_indexes.unwrap(); + unsafe { + *row_state.get_unchecked_mut(idx as usize) += match_count; + for i in 0..match_count { + *row_state_indexes.get_unchecked_mut(i) = idx as usize; + *probe_indexes.get_unchecked_mut(i) = idx; + } + } + } + + Ok((match_count, ptr)) + } } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs index 163c77290888..6dc6498cd47a 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_mark_join.rs @@ -59,6 +59,7 @@ impl HashJoinProbeState { .write(); *has_null = true; } + let mutable_indexes = &mut probe_state.mutable_indexes; let build_indexes = &mut mutable_indexes.build_indexes; let build_indexes_ptr = build_indexes.as_mut_ptr(); @@ -80,7 +81,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -89,28 +90,14 @@ impl HashJoinProbeState { matched_idx += match_count; while matched_idx == max_block_size { - if self.hash_join_state.interrupt.load(Ordering::Relaxed) { - return Err(ErrorCode::AbortedQuery( - "Aborted query, because the server is shutting down or the query was killed.", - )); - } - for probed_row in build_indexes.iter() { - unsafe { - *mark_scan_map - .get_unchecked_mut(probed_row.chunk_index as usize) - .get_unchecked_mut(probed_row.row_index as usize) = - MARKER_KIND_TRUE; - } - } - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + self.process_left_mark_join_block(build_indexes, mark_scan_map)?; + (matched_idx, incomplete_ptr) = hash_table.next_probe( key, incomplete_ptr, build_indexes_ptr, - matched_idx, + 0, max_block_size, ); - matched_idx += match_count; } } } else { @@ -119,7 +106,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(idx) }; // Probe hash table and fill build_indexes. 
- let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -128,32 +115,22 @@ impl HashJoinProbeState { matched_idx += match_count; while matched_idx == max_block_size { - if self.hash_join_state.interrupt.load(Ordering::Relaxed) { - return Err(ErrorCode::AbortedQuery( - "Aborted query, because the server is shutting down or the query was killed.", - )); - } - for probed_row in build_indexes.iter() { - unsafe { - *mark_scan_map - .get_unchecked_mut(probed_row.chunk_index as usize) - .get_unchecked_mut(probed_row.row_index as usize) = - MARKER_KIND_TRUE; - } - } - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + self.process_left_mark_join_block(build_indexes, mark_scan_map)?; + (matched_idx, incomplete_ptr) = hash_table.next_probe( key, incomplete_ptr, build_indexes_ptr, - matched_idx, + 0, max_block_size, ); - matched_idx += match_count; } } } + if matched_idx > 0 { + self.process_left_mark_join_block(&build_indexes[0..matched_idx], mark_scan_map)?; + } + Ok(vec![]) } @@ -210,7 +187,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -223,7 +200,7 @@ impl HashJoinProbeState { } while matched_idx == max_block_size { - self.process_left_mark_join_block( + self.process_left_mark_join_with_conjunct_block( matched_idx, input, probe_indexes, @@ -233,18 +210,15 @@ impl HashJoinProbeState { other_predicate, mark_scan_map, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + *idx, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; - matched_idx += 1; - } + )?; } } } else { @@ -253,7 +227,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(idx) }; // Probe hash table and fill build_indexes. 
- let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -266,7 +240,7 @@ impl HashJoinProbeState { } while matched_idx == max_block_size { - self.process_left_mark_join_block( + self.process_left_mark_join_with_conjunct_block( matched_idx, input, probe_indexes, @@ -276,18 +250,15 @@ impl HashJoinProbeState { other_predicate, mark_scan_map, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + idx as u32, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; - } + )?; } } } @@ -305,7 +276,7 @@ impl HashJoinProbeState { } if matched_idx > 0 { - self.process_left_mark_join_block( + self.process_left_mark_join_with_conjunct_block( matched_idx, input, probe_indexes, @@ -323,6 +294,30 @@ impl HashJoinProbeState { #[inline] #[allow(clippy::too_many_arguments)] fn process_left_mark_join_block( + &self, + build_indexes: &[RowPtr], + mark_scan_map: &mut [Vec], + ) -> Result<()> { + if self.hash_join_state.interrupt.load(Ordering::Relaxed) { + return Err(ErrorCode::AbortedQuery( + "Aborted query, because the server is shutting down or the query was killed.", + )); + } + + for probed_row in build_indexes.iter() { + unsafe { + *mark_scan_map + .get_unchecked_mut(probed_row.chunk_index as usize) + .get_unchecked_mut(probed_row.row_index as usize) = MARKER_KIND_TRUE; + } + } + + Ok(()) + } + + #[inline] + #[allow(clippy::too_many_arguments)] + fn process_left_mark_join_with_conjunct_block( &self, matched_idx: usize, input: &DataBlock, diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs index 56dd6fcb8efd..3a280f5b7554 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/left_semi_join.rs @@ -128,7 +128,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -151,18 +151,15 @@ impl HashJoinProbeState { other_predicate, &mut row_state, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + *idx, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; - matched_idx += 1; - } + )?; } } } else { @@ -171,7 +168,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(idx) }; // Probe hash table and fill build_indexes. 
- let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -194,18 +191,15 @@ impl HashJoinProbeState { other_predicate, &mut row_state, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + idx as u32, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; - } + )?; } } } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs index 2f5413bf968c..809476118049 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_join.rs @@ -76,7 +76,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -100,18 +100,15 @@ impl HashJoinProbeState { outer_scan_map, &mut right_single_scan_map, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + *idx, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; - matched_idx += 1; - } + )?; } } } else { @@ -120,7 +117,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(idx) }; // Probe hash table and fill build_indexes. 
- let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -144,18 +141,15 @@ impl HashJoinProbeState { outer_scan_map, &mut right_single_scan_map, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + idx as u32, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; - } + )?; } } } @@ -177,7 +171,7 @@ impl HashJoinProbeState { Ok(result_blocks) } - fn update_right_single_scan_map( + pub(crate) fn update_right_single_scan_map( &self, build_indexes: &[RowPtr], right_single_scan_map: &[*mut AtomicBool], diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs index 45d8a23be659..fed7c8d44cb5 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_mark_join.rs @@ -140,7 +140,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -163,18 +163,15 @@ impl HashJoinProbeState { markers, other_predicate, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + *idx, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; - matched_idx += 1; - } + )?; } } } else { @@ -183,7 +180,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(idx) }; // Probe hash table and fill build_indexes. 
- let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -206,18 +203,15 @@ impl HashJoinProbeState { markers, other_predicate, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + idx as u32, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; - } + )?; } } } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs index 4bc5d6a08bc3..4c190d4dc454 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_join/right_semi_anti_join.rs @@ -60,7 +60,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -69,29 +69,14 @@ impl HashJoinProbeState { matched_idx += match_count; while matched_idx == max_block_size { - if self.hash_join_state.interrupt.load(Ordering::Relaxed) { - return Err(ErrorCode::AbortedQuery( - "Aborted query, because the server is shutting down or the query was killed.", - )); - } - - for row_ptr in build_indexes.iter() { - unsafe { - *outer_scan_map - .get_unchecked_mut(row_ptr.chunk_index as usize) - .get_unchecked_mut(row_ptr.row_index as usize) = true - }; - } - - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + self.process_right_semi_anti_join_block(build_indexes, outer_scan_map)?; + (matched_idx, incomplete_ptr) = hash_table.next_probe( key, incomplete_ptr, build_indexes_ptr, - matched_idx, + 0, max_block_size, ); - matched_idx += match_count; } } } else { @@ -100,7 +85,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(idx) }; // Probe hash table and fill `build_indexes`. 
- let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -108,39 +93,23 @@ impl HashJoinProbeState { matched_idx += match_count; while matched_idx == max_block_size { - if self.hash_join_state.interrupt.load(Ordering::Relaxed) { - return Err(ErrorCode::AbortedQuery( - "Aborted query, because the server is shutting down or the query was killed.", - )); - } - - for row_ptr in build_indexes.iter() { - unsafe { - *outer_scan_map - .get_unchecked_mut(row_ptr.chunk_index as usize) - .get_unchecked_mut(row_ptr.row_index as usize) = true - }; - } - - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + self.process_right_semi_anti_join_block(build_indexes, outer_scan_map)?; + (matched_idx, incomplete_ptr) = hash_table.next_probe( key, incomplete_ptr, build_indexes_ptr, - matched_idx, + 0, max_block_size, ); - matched_idx += match_count; } } } - for row_ptr in build_indexes[0..matched_idx].iter() { - unsafe { - *outer_scan_map - .get_unchecked_mut(row_ptr.chunk_index as usize) - .get_unchecked_mut(row_ptr.row_index as usize) = true - }; + if matched_idx > 0 { + self.process_right_semi_anti_join_block( + &build_indexes[0..matched_idx], + outer_scan_map, + )?; } Ok(vec![]) @@ -185,7 +154,7 @@ impl HashJoinProbeState { let ptr = unsafe { *pointers.get_unchecked(*idx as usize) }; // Probe hash table and fill `build_indexes`. - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -198,7 +167,7 @@ impl HashJoinProbeState { } while matched_idx == max_block_size { - self.process_right_semi_anti_join_block( + self.process_right_semi_anti_join_with_conjunct_block( matched_idx, input, probe_indexes, @@ -208,25 +177,22 @@ impl HashJoinProbeState { outer_scan_map, other_predicate, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + *idx, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = *idx }; - matched_idx += 1; - } + )?; } } } else { for idx in 0..input.num_rows() { let key = unsafe { keys.key_unchecked(idx) }; let ptr = unsafe { *pointers.get_unchecked(idx) }; - let (mut match_count, mut incomplete_ptr) = + let (match_count, mut incomplete_ptr) = hash_table.next_probe(key, ptr, build_indexes_ptr, matched_idx, max_block_size); if match_count == 0 { continue; @@ -238,7 +204,7 @@ impl HashJoinProbeState { } while matched_idx == max_block_size { - self.process_right_semi_anti_join_block( + self.process_right_semi_anti_join_with_conjunct_block( matched_idx, input, probe_indexes, @@ -248,24 +214,21 @@ impl HashJoinProbeState { outer_scan_map, other_predicate, )?; - matched_idx = 0; - (match_count, incomplete_ptr) = hash_table.next_probe( + (matched_idx, incomplete_ptr) = self.fill_probe_and_build_indexes::<_, false>( + hash_table, key, incomplete_ptr, + idx as u32, + probe_indexes, build_indexes_ptr, - matched_idx, max_block_size, - ); - for _ in 0..match_count { - unsafe { *probe_indexes.get_unchecked_mut(matched_idx) = idx as u32 }; - matched_idx += 1; - } + )?; } } } if matched_idx > 0 { - self.process_right_semi_anti_join_block( + 
self.process_right_semi_anti_join_with_conjunct_block(
                 matched_idx,
                 input,
                 probe_indexes,
@@ -283,6 +246,30 @@
     #[inline]
     #[allow(clippy::too_many_arguments)]
     fn process_right_semi_anti_join_block(
+        &self,
+        build_indexes: &[RowPtr],
+        outer_scan_map: &mut [Vec<bool>],
+    ) -> Result<()> {
+        if self.hash_join_state.interrupt.load(Ordering::Relaxed) {
+            return Err(ErrorCode::AbortedQuery(
+                "Aborted query, because the server is shutting down or the query was killed.",
+            ));
+        }
+
+        for row_ptr in build_indexes.iter() {
+            unsafe {
+                *outer_scan_map
+                    .get_unchecked_mut(row_ptr.chunk_index as usize)
+                    .get_unchecked_mut(row_ptr.row_index as usize) = true
+            };
+        }
+
+        Ok(())
+    }
+
+    #[inline]
+    #[allow(clippy::too_many_arguments)]
+    fn process_right_semi_anti_join_with_conjunct_block(
         &self,
         matched_idx: usize,
         input: &DataBlock,
diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs
index f4215a541e8d..8be9a294a40d 100644
--- a/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs
+++ b/src/query/service/src/pipelines/processors/transforms/hash_join/probe_state.rs
@@ -41,18 +41,23 @@ pub struct ProbeState {
     pub(crate) probe_unmatched_indexes: Option<Vec<u32>>,
     // The `markers` is used for right mark join.
     pub(crate) markers: Option<Vec<u8>>,
+    // Whether the hash join has a non-empty other condition.
+    pub(crate) with_conjunction: bool,
     // Early filtering.
     // 1.The `selection` is used to store the indexes of input which matched by hash.
     pub(crate) selection: Vec<u32>,
     // 2.The indexes of [0, selection_count) in `selection` are valid.
     pub(crate) selection_count: usize,
-    // 3.Statistics for **adaptive** early filtering, the `num_keys` indicates the number of valid keys in probe side,
-    // the `num_keys_hash_matched` indicates the number of keys which matched by hash.
+    // 3.Statistics for **adaptive** early filtering, the `num_keys` indicates the number of valid keys
+    // in probe side, the `num_keys_hash_matched` indicates the number of keys which matched by hash.
    pub(crate) num_keys: u64,
     pub(crate) num_keys_hash_matched: u64,
     // 4.Whether to probe with selection.
     pub(crate) probe_with_selection: bool,
+    // 5.If the join type is LEFT / LEFT SINGLE / LEFT ANTI / FULL, we use it to store the count of
+    // unmatched indexes collected during early filtering.
+    pub(crate) probe_unmatched_indexes_count: usize,
 }
 
 impl ProbeState {
@@ -64,27 +69,28 @@ impl ProbeState {
     pub fn create(
         max_block_size: usize,
         join_type: &JoinType,
-        with_conjunct: bool,
+        with_conjunction: bool,
         has_string_column: bool,
         func_ctx: FunctionContext,
     ) -> Self {
-        let (row_state, row_state_indexes, probe_unmatched_indexes) = match &join_type {
+        let (row_state, row_state_indexes) = match &join_type {
             JoinType::Left | JoinType::LeftSingle | JoinType::Full => {
-                if with_conjunct {
-                    (
-                        Some(vec![0; max_block_size]),
-                        Some(vec![0; max_block_size]),
-                        None,
-                    )
+                if with_conjunction {
+                    (Some(vec![0; max_block_size]), Some(vec![0; max_block_size]))
                 } else {
-                    (
-                        Some(vec![0; max_block_size]),
-                        None,
-                        Some(vec![0; max_block_size]),
-                    )
+                    (Some(vec![0; max_block_size]), None)
                 }
             }
-            _ => (None, None, None),
+            _ => (None, None),
+        };
+        let probe_unmatched_indexes = if matches!(
+            &join_type,
+            JoinType::Left | JoinType::LeftSingle | JoinType::Full | JoinType::LeftAnti
+        ) && !with_conjunction
+        {
+            Some(vec![0; max_block_size])
+        } else {
+            None
         };
         let markers = if matches!(&join_type, JoinType::RightMark) {
             Some(vec![MARKER_KIND_FALSE; max_block_size])
@@ -106,6 +112,8 @@ impl ProbeState {
             row_state_indexes,
             probe_unmatched_indexes,
             markers,
+            probe_unmatched_indexes_count: 0,
+            with_conjunction,
         }
     }
@@ -138,17 +146,11 @@ impl MutableIndexes {
     }
 
 pub struct ProbeBlockGenerationState {
-    /// in fact, it means whether we need to output some probe blocks's columns,
-    /// we use probe_projections to check whether we can get a non-empty result
-    /// block.
+    // `is_probe_projected` indicates whether the probe-side columns need to be output.
     pub(crate) is_probe_projected: bool,
-    /// for Right/Full/RightSingle we use true_validity to reduce memory, because
-    /// we need to wrap probe block's all column type as nullable(if they are not).
-    /// But when we need to wrap this way, the validity is all true, so we use this
-    /// one to share the memory.
+    // When we need a bitmap that is all true, we can directly slice it to reduce memory usage.
     pub(crate) true_validity: Bitmap,
-    /// we use `string_items_buf` for Binary/String/Bitmap/Variant Column
-    /// to store the (pointer,length). So we can reuse the memory for all take.
+    // `string_items_buf` is a reusable buffer that reduces memory allocations when taking [u8] columns.
pub(crate) string_items_buf: Option>, } diff --git a/src/query/service/src/pipelines/processors/transforms/hash_join/result_blocks.rs b/src/query/service/src/pipelines/processors/transforms/hash_join/result_blocks.rs index cf4af55b6b0c..3676f45c35c9 100644 --- a/src/query/service/src/pipelines/processors/transforms/hash_join/result_blocks.rs +++ b/src/query/service/src/pipelines/processors/transforms/hash_join/result_blocks.rs @@ -56,10 +56,26 @@ impl HashJoinProbeState { .other_predicate .is_none(); match self.hash_join_state.hash_join_desc.join_type { - JoinType::Inner => self.inner_join(input, keys, hash_table, probe_state), - JoinType::Left | JoinType::LeftSingle | JoinType::Full => match has_other_predicate { - true => self.left_join(input, keys, hash_table, probe_state), - false => self.left_join_with_conjunct(input, keys, hash_table, probe_state), + JoinType::Inner => match self.hash_join_state.hash_join_desc.single_to_inner { + Some(JoinType::LeftSingle) => { + self.inner_join::<_, true, false>(input, keys, hash_table, probe_state) + } + Some(JoinType::RightSingle) => { + self.inner_join::<_, false, true>(input, keys, hash_table, probe_state) + } + _ => self.inner_join::<_, false, false>(input, keys, hash_table, probe_state), + }, + JoinType::Left | JoinType::Full => match has_other_predicate { + true => self.left_join::<_, false>(input, keys, hash_table, probe_state), + false => { + self.left_join_with_conjunct::<_, false>(input, keys, hash_table, probe_state) + } + }, + JoinType::LeftSingle => match has_other_predicate { + true => self.left_join::<_, true>(input, keys, hash_table, probe_state), + false => { + self.left_join_with_conjunct::<_, true>(input, keys, hash_table, probe_state) + } }, JoinType::LeftSemi => match has_other_predicate { true => self.left_semi_join(input, keys, hash_table, probe_state), diff --git a/src/query/service/src/pipelines/processors/transforms/transform_sort_spill.rs b/src/query/service/src/pipelines/processors/transforms/transform_sort_spill.rs index c29a193f766f..c96f454accf4 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_sort_spill.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_sort_spill.rs @@ -38,6 +38,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_pipeline_transforms::processors::sort::CommonRows; use databend_common_pipeline_transforms::processors::sort::DateRows; use databend_common_pipeline_transforms::processors::sort::HeapMerger; @@ -283,6 +285,13 @@ where R: Rows + Sync + Send + 'static metrics_inc_sort_spill_write_count(); metrics_inc_sort_spill_write_bytes(bytes); metrics_inc_sort_spill_write_milliseconds(ins.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1); + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteBytes, bytes as usize); + Profile::record_usize_profile( + ProfileStatisticsName::SpillWriteTime, + ins.elapsed().as_millis() as usize, + ); } self.unmerged_blocks.push_back(vec![location].into()); @@ -342,6 +351,16 @@ where R: Rows + Sync + Send + 'static metrics_inc_sort_spill_read_count(); metrics_inc_sort_spill_read_bytes(bytes); 
metrics_inc_sort_spill_read_milliseconds(ins.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillReadCount, 1); + Profile::record_usize_profile( + ProfileStatisticsName::SpillReadBytes, + bytes as usize, + ); + Profile::record_usize_profile( + ProfileStatisticsName::SpillReadTime, + ins.elapsed().as_millis() as usize, + ); } self.output_data = Some(block); @@ -376,6 +395,16 @@ where R: Rows + Sync + Send + 'static metrics_inc_sort_spill_write_count(); metrics_inc_sort_spill_write_bytes(bytes); metrics_inc_sort_spill_write_milliseconds(ins.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillWriteCount, 1); + Profile::record_usize_profile( + ProfileStatisticsName::SpillWriteBytes, + bytes as usize, + ); + Profile::record_usize_profile( + ProfileStatisticsName::SpillWriteTime, + ins.elapsed().as_millis() as usize, + ); } spilled.push_back(location); @@ -408,6 +437,16 @@ impl SortedStream for BlockStream { metrics_inc_sort_spill_read_count(); metrics_inc_sort_spill_read_bytes(bytes); metrics_inc_sort_spill_read_milliseconds(ins.elapsed().as_millis() as u64); + + Profile::record_usize_profile(ProfileStatisticsName::SpillReadCount, 1); + Profile::record_usize_profile( + ProfileStatisticsName::SpillReadBytes, + bytes as usize, + ); + Profile::record_usize_profile( + ProfileStatisticsName::SpillReadTime, + ins.elapsed().as_millis() as usize, + ); } Some(block) diff --git a/src/query/service/src/schedulers/fragments/fragmenter.rs b/src/query/service/src/schedulers/fragments/fragmenter.rs index 20bebb05bc3b..b7e974b59d01 100644 --- a/src/query/service/src/schedulers/fragments/fragmenter.rs +++ b/src/query/service/src/schedulers/fragments/fragmenter.rs @@ -251,7 +251,9 @@ impl PhysicalPlanReplacer for Fragmenter { need_hold_hash_table: plan.need_hold_hash_table, stat_info: plan.stat_info.clone(), probe_keys_rt: plan.probe_keys_rt.clone(), + enable_bloom_runtime_filter: plan.enable_bloom_runtime_filter, broadcast: plan.broadcast, + single_to_inner: plan.single_to_inner.clone(), })) } diff --git a/src/query/service/src/schedulers/fragments/query_fragment_actions.rs b/src/query/service/src/schedulers/fragments/query_fragment_actions.rs index 0884578eef5b..69daa9f491ba 100644 --- a/src/query/service/src/schedulers/fragments/query_fragment_actions.rs +++ b/src/query/service/src/schedulers/fragments/query_fragment_actions.rs @@ -105,15 +105,13 @@ impl QueryFragmentActions { pub struct QueryFragmentsActions { ctx: Arc, - enable_profiling: bool, pub fragments_actions: Vec, } impl QueryFragmentsActions { - pub fn create(ctx: Arc, enable_profiling: bool) -> QueryFragmentsActions { + pub fn create(ctx: Arc) -> QueryFragmentsActions { QueryFragmentsActions { ctx, - enable_profiling, fragments_actions: Vec::new(), } } @@ -183,7 +181,6 @@ impl QueryFragmentsActions { nodes_info.clone(), settings.clone(), cluster.local_id(), - self.enable_profiling, ); for (executor, fragments) in fragments_packets.into_iter() { @@ -199,7 +196,6 @@ impl QueryFragmentsActions { executors_info, settings.clone(), cluster.local_id(), - self.enable_profiling, )); } diff --git a/src/query/service/src/schedulers/fragments/query_fragment_actions_display.rs b/src/query/service/src/schedulers/fragments/query_fragment_actions_display.rs index 647bd1b2ba0a..f715a8a70675 100644 --- a/src/query/service/src/schedulers/fragments/query_fragment_actions_display.rs +++ b/src/query/service/src/schedulers/fragments/query_fragment_actions_display.rs @@ -15,7 +15,6 @@ use 
std::fmt::Display; use std::fmt::Formatter; -use databend_common_profile::SharedProcessorProfiles; use databend_common_sql::MetadataRef; use crate::api::DataExchange; @@ -80,7 +79,7 @@ impl<'a> Display for QueryFragmentActionsWrap<'a> { let fragment_action = &self.inner.fragment_actions[0]; let plan_display_string = fragment_action .physical_plan - .format(self.metadata.clone(), SharedProcessorProfiles::default()) + .format(self.metadata.clone(), Default::default()) .and_then(|node| node.format_pretty_with_prefix(" ")) .unwrap(); write!(f, "{}", plan_display_string)?; diff --git a/src/query/service/src/schedulers/scheduler.rs b/src/query/service/src/schedulers/scheduler.rs index dd1830218e7c..2bd3a8f427c7 100644 --- a/src/query/service/src/schedulers/scheduler.rs +++ b/src/query/service/src/schedulers/scheduler.rs @@ -15,7 +15,6 @@ use std::sync::Arc; use databend_common_exception::Result; -use databend_common_profile::SharedProcessorProfiles; use crate::pipelines::PipelineBuildResult; use crate::pipelines::PipelineBuilder; @@ -36,9 +35,7 @@ pub async fn build_query_pipeline( plan: &PhysicalPlan, ignore_result: bool, ) -> Result { - let enable_profile = ctx.get_settings().get_enable_query_profiling()?; - let mut build_res = - build_query_pipeline_without_render_result_set(ctx, plan, enable_profile).await?; + let mut build_res = build_query_pipeline_without_render_result_set(ctx, plan).await?; let input_schema = plan.output_schema()?; PipelineBuilder::build_result_projection( @@ -55,12 +52,11 @@ pub async fn build_query_pipeline( pub async fn build_query_pipeline_without_render_result_set( ctx: &Arc, plan: &PhysicalPlan, - enable_profiling: bool, ) -> Result { let build_res = if !plan.is_distributed_plan() { - build_local_pipeline(ctx, plan, enable_profiling).await + build_local_pipeline(ctx, plan).await } else { - build_distributed_pipeline(ctx, plan, false).await + build_distributed_pipeline(ctx, plan).await }?; Ok(build_res) } @@ -70,14 +66,11 @@ pub async fn build_query_pipeline_without_render_result_set( pub async fn build_local_pipeline( ctx: &Arc, plan: &PhysicalPlan, - enable_profiling: bool, ) -> Result { let pipeline = PipelineBuilder::create( ctx.get_function_context()?, ctx.get_settings(), ctx.clone(), - enable_profiling, - SharedProcessorProfiles::default(), vec![], ); let mut build_res = pipeline.finalize(plan)?; @@ -92,18 +85,17 @@ pub async fn build_local_pipeline( pub async fn build_distributed_pipeline( ctx: &Arc, plan: &PhysicalPlan, - enable_profiling: bool, ) -> Result { let fragmenter = Fragmenter::try_create(ctx.clone())?; let root_fragment = fragmenter.build_fragment(plan)?; - let mut fragments_actions = QueryFragmentsActions::create(ctx.clone(), enable_profiling); + let mut fragments_actions = QueryFragmentsActions::create(ctx.clone()); root_fragment.get_actions(ctx.clone(), &mut fragments_actions)?; let exchange_manager = ctx.get_exchange_manager(); let mut build_res = exchange_manager - .commit_actions(ctx.clone(), enable_profiling, fragments_actions) + .commit_actions(ctx.clone(), fragments_actions) .await?; let settings = ctx.get_settings(); diff --git a/src/query/service/src/servers/flight_sql/flight_sql_service/query.rs b/src/query/service/src/servers/flight_sql/flight_sql_service/query.rs index 99a56cfbd17a..7d49ce80e2cd 100644 --- a/src/query/service/src/servers/flight_sql/flight_sql_service/query.rs +++ b/src/query/service/src/servers/flight_sql/flight_sql_service/query.rs @@ -26,6 +26,7 @@ use databend_common_exception::ErrorCode; use 
databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; +use databend_common_sql::get_query_kind; use databend_common_sql::plans::Plan; use databend_common_sql::PlanExtras; use databend_common_sql::Planner; @@ -95,7 +96,10 @@ impl FlightSqlServiceImpl { .await .map_err(|e| status!("Could not create_query_context", e))?; - context.attach_query_str(plan.kind(), plan_extras.statement.to_mask_sql()); + context.attach_query_str( + get_query_kind(&plan_extras.statement), + plan_extras.statement.to_mask_sql(), + ); let interpreter = InterpreterFactory::get(context.clone(), plan).await?; let mut blocks = interpreter.execute(context.clone()).await?; @@ -120,7 +124,10 @@ impl FlightSqlServiceImpl { .await .map_err(|e| status!("Could not create_query_context", e))?; - context.attach_query_str(plan.kind(), plan_extras.statement.to_mask_sql()); + context.attach_query_str( + get_query_kind(&plan_extras.statement), + plan_extras.statement.to_mask_sql(), + ); let interpreter = InterpreterFactory::get(context.clone(), plan).await?; let data_schema = plan.schema(); diff --git a/src/query/service/src/servers/http/clickhouse_handler.rs b/src/query/service/src/servers/http/clickhouse_handler.rs index 661a07a4354f..6d7b364fdabf 100644 --- a/src/query/service/src/servers/http/clickhouse_handler.rs +++ b/src/query/service/src/servers/http/clickhouse_handler.rs @@ -267,8 +267,6 @@ pub async fn clickhouse_handler_get( .map_err(|err| err.display_with_sql(&sql)) .map_err(BadRequest)?; let format = get_format_with_default(extras.format, default_format)?; - - context.attach_query_str(plan.kind(), extras.statement.to_mask_sql()); let interpreter = InterpreterFactory::get(context.clone(), &plan) .await .map_err(|err| err.display_with_sql(&sql)) @@ -346,7 +344,6 @@ pub async fn clickhouse_handler_post( .map_err(|err| err.display_with_sql(&sql)) .map_err(BadRequest)?; let schema = plan.schema(); - ctx.attach_query_str(plan.kind(), extras.statement.to_mask_sql()); let mut handle = None; if let Plan::Insert(insert) = &mut plan { if let InsertInputSource::StreamingWithFormat(format, start, input_context_ref) = diff --git a/src/query/service/src/servers/http/v1/load.rs b/src/query/service/src/servers/http/v1/load.rs index 5cd523103686..19ca53bd7143 100644 --- a/src/query/service/src/servers/http/v1/load.rs +++ b/src/query/service/src/servers/http/v1/load.rs @@ -128,12 +128,11 @@ pub async fn streaming_load( } let mut planner = Planner::new(context.clone()); - let (mut plan, extras) = planner + let (mut plan, _) = planner .plan_sql(insert_sql) .await .map_err(|err| err.display_with_sql(insert_sql)) .map_err(InternalServerError)?; - context.attach_query_str(plan.kind(), extras.statement.to_mask_sql()); let schema = plan.schema(); match &mut plan { diff --git a/src/query/service/src/servers/http/v1/query/execute_state.rs b/src/query/service/src/servers/http/v1/query/execute_state.rs index 7c6b2867d0e8..0f1c96b57e38 100644 --- a/src/query/service/src/servers/http/v1/query/execute_state.rs +++ b/src/query/service/src/servers/http/v1/query/execute_state.rs @@ -272,12 +272,10 @@ impl ExecuteState { pub(crate) async fn try_start_query( executor: Arc>, plan: Plan, - extras: PlanExtras, session: Arc, ctx: Arc, block_sender: SizedChannelSender, ) -> Result<()> { - ctx.attach_query_str(plan.kind(), extras.statement.to_mask_sql()); let interpreter = InterpreterFactory::get(ctx.clone(), &plan).await?; let running_state = ExecuteRunning { session, @@ -325,7 +323,7 @@ async fn 
execute( // duplicate codes, but there is an async call let data = BlockEntry::new( DataType::String, - databend_common_expression::Value::Scalar(Scalar::String(err.to_string().into_bytes())), + databend_common_expression::Value::Scalar(Scalar::String(err.to_string())), ); block_sender.send(DataBlock::new(vec![data], 1), 1).await; return Err(err); @@ -342,9 +340,7 @@ async fn execute( // duplicate codes, but there is an async call let data = BlockEntry::new( DataType::String, - databend_common_expression::Value::Scalar(Scalar::String( - err.to_string().into_bytes(), - )), + databend_common_expression::Value::Scalar(Scalar::String(err.to_string())), ); block_sender.send(DataBlock::new(vec![data], 1), 1).await; Executor::stop(&executor, Err(err), false).await; @@ -363,7 +359,7 @@ async fn execute( let data = BlockEntry::new( DataType::String, databend_common_expression::Value::Scalar(Scalar::String( - err.to_string().into_bytes(), + err.to_string(), )), ); block_sender.send(DataBlock::new(vec![data], 1), 1).await; diff --git a/src/query/service/src/servers/http/v1/query/http_query.rs b/src/query/service/src/servers/http/v1/query/http_query.rs index 48e064f631ff..6d33f62fe33c 100644 --- a/src/query/service/src/servers/http/v1/query/http_query.rs +++ b/src/query/service/src/servers/http/v1/query/http_query.rs @@ -341,7 +341,7 @@ impl HttpQuery { let sql = request.sql.clone(); let query_id_clone = query_id.clone(); - let (plan, plan_extras) = ExecuteState::plan_sql(&sql, ctx.clone()).await?; + let (plan, _) = ExecuteState::plan_sql(&sql, ctx.clone()).await?; let schema = plan.schema(); let span = if let Some(parent) = SpanContext::current_local_parent() { @@ -358,7 +358,6 @@ impl HttpQuery { if let Err(e) = ExecuteState::try_start_query( state, plan, - plan_extras, session, ctx_clone.clone(), block_sender, diff --git a/src/query/service/src/servers/mysql/mysql_federated.rs b/src/query/service/src/servers/mysql/mysql_federated.rs index 88723083246c..bc040afbb1f2 100644 --- a/src/query/service/src/servers/mysql/mysql_federated.rs +++ b/src/query/service/src/servers/mysql/mysql_federated.rs @@ -43,9 +43,8 @@ impl MySQLFederated { // |value| fn select_function_block(name: &str, value: &str) -> Option<(TableSchemaRef, DataBlock)> { let schema = TableSchemaRefExt::create(vec![TableField::new(name, TableDataType::String)]); - let block = DataBlock::new_from_columns(vec![StringType::from_data(vec![ - value.as_bytes().to_vec(), - ])]); + let block = + DataBlock::new_from_columns(vec![StringType::from_data(vec![value.to_string()])]); Some((schema, block)) } @@ -59,8 +58,8 @@ impl MySQLFederated { TableField::new("Value", TableDataType::String), ]); let block = DataBlock::new_from_columns(vec![ - StringType::from_data(vec![name.as_bytes().to_vec()]), - StringType::from_data(vec![value.as_bytes().to_vec()]), + StringType::from_data(vec![name.to_string()]), + StringType::from_data(vec![value.to_string()]), ]); Some((schema, block)) } @@ -103,7 +102,7 @@ impl MySQLFederated { // var is 'cc'. 
let var = vars_as[0]; let value = default_map.get(var).unwrap_or(&"0").to_string(); - values.push(StringType::from_data(vec![value.as_bytes().to_vec()])); + values.push(StringType::from_data(vec![value])); } else { // @@aa // var is 'aa' @@ -113,7 +112,7 @@ impl MySQLFederated { )); let value = default_map.get(var).unwrap_or(&"0").to_string(); - values.push(StringType::from_data(vec![value.as_bytes().to_vec()])); + values.push(StringType::from_data(vec![value])); } } } diff --git a/src/query/service/src/servers/mysql/mysql_interactive_worker.rs b/src/query/service/src/servers/mysql/mysql_interactive_worker.rs index dbc5a2fad137..79b0147a2c12 100644 --- a/src/query/service/src/servers/mysql/mysql_interactive_worker.rs +++ b/src/query/service/src/servers/mysql/mysql_interactive_worker.rs @@ -354,9 +354,7 @@ impl InteractiveWorkerBase { let context = self.session.create_query_context().await?; let mut planner = Planner::new(context.clone()); - let (plan, extras) = planner.plan_sql(query).await?; - - context.attach_query_str(plan.kind(), extras.statement.to_mask_sql()); + let (plan, _) = planner.plan_sql(query).await?; let interpreter = InterpreterFactory::get(context.clone(), &plan).await; let has_result_set = plan.has_result_set(); diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index 49a1981fec0b..722bba610a6b 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -71,6 +71,7 @@ use databend_common_meta_app::schema::TableInfo; use databend_common_metrics::storage::*; use databend_common_pipeline_core::processors::profile::PlanProfile; use databend_common_pipeline_core::processors::profile::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_pipeline_core::InputError; use databend_common_settings::Settings; use databend_common_sql::IndexType; @@ -406,15 +407,20 @@ impl TableContext for QueryContext { } fn get_partition(&self) -> Option { - self.partition_queue.write().pop_front() + if let Some(part) = self.partition_queue.write().pop_front() { + Profile::record_usize_profile(ProfileStatisticsName::ScanPartitions, 1); + return Some(part); + } + + None } fn get_partitions(&self, num: usize) -> Vec { let mut res = Vec::with_capacity(num); - let mut partition_queue = self.partition_queue.write(); + let mut queue_guard = self.partition_queue.write(); for _index in 0..num { - match partition_queue.pop_front() { + match queue_guard.pop_front() { None => { break; } @@ -424,6 +430,8 @@ impl TableContext for QueryContext { }; } + Profile::record_usize_profile(ProfileStatisticsName::ScanPartitions, res.len()); + res } diff --git a/src/query/service/src/table_functions/async_crash_me.rs b/src/query/service/src/table_functions/async_crash_me.rs index b2408e5760be..a3345542d89d 100644 --- a/src/query/service/src/table_functions/async_crash_me.rs +++ b/src/query/service/src/table_functions/async_crash_me.rs @@ -61,10 +61,10 @@ impl AsyncCrashMeTable { let args = table_args.expect_all_positioned(table_func_name, None)?; if args.len() == 1 { let arg = args[0].clone(); - panic_message = - Some(String::from_utf8(arg.into_string().map_err(|_| { - ErrorCode::BadArguments("Expected string argument") - })?)?); + panic_message = Some( + arg.into_string() + .map_err(|_| ErrorCode::BadArguments("Expected string argument"))?, + ); } let table_info = TableInfo { @@ -119,7 +119,7 @@ impl Table for AsyncCrashMeTable { fn table_args(&self) -> Option { let args = 
match &self.panic_message { - Some(s) => vec![Scalar::String(s.as_bytes().to_vec())], + Some(s) => vec![Scalar::String(s.clone())], None => vec![], }; Some(TableArgs::new_positioned(args)) diff --git a/src/query/service/src/table_functions/cloud/task_dependents.rs b/src/query/service/src/table_functions/cloud/task_dependents.rs index 11344caf1b24..8273f8cbc257 100644 --- a/src/query/service/src/table_functions/cloud/task_dependents.rs +++ b/src/query/service/src/table_functions/cloud/task_dependents.rs @@ -194,38 +194,30 @@ impl TaskDependentsSource { } fn to_block(&self, tasks: &Vec) -> databend_common_exception::Result { let mut created_on: Vec = Vec::with_capacity(tasks.len()); - let mut name: Vec> = Vec::with_capacity(tasks.len()); - let mut owner: Vec> = Vec::with_capacity(tasks.len()); - let mut comment: Vec>> = Vec::with_capacity(tasks.len()); - let mut warehouse: Vec>> = Vec::with_capacity(tasks.len()); - let mut schedule: Vec>> = Vec::with_capacity(tasks.len()); - let mut predecessors: Vec>> = Vec::with_capacity(tasks.len()); + let mut name: Vec = Vec::with_capacity(tasks.len()); + let mut owner: Vec = Vec::with_capacity(tasks.len()); + let mut comment: Vec> = Vec::with_capacity(tasks.len()); + let mut warehouse: Vec> = Vec::with_capacity(tasks.len()); + let mut schedule: Vec> = Vec::with_capacity(tasks.len()); + let mut predecessors: Vec> = Vec::with_capacity(tasks.len()); - let mut state: Vec> = Vec::with_capacity(tasks.len()); - let mut definition: Vec> = Vec::with_capacity(tasks.len()); - let mut condition_text: Vec> = Vec::with_capacity(tasks.len()); + let mut state: Vec = Vec::with_capacity(tasks.len()); + let mut definition: Vec = Vec::with_capacity(tasks.len()); + let mut condition_text: Vec = Vec::with_capacity(tasks.len()); for task in tasks { let task = task.clone(); let tsk: task_utils::Task = task.try_into()?; created_on.push(tsk.created_at.timestamp_micros()); - name.push(tsk.task_name.into_bytes()); - owner.push(tsk.owner.into_bytes()); - comment.push(tsk.comment.map(|s| s.into_bytes())); - warehouse.push( - tsk.warehouse_options - .and_then(|s| s.warehouse.map(|v| v.into_bytes())), - ); - schedule.push(tsk.schedule_options.map(|s| s.into_bytes())); - predecessors.push( - tsk.after - .into_iter() - .map(|s| s.into_bytes()) - .collect::>(), - ); - state.push(tsk.status.to_string().into_bytes()); - definition.push(tsk.query_text.into_bytes()); - condition_text.push(tsk.condition_text.into_bytes()); + name.push(tsk.task_name.clone()); + owner.push(tsk.owner.clone()); + comment.push(tsk.comment.clone()); + warehouse.push(tsk.warehouse_options.and_then(|s| s.warehouse.clone())); + schedule.push(tsk.schedule_options.clone()); + predecessors.push(tsk.after.clone()); + state.push(tsk.status.to_string()); + definition.push(tsk.query_text.clone()); + condition_text.push(tsk.condition_text.clone()); } Ok(DataBlock::new_from_columns(vec![ @@ -302,17 +294,12 @@ impl TaskDependentsParsed { pub fn parse(table_args: &TableArgs) -> databend_common_exception::Result { let args = table_args.expect_all_named("task_dependents")?; - let mut task_name = String::from(""); - let mut recursive = false; + let mut task_name = None; + let mut recursive = None; for (k, v) in &args { match k.to_lowercase().as_str() { - "task_name" => { - task_name = - String::from_utf8_lossy(v.as_string().unwrap().as_slice()).to_string(); - } - "recursive" => { - recursive = *v.as_boolean().unwrap(); - } + "task_name" => task_name = v.as_string().cloned(), + "recursive" => recursive = v.as_boolean().cloned(), _ 
=> { return Err(ErrorCode::BadArguments(format!( "unknown param {} for {}", @@ -321,7 +308,8 @@ impl TaskDependentsParsed { } } } - if task_name.is_empty() { + + if task_name.is_none() { return Err(ErrorCode::BadArguments(format!( "task_name must be specified for {}", "task_dependents" @@ -329,8 +317,8 @@ impl TaskDependentsParsed { } Ok(Self { - task_name, - recursive, + task_name: task_name.unwrap(), + recursive: recursive.unwrap_or_default(), }) } } diff --git a/src/query/service/src/table_functions/cloud/task_dependents_enable.rs b/src/query/service/src/table_functions/cloud/task_dependents_enable.rs index 81b267944591..875287d8036a 100644 --- a/src/query/service/src/table_functions/cloud/task_dependents_enable.rs +++ b/src/query/service/src/table_functions/cloud/task_dependents_enable.rs @@ -58,8 +58,7 @@ impl TaskDependentsEnableTable { table_args: TableArgs, ) -> Result> { let args = table_args.expect_all_positioned(table_func_name, Some(1))?; - let task_name = - String::from_utf8_lossy(args[0].as_string().unwrap().as_slice()).to_string(); + let task_name = args[0].as_string().unwrap(); let table_info = TableInfo { ident: TableIdent::new(table_id, 0), @@ -81,7 +80,7 @@ impl TaskDependentsEnableTable { Ok(Arc::new(TaskDependentsEnableTable { table_info, - task_name, + task_name: task_name.to_string(), })) } } @@ -113,7 +112,7 @@ impl Table for TaskDependentsEnableTable { fn table_args(&self) -> Option { Some(TableArgs::new_positioned(vec![Scalar::String( - self.task_name.as_bytes().to_vec(), + self.task_name.clone(), )])) } diff --git a/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs b/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs index 3e3dbaf66735..d668290062c9 100644 --- a/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs +++ b/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs @@ -262,15 +262,15 @@ impl AsyncSource for InferSchemaSource { } }; - let mut names: Vec> = vec![]; - let mut types: Vec> = vec![]; + let mut names: Vec = vec![]; + let mut types: Vec = vec![]; let mut nulls: Vec = vec![]; for field in schema.fields().iter() { - names.push(field.name().to_string().as_bytes().to_vec()); + names.push(field.name().to_string()); let non_null_type = field.data_type().remove_recursive_nullable(); - types.push(non_null_type.sql_name().as_bytes().to_vec()); + types.push(non_null_type.sql_name()); nulls.push(field.is_nullable()); } diff --git a/src/query/service/src/table_functions/inspect_parquet/inspect_parquet_table.rs b/src/query/service/src/table_functions/inspect_parquet/inspect_parquet_table.rs index e105f801505a..cbf20d85cd02 100644 --- a/src/query/service/src/table_functions/inspect_parquet/inspect_parquet_table.rs +++ b/src/query/service/src/table_functions/inspect_parquet/inspect_parquet_table.rs @@ -69,12 +69,10 @@ impl InspectParquetTable { table_args: TableArgs, ) -> Result> { let args = table_args.expect_all_positioned(table_func_name, Some(1))?; - let file_path = String::from_utf8( - args[0] - .clone() - .into_string() - .map_err(|_| ErrorCode::BadArguments("Expected string argument."))?, - )?; + let file_path = args[0] + .clone() + .into_string() + .map_err(|_| ErrorCode::BadArguments("Expected string argument."))?; if !file_path.starts_with('@') { return Err(ErrorCode::BadArguments(format!( "stage path must start with @, but got {}", @@ -265,7 +263,7 @@ impl AsyncSource for InspectParquetSource { vec![ BlockEntry::new( DataType::String, - 
Value::Scalar(StringType::upcast_scalar(created.into())), + Value::Scalar(StringType::upcast_scalar(created)), ), BlockEntry::new( DataType::Number(NumberDataType::UInt64), diff --git a/src/query/service/src/table_functions/list_stage/list_stage_table.rs b/src/query/service/src/table_functions/list_stage/list_stage_table.rs index 69055fe7b043..045c71dbf94a 100644 --- a/src/query/service/src/table_functions/list_stage/list_stage_table.rs +++ b/src/query/service/src/table_functions/list_stage/list_stage_table.rs @@ -211,28 +211,24 @@ impl AsyncSource for ListStagesSource { let files = files_info.list(&op, false, None).await?; - let names: Vec> = files - .iter() - .map(|file| file.path.to_string().into_bytes()) - .collect(); + let names: Vec = files.iter().map(|file| file.path.to_string()).collect(); let sizes: Vec = files.iter().map(|file| file.size).collect(); - let etags: Vec>> = files + let etags: Vec> = files .iter() - .map(|file| file.etag.as_ref().map(|f| f.to_string().into_bytes())) + .map(|file| file.etag.as_ref().map(|f| f.to_string())) .collect(); - let last_modifieds: Vec> = files + let last_modifieds: Vec = files .iter() .map(|file| { file.last_modified .format("%Y-%m-%d %H:%M:%S.%3f %z") .to_string() - .into_bytes() }) .collect(); - let creators: Vec>> = files + let creators: Vec> = files .iter() - .map(|file| file.creator.as_ref().map(|c| c.to_string().into_bytes())) + .map(|file| file.creator.as_ref().map(|c| c.to_string())) .collect(); let block = DataBlock::new_from_columns(vec![ diff --git a/src/query/service/src/table_functions/openai/ai_to_sql.rs b/src/query/service/src/table_functions/openai/ai_to_sql.rs index 078f4e46415d..3bb83a5e33e8 100644 --- a/src/query/service/src/table_functions/openai/ai_to_sql.rs +++ b/src/query/service/src/table_functions/openai/ai_to_sql.rs @@ -64,12 +64,10 @@ impl GPT2SQLTable { ) -> Result> { // Check args. let args = table_args.expect_all_positioned(table_func_name, Some(1))?; - let prompt = String::from_utf8( - args[0] - .clone() - .into_string() - .map_err(|_| ErrorCode::BadArguments("Expected string argument."))?, - )?; + let prompt = args[0] + .clone() + .into_string() + .map_err(|_| ErrorCode::BadArguments("Expected string argument."))?; let schema = TableSchema::new(vec![ TableField::new("database", TableDataType::String), @@ -248,8 +246,8 @@ impl AsyncSource for GPT2SQLSource { let sql = format!("SELECT {}", sql); info!("openai response sql: {}", sql); let database = self.ctx.get_current_database(); - let database: Vec> = vec![database.into_bytes()]; - let sql: Vec> = vec![sql.into_bytes()]; + let database: Vec = vec![database]; + let sql: Vec = vec![sql]; // Mark done. 
self.finished = true; diff --git a/src/query/service/src/table_functions/others/execute_background_job.rs b/src/query/service/src/table_functions/others/execute_background_job.rs index 22e8705a6942..bc903c16fdb7 100644 --- a/src/query/service/src/table_functions/others/execute_background_job.rs +++ b/src/query/service/src/table_functions/others/execute_background_job.rs @@ -53,7 +53,7 @@ impl ExecuteBackgroundJobTable { table_args: TableArgs, ) -> Result> { let args = table_args.expect_all_positioned(table_func_name, Some(1))?; - let job_name = String::from_utf8_lossy(args[0].as_string().unwrap().as_slice()).to_string(); + let job_name = args[0].as_string().unwrap().clone(); let table_info = TableInfo { ident: TableIdent::new(table_id, 0), @@ -107,7 +107,7 @@ impl Table for ExecuteBackgroundJobTable { fn table_args(&self) -> Option { Some(TableArgs::new_positioned(vec![Scalar::String( - self.job_name.as_bytes().to_vec(), + self.job_name.clone(), )])) } diff --git a/src/query/service/src/table_functions/others/license_info.rs b/src/query/service/src/table_functions/others/license_info.rs index df62b0207e40..228476c67ecc 100644 --- a/src/query/service/src/table_functions/others/license_info.rs +++ b/src/query/service/src/table_functions/others/license_info.rs @@ -161,33 +161,19 @@ impl LicenseInfoSource { BlockEntry::new( DataType::String, Value::Scalar(Scalar::String( - info.issuer - .clone() - .unwrap_or("".to_string()) - .into_bytes() - .to_vec(), + info.issuer.clone().unwrap_or("".to_string()), )), ), BlockEntry::new( DataType::String, Value::Scalar(Scalar::String( - info.custom - .r#type - .clone() - .unwrap_or("".to_string()) - .into_bytes() - .to_vec(), + info.custom.r#type.clone().unwrap_or("".to_string()), )), ), BlockEntry::new( DataType::String, Value::Scalar(Scalar::String( - info.custom - .org - .clone() - .unwrap_or("".to_string()) - .into_bytes() - .to_vec(), + info.custom.org.clone().unwrap_or("".to_string()), )), ), BlockEntry::new( @@ -204,9 +190,7 @@ impl LicenseInfoSource { ), BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String( - human_readable_available_time.into_bytes().to_vec(), - )), + Value::Scalar(Scalar::String(human_readable_available_time)), ), ], 1, diff --git a/src/query/service/src/table_functions/others/suggested_background_tasks.rs b/src/query/service/src/table_functions/others/suggested_background_tasks.rs index 6032138024b0..2f96e5f96902 100644 --- a/src/query/service/src/table_functions/others/suggested_background_tasks.rs +++ b/src/query/service/src/table_functions/others/suggested_background_tasks.rs @@ -164,8 +164,7 @@ impl SuggestedBackgroundTasksSource { sql: String, ) -> Result> { let mut planner = Planner::new(ctx.clone()); - let (plan, plan_extras) = planner.plan_sql(sql.as_str()).await?; - ctx.attach_query_str(plan.kind(), plan_extras.statement.to_mask_sql()); + let (plan, _) = planner.plan_sql(sql.as_str()).await?; let data_schema = plan.schema(); let interpreter = InterpreterFactory::get(ctx.clone(), &plan).await?; let stream = interpreter.execute(ctx.clone()).await?; @@ -220,11 +219,11 @@ impl SuggestedBackgroundTasksSource { table_stats, .. 
} => { - suggestion_type.push("compaction".to_string().into_bytes().to_vec()); + suggestion_type.push("compaction".to_string()); should_do_segment_compact.push(Some(need_compact_segment)); should_do_compact.push(Some(need_compact_block)); - database_name.push(db_name.into_bytes().to_vec()); - table_names.push(table_name.into_bytes().to_vec()); + database_name.push(db_name); + table_names.push(table_name); table_statistics.push(serde_json::to_vec(&table_stats).unwrap()); } } diff --git a/src/query/service/src/table_functions/others/tenant_quota.rs b/src/query/service/src/table_functions/others/tenant_quota.rs index a1cd50241efd..2cb900527464 100644 --- a/src/query/service/src/table_functions/others/tenant_quota.rs +++ b/src/query/service/src/table_functions/others/tenant_quota.rs @@ -137,7 +137,7 @@ impl Table for TenantQuotaTable { let args = self .args .iter() - .map(|s| Scalar::String(s.as_bytes().to_vec())) + .map(|s| Scalar::String(s.clone())) .collect(); Some(TableArgs::new_positioned(args)) } diff --git a/src/query/service/src/table_functions/sync_crash_me.rs b/src/query/service/src/table_functions/sync_crash_me.rs index 131fa7999839..e968a962d1a4 100644 --- a/src/query/service/src/table_functions/sync_crash_me.rs +++ b/src/query/service/src/table_functions/sync_crash_me.rs @@ -61,10 +61,10 @@ impl SyncCrashMeTable { let args = table_args.expect_all_positioned(table_func_name, None)?; if args.len() == 1 { let arg = args[0].clone(); - panic_message = - Some(String::from_utf8(arg.into_string().map_err(|_| { - ErrorCode::BadArguments("Expected string argument.") - })?)?); + panic_message = Some( + arg.into_string() + .map_err(|_| ErrorCode::BadArguments("Expected string argument."))?, + ); } let table_info = TableInfo { @@ -119,7 +119,7 @@ impl Table for SyncCrashMeTable { fn table_args(&self) -> Option { let args = match &self.panic_message { - Some(s) => vec![Scalar::String(s.as_bytes().to_vec())], + Some(s) => vec![Scalar::String(s.clone())], None => vec![], }; Some(TableArgs::new_positioned(args)) diff --git a/src/query/service/src/test_kits/config.rs b/src/query/service/src/test_kits/config.rs index e90f1cf8a1a4..fa64f2fcf2d9 100644 --- a/src/query/service/src/test_kits/config.rs +++ b/src/query/service/src/test_kits/config.rs @@ -43,6 +43,7 @@ impl ConfigBuilder { let tmp_dir = TempDir::new().expect("create tmp dir failed"); let root = tmp_dir.path().to_str().unwrap().to_string(); conf.storage.params = StorageParams::Fs(StorageFsConfig { root }); + conf.storage.allow_insecure = true; ConfigBuilder { conf } } diff --git a/src/query/service/src/test_kits/fixture.rs b/src/query/service/src/test_kits/fixture.rs index 7143e4505dd9..0326c300c091 100644 --- a/src/query/service/src/test_kits/fixture.rs +++ b/src/query/service/src/test_kits/fixture.rs @@ -638,7 +638,7 @@ impl TestFixture { let mut d_values = Vec::with_capacity(rows_per_block); for i in 0..rows_per_block { id_values.push(i as i32 + start * 3); - c_values.push(format!("s-{}-{}", start, i).as_bytes().to_vec()); + c_values.push(format!("s-{}-{}", start, i)); d_values.push(i as i64 + (start * 10) as i64); } let column0 = Int32Type::from_data(id_values); diff --git a/src/query/service/tests/it/api/http/status.rs b/src/query/service/tests/it/api/http/status.rs index b5b5db2d3ff2..ac9b8ed229bc 100644 --- a/src/query/service/tests/it/api/http/status.rs +++ b/src/query/service/tests/it/api/http/status.rs @@ -25,7 +25,6 @@ use databend_query::interpreters::InterpreterFactory; use databend_query::sessions::QueryContext; use 
databend_query::sessions::SessionManager; use databend_query::sessions::SessionType; -use databend_query::sessions::TableContext; use databend_query::sql::Planner; use databend_query::test_kits::*; use poem::get; @@ -65,8 +64,7 @@ async fn run_query(query_ctx: &Arc) -> Result .set_authed_user(user, None) .await?; let mut planner = Planner::new(query_ctx.clone()); - let (plan, extras) = planner.plan_sql(sql).await?; - query_ctx.attach_query_str(plan.kind(), extras.statement.to_mask_sql()); + let (plan, _) = planner.plan_sql(sql).await?; InterpreterFactory::get(query_ctx.clone(), &plan).await } diff --git a/src/query/service/tests/it/pipelines/executor/executor_graph.rs b/src/query/service/tests/it/pipelines/executor/executor_graph.rs index a247b7b4bad9..e7e436cf6e2d 100644 --- a/src/query/service/tests/it/pipelines/executor/executor_graph.rs +++ b/src/query/service/tests/it/pipelines/executor/executor_graph.rs @@ -492,7 +492,6 @@ async fn create_executor_with_simple_pipeline( pipeline.add_pipe(sink_pipe); pipeline.set_max_threads(size); let settings = ExecutorSettings { - enable_profiling: false, query_id: Arc::new("".to_string()), max_execute_time_in_seconds: Default::default(), }; diff --git a/src/query/service/tests/it/pipelines/executor/pipeline_executor.rs b/src/query/service/tests/it/pipelines/executor/pipeline_executor.rs index d68fa1a16750..907c37840c03 100644 --- a/src/query/service/tests/it/pipelines/executor/pipeline_executor.rs +++ b/src/query/service/tests/it/pipelines/executor/pipeline_executor.rs @@ -41,7 +41,6 @@ async fn test_always_call_on_finished() -> Result<()> { let fixture = TestFixture::setup().await?; let settings = ExecutorSettings { - enable_profiling: false, query_id: Arc::new("".to_string()), max_execute_time_in_seconds: Default::default(), }; diff --git a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs index d5abe55eb88e..2b63f04dd248 100644 --- a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs +++ b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs @@ -492,9 +492,7 @@ impl TableContext for CtxDelegation { todo!() } - fn attach_query_str(&self, _kind: QueryKind, _query: String) { - todo!() - } + fn attach_query_str(&self, _kind: QueryKind, _query: String) {} fn get_query_str(&self) -> String { todo!() @@ -812,6 +810,7 @@ async fn test_get_same_table_once() -> Result<()> { let mut planner = Planner::new(ctx.clone()); let (_, _) = planner.plan_sql(query.as_str()).await?; + assert_eq!( ctx.table_without_cache .load(std::sync::atomic::Ordering::SeqCst), diff --git a/src/query/service/tests/it/sql/planner/builders/binder.rs b/src/query/service/tests/it/sql/planner/builders/binder.rs new file mode 100644 index 000000000000..a3829436c27a --- /dev/null +++ b/src/query/service/tests/it/sql/planner/builders/binder.rs @@ -0,0 +1,28 @@ +use databend_common_catalog::query_kind::QueryKind; +use databend_common_exception::Result; +use databend_common_sql::Planner; +use databend_common_storages_fuse::TableContext; +use databend_query::sessions::SessionType; +use databend_query::test_kits::TestFixture; + +#[tokio::test(flavor = "multi_thread")] +async fn test_query_kind() -> Result<()> { + let fixture = TestFixture::setup().await?; + fixture.create_default_database().await?; + fixture.create_default_table().await?; + + let http_session = fixture + .new_session_with_type(SessionType::HTTPQuery) + .await?; + let ctx = http_session.create_query_context().await?; + let mut planner = 
Planner::new(ctx.clone()); + let sql = format!( + "COPY INTO {}.{} from @~/ pattern='.*' FILE_FORMAT = (TYPE = 'csv') PURGE=true FORCE=true max_files=10000;", + fixture.default_db_name(), + fixture.default_table_name() + ); + let (_, _) = planner.plan_sql(&sql).await?; + let kind = ctx.get_query_kind(); + assert_eq!(kind, QueryKind::CopyIntoTable); + Ok(()) +} diff --git a/src/query/service/tests/it/sql/planner/builders/mod.rs b/src/query/service/tests/it/sql/planner/builders/mod.rs index 2a94d6a6251a..f7a62d3c019d 100644 --- a/src/query/service/tests/it/sql/planner/builders/mod.rs +++ b/src/query/service/tests/it/sql/planner/builders/mod.rs @@ -12,4 +12,5 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod binder; mod select_builder; diff --git a/src/query/service/tests/it/sql/planner/format/mod.rs b/src/query/service/tests/it/sql/planner/format/mod.rs index 317fff56e603..bd3fade8aca2 100644 --- a/src/query/service/tests/it/sql/planner/format/mod.rs +++ b/src/query/service/tests/it/sql/planner/format/mod.rs @@ -153,7 +153,8 @@ fn test_format() { marker_index: None, from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + original_join_type: None, } .into(), ), diff --git a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs index 4a6467113885..de5e306c01a9 100644 --- a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs +++ b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs @@ -294,8 +294,8 @@ fn build_test_segment_info( }); let col_stat = ColumnStatistics::new( - Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?.into_bytes()), - Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?.into_bytes()), + Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?), + Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?), 0, 0, None, diff --git a/src/query/service/tests/it/storages/fuse/operations/commit.rs b/src/query/service/tests/it/storages/fuse/operations/commit.rs index b5db2eaa3563..f7a13c7f4526 100644 --- a/src/query/service/tests/it/storages/fuse/operations/commit.rs +++ b/src/query/service/tests/it/storages/fuse/operations/commit.rs @@ -468,9 +468,7 @@ impl TableContext for CtxDelegation { todo!() } - fn attach_query_str(&self, _kind: QueryKind, _query: String) { - todo!() - } + fn attach_query_str(&self, _kind: QueryKind, _query: String) {} fn get_query_str(&self) -> String { todo!() diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs index a5d108589b67..591d2c879de8 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs @@ -122,7 +122,7 @@ async fn do_compact(ctx: Arc, table: Arc) -> Result databend_common_exception::Result<()> { assert_eq!(col_stats.max(), Scalar::Number(NumberScalar::Int32(3))); assert_eq!(col_stats.distinct_of_values, Some(3)); let col_stats = r.get(&1).unwrap(); - assert_eq!(col_stats.min(), Scalar::String(b"aa".to_vec())); - assert_eq!(col_stats.max(), Scalar::String(b"bb".to_vec())); + assert_eq!(col_stats.min(), Scalar::String("aa".to_string())); + assert_eq!(col_stats.max(), Scalar::String("bb".to_string())); 
assert_eq!(col_stats.distinct_of_values, Some(2)); Ok(()) } @@ -102,8 +102,8 @@ fn test_ft_stats_block_stats_with_column_distinct_count() -> databend_common_exc assert_eq!(col_stats.max(), Scalar::Number(NumberScalar::Int32(3))); assert_eq!(col_stats.distinct_of_values, Some(3)); let col_stats = r.get(&1).unwrap(); - assert_eq!(col_stats.min(), Scalar::String(b"aa".to_vec())); - assert_eq!(col_stats.max(), Scalar::String(b"bb".to_vec())); + assert_eq!(col_stats.min(), Scalar::String("aa".to_string())); + assert_eq!(col_stats.max(), Scalar::String("bb".to_string())); assert_eq!(col_stats.distinct_of_values, Some(2)); Ok(()) } @@ -447,8 +447,8 @@ fn test_ft_stats_block_stats_string_columns_trimming() -> databend_common_except let min_expr = rand_strings.iter().min().unwrap(); let max_expr = rand_strings.iter().max().unwrap(); - let data_value_min = Scalar::String(min_expr.clone().into_bytes()); - let data_value_max = Scalar::String(max_expr.clone().into_bytes()); + let data_value_min = Scalar::String(min_expr.clone()); + let data_value_max = Scalar::String(max_expr.clone()); let trimmed_min = data_value_min.clone().trim_min(STATS_STRING_PREFIX_LEN); let trimmed_max = data_value_max.clone().trim_max(STATS_STRING_PREFIX_LEN); @@ -534,7 +534,7 @@ fn test_ft_stats_block_stats_string_columns_trimming_using_eval() // - the length of string value is larger or equal than STRING_PREFIX_LEN // - AND the string has a prefix of length STRING_PREFIX_LEN, for all the char C in prefix, // C > REPLACEMENT_CHAR; which means we can not replace any of them. - let string_max_expr = String::from_utf8(max_expr.as_string().unwrap().to_vec()).unwrap(); + let string_max_expr = max_expr.as_string().unwrap().to_string(); let meaningless_to_collect_max = is_degenerated_case(string_max_expr.as_str()); if meaningless_to_collect_max { @@ -584,12 +584,8 @@ fn is_degenerated_case(value: &str) -> bool { larger_than_prefix_len && prefixed_with_irreplaceable_chars } -fn char_len(value: &[u8]) -> usize { - String::from_utf8(value.to_vec()) - .unwrap() - .as_str() - .chars() - .count() +fn char_len(value: &str) -> usize { + value.chars().count() } #[test] diff --git a/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs b/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs index bd14c1c56e14..f5691e5ca303 100644 --- a/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs +++ b/src/query/service/tests/it/storages/fuse/table_functions/clustering_information_table.rs @@ -42,8 +42,8 @@ async fn test_clustering_information_table_read() -> Result<()> { fixture.create_default_table().await?; // func args - let arg_db = Scalar::String(db.as_bytes().to_vec()); - let arg_tbl = Scalar::String(tbl.as_bytes().to_vec()); + let arg_db = Scalar::String(db.clone()); + let arg_tbl = Scalar::String(tbl.clone()); { let expected = vec![ diff --git a/src/query/service/tests/it/storages/testdata/columns_table.txt b/src/query/service/tests/it/storages/testdata/columns_table.txt index 846aa4c00165..1b1056cd780b 100644 --- a/src/query/service/tests/it/storages/testdata/columns_table.txt +++ b/src/query/service/tests/it/storages/testdata/columns_table.txt @@ -66,7 +66,6 @@ DB.Table: 'system'.'columns', Table: columns-table_id:1, ver:0, Engine: SystemCo | 'constraint_name' | 'information_schema' | 'key_column_usage' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | | 'constraint_schema' | 'information_schema' | 'key_column_usage' | 'NULL' | 'NULL' | '' | '' 
| 'NO' | '' | | 'copy_options' | 'system' | 'stages' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | -| 'cpu_time' | 'system' | 'processor_profile' | 'UInt64' | 'BIGINT UNSIGNED' | '' | '' | 'NO' | '' | | 'cpu_usage' | 'system' | 'query_log' | 'UInt32' | 'INT UNSIGNED' | '' | '' | 'NO' | '' | | 'create_time' | 'information_schema' | 'tables' | 'Timestamp' | 'TIMESTAMP' | '' | '' | 'NO' | '' | | 'created_on' | 'system' | 'background_jobs' | 'Timestamp' | 'TIMESTAMP' | '' | '' | 'NO' | '' | @@ -142,9 +141,6 @@ DB.Table: 'system'.'columns', Table: columns-table_id:1, ver:0, Engine: SystemCo | 'exception_code' | 'system' | 'task_history' | 'Int64' | 'BIGINT' | '' | '' | 'NO' | '' | | 'exception_text' | 'system' | 'query_log' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'exception_text' | 'system' | 'task_history' | 'Nullable(String)' | 'VARCHAR' | '' | '' | 'YES' | '' | -| 'exchange_bytes' | 'system' | 'processor_profile' | 'UInt64' | 'BIGINT UNSIGNED' | '' | '' | 'NO' | '' | -| 'exchange_rows' | 'system' | 'processor_profile' | 'UInt64' | 'BIGINT UNSIGNED' | '' | '' | 'NO' | '' | -| 'execution_info' | 'system' | 'query_profile' | 'Variant' | 'VARIANT' | '' | '' | 'NO' | '' | | 'extra' | 'information_schema' | 'columns' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | | 'extra' | 'system' | 'query_log' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'extra_info' | 'system' | 'locks' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | @@ -257,11 +253,6 @@ DB.Table: 'system'.'columns', Table: columns-table_id:1, ver:0, Engine: SystemCo | 'numeric_precision' | 'information_schema' | 'columns' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | | 'numeric_precision_radix' | 'information_schema' | 'columns' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | | 'numeric_scale' | 'information_schema' | 'columns' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | -| 'operator_attribute' | 'system' | 'query_summary' | 'Variant' | 'VARIANT' | '' | '' | 'NO' | '' | -| 'operator_children' | 'system' | 'query_summary' | 'Array(UInt32)' | 'ARRAY(UINT32)' | '' | '' | 'NO' | '' | -| 'operator_id' | 'system' | 'query_profile' | 'UInt32' | 'INT UNSIGNED' | '' | '' | 'NO' | '' | -| 'operator_id' | 'system' | 'query_summary' | 'UInt32' | 'INT UNSIGNED' | '' | '' | 'NO' | '' | -| 'operator_type' | 'system' | 'query_summary' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'options' | 'system' | 'password_policies' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'ordinal_position' | 'information_schema' | 'columns' | 'UInt8' | 'TINYINT UNSIGNED' | '' | '' | 'NO' | '' | | 'ordinal_position' | 'information_schema' | 'key_column_usage' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | @@ -290,8 +281,6 @@ DB.Table: 'system'.'columns', Table: columns-table_id:1, ver:0, Engine: SystemCo | 'query_id' | 'system' | 'processor_profile' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'query_id' | 'system' | 'query_cache' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'query_id' | 'system' | 'query_log' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | -| 'query_id' | 'system' | 'query_profile' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | -| 'query_id' | 'system' | 'query_summary' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'query_id' | 'system' | 'task_history' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'query_kind' | 'system' | 'query_log' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'query_start_time' | 'system' | 'query_log' | 'Timestamp' | 'TIMESTAMP' | '' | '' | 'NO' | '' | @@ -325,6 +314,8 @@ DB.Table: 'system'.'columns', Table: 
columns-table_id:1, ver:0, Engine: SystemCo | 'schema_owner' | 'information_schema' | 'schemata' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'seq_in_index' | 'information_schema' | 'statistics' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | | 'server_version' | 'system' | 'query_log' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | +| 'session_parameters' | 'system' | 'task_history' | 'Nullable(Variant)' | 'VARIANT' | '' | '' | 'YES' | '' | +| 'session_parameters' | 'system' | 'tasks' | 'Nullable(Variant)' | 'VARIANT' | '' | '' | 'YES' | '' | | 'session_settings' | 'system' | 'query_log' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'size' | 'system' | 'caches' | 'UInt64' | 'BIGINT UNSIGNED' | '' | '' | 'NO' | '' | | 'snapshot_location' | 'system' | 'streams' | 'Nullable(String)' | 'VARCHAR' | '' | '' | 'YES' | '' | @@ -342,6 +333,7 @@ DB.Table: 'system'.'columns', Table: columns-table_id:1, ver:0, Engine: SystemCo | 'state' | 'system' | 'task_history' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'state' | 'system' | 'tasks' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'statistics' | 'system' | 'malloc_stats' | 'Variant' | 'VARIANT' | '' | '' | 'NO' | '' | +| 'statistics' | 'system' | 'processor_profile' | 'Variant' | 'VARIANT' | '' | '' | 'NO' | '' | | 'status' | 'system' | 'backtrace' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'status' | 'system' | 'locks' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'status' | 'system' | 'processes' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | @@ -411,7 +403,6 @@ DB.Table: 'system'.'columns', Table: columns-table_id:1, ver:0, Engine: SystemCo | 'version' | 'system' | 'credits' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | | 'view_definition' | 'information_schema' | 'views' | 'NULL' | 'NULL' | '' | '' | 'NO' | '' | | 'virtual_columns' | 'system' | 'virtual_columns' | 'String' | 'VARCHAR' | '' | '' | 'NO' | '' | -| 'wait_time' | 'system' | 'processor_profile' | 'UInt64' | 'BIGINT UNSIGNED' | '' | '' | 'NO' | '' | | 'warehouse' | 'system' | 'task_history' | 'Nullable(String)' | 'VARCHAR' | '' | '' | 'YES' | '' | | 'warehouse' | 'system' | 'tasks' | 'Nullable(String)' | 'VARCHAR' | '' | '' | 'YES' | '' | | 'written_bytes' | 'system' | 'query_log' | 'UInt64' | 'BIGINT UNSIGNED' | '' | '' | 'NO' | '' | diff --git a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt index ea8c7d6f4cbf..00f6663075f0 100644 --- a/src/query/service/tests/it/storages/testdata/configs_table_basic.txt +++ b/src/query/service/tests/it/storages/testdata/configs_table_basic.txt @@ -123,7 +123,7 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo | 'query' | 'udf_server_allow_list' | '' | '' | | 'query' | 'users' | '{"name":"root","auth_type":"no_password","auth_string":null}' | '' | | 'query' | 'wait_timeout_mills' | '5000' | '' | -| 'storage' | 'allow_insecure' | 'false' | '' | +| 'storage' | 'allow_insecure' | 'true' | '' | | 'storage' | 'azblob.account_key' | '' | '' | | 'storage' | 'azblob.account_name' | '' | '' | | 'storage' | 'azblob.container' | '' | '' | diff --git a/src/query/service/tests/it/table_functions/ai_to_sql.rs b/src/query/service/tests/it/table_functions/ai_to_sql.rs index fbefaba8fa69..e02090ab99bb 100644 --- a/src/query/service/tests/it/table_functions/ai_to_sql.rs +++ b/src/query/service/tests/it/table_functions/ai_to_sql.rs @@ -21,16 +21,15 @@ use databend_query::table_functions::GPT2SQLTable; fn test_ai_to_sql_args() 
-> Result<()> { // 1 arg. { - let tbl_args = - TableArgs::new_positioned(vec![Scalar::String("prompt".to_string().into_bytes())]); + let tbl_args = TableArgs::new_positioned(vec![Scalar::String("prompt".to_string())]); let _ = GPT2SQLTable::create("system", "ai_to_sql", 1, tbl_args)?; } // 2 args. { let tbl_args = TableArgs::new_positioned(vec![ - Scalar::String("prompt".to_string().into_bytes()), - Scalar::String("api-key".to_string().into_bytes()), + Scalar::String("prompt".to_string()), + Scalar::String("api-key".to_string()), ]); let result = GPT2SQLTable::create("system", "ai_to_sql", 1, tbl_args); assert!(result.is_err()); diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index fcb35ca3fd34..74b15449d293 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -266,8 +266,8 @@ impl DefaultSettings { mode: SettingMode::Both, range: None, }), - ("enable_runtime_filter", DefaultSettingValue { - value: UserSettingValue::UInt64(0), + ("enable_bloom_runtime_filter", DefaultSettingValue { + value: UserSettingValue::UInt64(1), desc: "Enables runtime filter optimization for JOIN.", mode: SettingMode::Both, range: Some(SettingRange::Numeric(0..=1)), diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index 0ec4040cfb0d..3690d6e31708 100644 --- a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -278,8 +278,8 @@ impl Settings { Ok(self.try_get_u64("join_spilling_threshold")? as usize) } - pub fn get_runtime_filter(&self) -> Result { - Ok(self.try_get_u64("enable_runtime_filter")? != 0) + pub fn get_bloom_runtime_filter(&self) -> Result { + Ok(self.try_get_u64("enable_bloom_runtime_filter")? != 0) } pub fn get_prefer_broadcast_join(&self) -> Result { diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml index 398039106d16..15f24f19f1c5 100644 --- a/src/query/sql/Cargo.toml +++ b/src/query/sql/Cargo.toml @@ -34,7 +34,6 @@ databend-common-pipeline-core = { path = "../pipeline/core" } databend-common-pipeline-sources = { path = "../pipeline/sources" } databend-common-pipeline-transforms = { path = "../pipeline/transforms" } -databend-common-profile = { path = "../profile" } databend-common-settings = { path = "../settings" } databend-common-storage = { path = "../../common/storage" } databend-common-storages-delta = { path = "../storages/delta" } diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 56bb49360ba9..8222f9a5c809 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -12,13 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
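Illustrative sketch, not part of the diff: with the setting renamed to enable_bloom_runtime_filter (default now 1) and the getter renamed to get_bloom_runtime_filter, a call site that previously checked get_runtime_filter() would read the flag as below; the surrounding context (a settings handle obtained from the query context, as done elsewhere in this diff) is an assumption.

    // Hypothetical caller; `ctx` is assumed to be the query context used elsewhere in this patch.
    let settings = ctx.get_settings();
    // Defaults to true now that the setting's default value is UInt64(1).
    if settings.get_bloom_runtime_filter()? {
        // ... enable the bloom runtime filter on the hash join probe side ...
    }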
+use std::collections::HashMap; + use databend_common_ast::ast::FormatTreeNode; use databend_common_catalog::plan::PartStatistics; use databend_common_exception::Result; use databend_common_expression::DataSchemaRef; use databend_common_functions::BUILTIN_FUNCTIONS; -use databend_common_profile::SharedProcessorProfiles; +use databend_common_pipeline_core::processors::profile::PlanProfile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use itertools::Itertools; +use time::Duration; use crate::executor::explain::PlanStatsInfo; use crate::executor::physical_plans::AggregateExpand; @@ -60,10 +64,10 @@ impl PhysicalPlan { pub fn format( &self, metadata: MetadataRef, - prof_span_set: SharedProcessorProfiles, + profs: HashMap, ) -> Result> { let metadata = metadata.read().clone(); - to_format_tree(self, &metadata, &prof_span_set) + to_format_tree(self, &metadata, &profs) } pub fn format_join(&self, metadata: &MetadataRef) -> Result> { @@ -81,8 +85,8 @@ impl PhysicalPlan { Ok(FormatTreeNode::with_children( format!( - "Scan: {} (read rows: {})", - table_name, plan.source.statistics.read_rows + "Scan: {} (#{}) (read rows: {})", + table_name, plan.table_index, plan.source.statistics.read_rows ), vec![], )) @@ -176,7 +180,7 @@ impl PhysicalPlan { fn to_format_tree( plan: &PhysicalPlan, metadata: &Metadata, - profs: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { match plan { PhysicalPlan::TableScan(plan) => table_scan_to_format_tree(plan, metadata, profs), @@ -238,25 +242,29 @@ fn to_format_tree( /// Helper function to add profile info to the format tree. fn append_profile_info( children: &mut Vec>, - profs: &SharedProcessorProfiles, + profs: &HashMap, plan_id: u32, ) { - if let Some(prof) = profs.lock().unwrap().get(&plan_id) { + if let Some(prof) = profs.get(&plan_id) { children.push(FormatTreeNode::new(format!( "output rows: {}", - prof.output_rows, + prof.statistics[ProfileStatisticsName::OutputRows as usize], ))); children.push(FormatTreeNode::new(format!( "output bytes: {}", - prof.output_bytes, + prof.statistics[ProfileStatisticsName::OutputBytes as usize], ))); children.push(FormatTreeNode::new(format!( "total cpu time: {:.3}ms", - prof.cpu_time.as_secs_f64() * 1000.0 + Duration::nanoseconds(prof.statistics[ProfileStatisticsName::CpuTime as usize] as i64) + .as_seconds_f64() + * 1000.0, ))); children.push(FormatTreeNode::new(format!( "total wait time: {:.3}ms", - prof.wait_time.as_secs_f64() * 1000.0 + Duration::nanoseconds(prof.statistics[ProfileStatisticsName::WaitTime as usize] as i64) + .as_seconds_f64() + * 1000.0, ))); } } @@ -271,7 +279,7 @@ fn copy_into_table(plan: &CopyIntoTable) -> Result> { fn table_scan_to_format_tree( plan: &TableScan, metadata: &Metadata, - profs: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { if plan.table_index == DUMMY_TABLE_INDEX { return Ok(FormatTreeNode::new("DummyTableScan".to_string())); @@ -415,7 +423,7 @@ fn constant_table_scan_to_format_tree( fn filter_to_format_tree( plan: &Filter, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let filter = plan .predicates @@ -435,9 +443,9 @@ fn filter_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "Filter".to_string(), @@ -448,7 
+456,7 @@ fn filter_to_format_tree( fn project_to_format_tree( plan: &Project, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let mut children = vec![FormatTreeNode::new(format!( "output columns: [{}]", @@ -460,9 +468,9 @@ fn project_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "Project".to_string(), @@ -473,10 +481,10 @@ fn project_to_format_tree( fn eval_scalar_to_format_tree( plan: &EvalScalar, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { if plan.exprs.is_empty() { - return to_format_tree(&plan.input, metadata, prof_span_set); + return to_format_tree(&plan.input, metadata, profs); } let scalars = plan .exprs @@ -497,9 +505,9 @@ fn eval_scalar_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "EvalScalar".to_string(), @@ -522,7 +530,7 @@ pub fn pretty_display_agg_desc(desc: &AggregateFunctionDesc, metadata: &Metadata fn aggregate_expand_to_format_tree( plan: &AggregateExpand, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let sets = plan .grouping_sets @@ -551,9 +559,9 @@ fn aggregate_expand_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "AggregateExpand".to_string(), @@ -564,7 +572,7 @@ fn aggregate_expand_to_format_tree( fn aggregate_partial_to_format_tree( plan: &AggregatePartial, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let group_by = plan .group_by @@ -592,9 +600,9 @@ fn aggregate_partial_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "AggregatePartial".to_string(), @@ -605,7 +613,7 @@ fn aggregate_partial_to_format_tree( fn aggregate_final_to_format_tree( plan: &AggregateFinal, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let group_by = plan .group_by @@ -643,9 +651,9 @@ fn aggregate_final_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "AggregateFinal".to_string(), @@ -656,7 +664,7 @@ fn aggregate_final_to_format_tree( fn window_to_format_tree( plan: &Window, metadata: &Metadata, - prof_span_set: 
&SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let partition_by = plan .partition_by @@ -696,12 +704,16 @@ fn window_to_format_tree( FormatTreeNode::new(format!("frame: [{frame}]")), ]; - append_profile_info(&mut children, prof_span_set, plan.plan_id); + if let Some(limit) = plan.limit { + children.push(FormatTreeNode::new(format!("limit: [{limit}]"))) + } - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + append_profile_info(&mut children, profs, plan.plan_id); + + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( - "Window".to_string(), // todo(ariesdevil): show full window expression. + "Window".to_string(), children, )) } @@ -709,7 +721,7 @@ fn window_to_format_tree( fn sort_to_format_tree( plan: &Sort, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + prof_span_set: &HashMap, ) -> Result> { let sort_keys = plan .order_by @@ -753,7 +765,7 @@ fn sort_to_format_tree( fn limit_to_format_tree( plan: &Limit, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let mut children = vec![ FormatTreeNode::new(format!( @@ -773,9 +785,9 @@ fn limit_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children("Limit".to_string(), children)) } @@ -783,7 +795,7 @@ fn limit_to_format_tree( fn row_fetch_to_format_tree( plan: &RowFetch, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let table_schema = plan.source.source_info.schema(); let projected_schema = plan.cols_to_fetch.project_schema(&table_schema); @@ -805,9 +817,9 @@ fn row_fetch_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "RowFetch".to_string(), @@ -818,7 +830,7 @@ fn row_fetch_to_format_tree( fn range_join_to_format_tree( plan: &RangeJoin, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let range_join_conditions = plan .conditions @@ -843,8 +855,8 @@ fn range_join_to_format_tree( .collect::>() .join(", "); - let mut left_child = to_format_tree(&plan.left, metadata, prof_span_set)?; - let mut right_child = to_format_tree(&plan.right, metadata, prof_span_set)?; + let mut left_child = to_format_tree(&plan.left, metadata, profs)?; + let mut right_child = to_format_tree(&plan.right, metadata, profs)?; left_child.payload = format!("{}(Left)", left_child.payload); right_child.payload = format!("{}(Right)", right_child.payload); @@ -864,7 +876,7 @@ fn range_join_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); children.push(left_child); children.push(right_child); @@ -881,7 +893,7 @@ fn range_join_to_format_tree( fn hash_join_to_format_tree( plan: &HashJoin, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let build_keys = plan .build_keys @@ -902,8 +914,8 @@ fn hash_join_to_format_tree( 
.collect::>() .join(", "); - let mut build_child = to_format_tree(&plan.build, metadata, prof_span_set)?; - let mut probe_child = to_format_tree(&plan.probe, metadata, prof_span_set)?; + let mut build_child = to_format_tree(&plan.build, metadata, profs)?; + let mut probe_child = to_format_tree(&plan.probe, metadata, profs)?; build_child.payload = format!("{}(Build)", build_child.payload); probe_child.payload = format!("{}(Probe)", probe_child.payload); @@ -924,7 +936,7 @@ fn hash_join_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); children.push(build_child); children.push(probe_child); @@ -938,7 +950,7 @@ fn hash_join_to_format_tree( fn exchange_to_format_tree( plan: &Exchange, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { Ok(FormatTreeNode::with_children("Exchange".to_string(), vec![ FormatTreeNode::new(format!( @@ -958,14 +970,14 @@ fn exchange_to_format_tree( FragmentKind::Expansive => "Broadcast".to_string(), FragmentKind::Merge => "Merge".to_string(), })), - to_format_tree(&plan.input, metadata, prof_span_set)?, + to_format_tree(&plan.input, metadata, profs)?, ])) } fn union_all_to_format_tree( plan: &UnionAll, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let mut children = vec![FormatTreeNode::new(format!( "output columns: [{}]", @@ -977,11 +989,11 @@ fn union_all_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); children.extend(vec![ - to_format_tree(&plan.left, metadata, prof_span_set)?, - to_format_tree(&plan.right, metadata, prof_span_set)?, + to_format_tree(&plan.left, metadata, profs)?, + to_format_tree(&plan.right, metadata, profs)?, ]); Ok(FormatTreeNode::with_children( @@ -1043,7 +1055,7 @@ fn exchange_source_to_format_tree( fn exchange_sink_to_format_tree( plan: &ExchangeSink, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let mut children = vec![FormatTreeNode::new(format!( "output columns: [{}]", @@ -1055,7 +1067,7 @@ fn exchange_sink_to_format_tree( plan.destination_fragment_id ))); - children.push(to_format_tree(&plan.input, metadata, prof_span_set)?); + children.push(to_format_tree(&plan.input, metadata, profs)?); Ok(FormatTreeNode::with_children( "ExchangeSink".to_string(), @@ -1066,9 +1078,9 @@ fn exchange_sink_to_format_tree( fn distributed_insert_to_format_tree( plan: &DistributedInsertSelect, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { - let children = vec![to_format_tree(&plan.input, metadata, prof_span_set)?]; + let children = vec![to_format_tree(&plan.input, metadata, profs)?]; Ok(FormatTreeNode::with_children( "DistributedInsertSelect".to_string(), @@ -1079,9 +1091,9 @@ fn distributed_insert_to_format_tree( fn recluster_sink_to_format_tree( plan: &ReclusterSink, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { - let children = vec![to_format_tree(&plan.input, metadata, prof_span_set)?]; + let children = vec![to_format_tree(&plan.input, metadata, profs)?]; Ok(FormatTreeNode::with_children( "ReclusterSink".to_string(), children, @@ -1091,9 +1103,9 @@ fn recluster_sink_to_format_tree( fn commit_sink_to_format_tree( plan: &CommitSink, metadata: &Metadata, - prof_span_set: 
&SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { - let children = vec![to_format_tree(&plan.input, metadata, prof_span_set)?]; + let children = vec![to_format_tree(&plan.input, metadata, profs)?]; Ok(FormatTreeNode::with_children( "CommitSink".to_string(), children, @@ -1103,7 +1115,7 @@ fn commit_sink_to_format_tree( fn project_set_to_format_tree( plan: &ProjectSet, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let mut children = vec![FormatTreeNode::new(format!( "output columns: [{}]", @@ -1115,7 +1127,7 @@ fn project_set_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); children.extend(vec![FormatTreeNode::new(format!( "set returning functions: {}", @@ -1126,7 +1138,7 @@ fn project_set_to_format_tree( .join(", ") ))]); - children.extend(vec![to_format_tree(&plan.input, metadata, prof_span_set)?]); + children.extend(vec![to_format_tree(&plan.input, metadata, profs)?]); Ok(FormatTreeNode::with_children( "ProjectSet".to_string(), @@ -1137,7 +1149,7 @@ fn project_set_to_format_tree( fn udf_to_format_tree( plan: &Udf, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let mut children = vec![FormatTreeNode::new(format!( "output columns: [{}]", @@ -1149,7 +1161,7 @@ fn udf_to_format_tree( children.extend(items); } - append_profile_info(&mut children, prof_span_set, plan.plan_id); + append_profile_info(&mut children, profs, plan.plan_id); children.extend(vec![FormatTreeNode::new(format!( "udf functions: {}", @@ -1163,7 +1175,7 @@ fn udf_to_format_tree( .join(", ") ))]); - children.extend(vec![to_format_tree(&plan.input, metadata, prof_span_set)?]); + children.extend(vec![to_format_tree(&plan.input, metadata, profs)?]); Ok(FormatTreeNode::with_children("Udf".to_string(), children)) } @@ -1171,15 +1183,15 @@ fn udf_to_format_tree( fn materialized_cte_to_format_tree( plan: &MaterializedCte, metadata: &Metadata, - prof_span_set: &SharedProcessorProfiles, + profs: &HashMap, ) -> Result> { let children = vec![ FormatTreeNode::new(format!( "output columns: [{}]", format_output_columns(plan.output_schema()?, metadata, true) )), - to_format_tree(&plan.left, metadata, prof_span_set)?, - to_format_tree(&plan.right, metadata, prof_span_set)?, + to_format_tree(&plan.left, metadata, profs)?, + to_format_tree(&plan.right, metadata, profs)?, ]; Ok(FormatTreeNode::with_children( "MaterializedCTE".to_string(), diff --git a/src/query/sql/src/executor/mod.rs b/src/query/sql/src/executor/mod.rs index 840393294fd9..f5db1f22246c 100644 --- a/src/query/sql/src/executor/mod.rs +++ b/src/query/sql/src/executor/mod.rs @@ -19,7 +19,6 @@ mod physical_plan_builder; mod physical_plan_display; mod physical_plan_visitor; pub mod physical_plans; -mod profile; mod util; pub mod table_read_plan; @@ -27,5 +26,4 @@ pub mod table_read_plan; pub use physical_plan::PhysicalPlan; pub use physical_plan_builder::PhysicalPlanBuilder; pub use physical_plan_visitor::PhysicalPlanReplacer; -pub use profile::*; pub use util::*; diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 429dce5cae5e..bfb5c1c7ad4d 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -12,10 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
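Throughout the hunk above, the format-tree helpers drop the removed SharedProcessorProfiles handle and instead take a plain HashMap of per-operator profiles keyed by plan_id. A rough sketch of the lookup pattern behind append_profile_info follows; the PlanProfile value type and its cpu_time field are placeholders for illustration, not the crate's actual profile struct:

// Sketch only: the key/value types and the metric field are assumed.
fn append_profile_info(
    children: &mut Vec<FormatTreeNode<String>>,
    profs: &HashMap<u32, PlanProfile>,
    plan_id: u32,
) {
    if let Some(prof) = profs.get(&plan_id) {
        // Surface whatever metrics the profile carries as extra tree nodes.
        children.push(FormatTreeNode::new(format!("cpu time: {:?}", prof.cpu_time)));
    }
}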
+use std::collections::HashMap; + use databend_common_catalog::plan::DataSourcePlan; use databend_common_exception::Result; use databend_common_expression::DataSchemaRef; +use databend_common_functions::BUILTIN_FUNCTIONS; use enum_as_inner::EnumAsInner; +use itertools::Itertools; use crate::executor::physical_plans::AggregateExpand; use crate::executor::physical_plans::AggregateFinal; @@ -377,9 +381,9 @@ impl PhysicalPlan { PhysicalPlan::MaterializedCte(_) | // Todo: support union and range join return valid table index by join probe keys PhysicalPlan::UnionAll(_) | - PhysicalPlan::RangeJoin(_)| + PhysicalPlan::RangeJoin(_) | PhysicalPlan::ConstantTableScan(_) - |PhysicalPlan::CteScan(_) + | PhysicalPlan::CteScan(_) | PhysicalPlan::Udf(_) | PhysicalPlan::DeleteSource(_) | PhysicalPlan::CopyIntoTable(_) @@ -397,4 +401,303 @@ impl PhysicalPlan { | PhysicalPlan::UpdateSource(_) => usize::MAX, } } + + pub fn get_desc(&self) -> Result { + Ok(match self { + PhysicalPlan::TableScan(v) => format!( + "{}.{}", + v.source.catalog_info.name_ident.catalog_name, + v.source.source_info.desc() + ), + PhysicalPlan::Filter(v) => match v.predicates.is_empty() { + true => String::new(), + false => v.predicates[0].as_expr(&BUILTIN_FUNCTIONS).sql_display(), + }, + PhysicalPlan::AggregatePartial(v) => v + .agg_funcs + .iter() + .map(|x| { + format!( + "{}({})", + x.sig.name, + x.arg_indices + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + ) + }) + .join(", "), + PhysicalPlan::AggregateFinal(v) => v + .agg_funcs + .iter() + .map(|x| { + format!( + "{}({})", + x.sig.name, + x.arg_indices + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + ) + }) + .join(", "), + PhysicalPlan::Sort(v) => v + .order_by + .iter() + .map(|x| { + format!( + "{}{}{}", + x.order_by, + if x.asc { "" } else { " DESC" }, + if x.nulls_first { " NULLS FIRST" } else { "" }, + ) + }) + .join(", "), + PhysicalPlan::Limit(v) => match v.limit { + Some(limit) => format!("LIMIT {} OFFSET {}", limit, v.offset), + None => format!("OFFSET {}", v.offset), + }, + PhysicalPlan::Project(v) => v + .output_schema()? 
+ .fields + .iter() + .map(|x| x.name()) + .join(", "), + PhysicalPlan::EvalScalar(v) => v + .exprs + .iter() + .map(|(x, _)| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .join(", "), + PhysicalPlan::HashJoin(v) => { + format!( + "{} AND {}", + v.build_keys + .iter() + .zip(v.probe_keys.iter()) + .map(|(l, r)| format!( + "({} = {})", + l.as_expr(&BUILTIN_FUNCTIONS).sql_display(), + r.as_expr(&BUILTIN_FUNCTIONS).sql_display() + )) + .join(" AND "), + v.non_equi_conditions + .iter() + .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .join(" AND ") + ) + } + PhysicalPlan::ProjectSet(v) => v + .srf_exprs + .iter() + .map(|(x, _)| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .join(", "), + PhysicalPlan::AggregateExpand(v) => v + .grouping_sets + .sets + .iter() + .map(|set| { + set.iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + }) + .map(|s| format!("({})", s)) + .collect::>() + .join(", "), + PhysicalPlan::Window(v) => { + let partition_by = v + .partition_by + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", "); + + let order_by = v + .order_by + .iter() + .map(|x| { + format!( + "{}{}{}", + x.order_by, + if x.asc { "" } else { " DESC" }, + if x.nulls_first { " NULLS FIRST" } else { "" }, + ) + }) + .collect::>() + .join(", "); + + format!("partition by {}, order by {}", partition_by, order_by) + } + PhysicalPlan::RowFetch(v) => { + let table_schema = v.source.source_info.schema(); + let projected_schema = v.cols_to_fetch.project_schema(&table_schema); + projected_schema.fields.iter().map(|f| f.name()).join(", ") + } + PhysicalPlan::RangeJoin(v) => { + format!( + "{} AND {}", + v.conditions + .iter() + .map(|condition| { + let left = condition + .left_expr + .as_expr(&BUILTIN_FUNCTIONS) + .sql_display(); + let right = condition + .right_expr + .as_expr(&BUILTIN_FUNCTIONS) + .sql_display(); + format!("{left} {:?} {right}", condition.operator) + }) + .join(" AND "), + v.other_conditions + .iter() + .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .join(" AND ") + ) + } + PhysicalPlan::Udf(v) => v + .udf_funcs + .iter() + .map(|x| format!("{}({})", x.func_name, x.arg_exprs.join(", "))) + .join(", "), + PhysicalPlan::CteScan(v) => { + format!("CTE index: {}, sub index: {}", v.cte_idx.0, v.cte_idx.1) + } + PhysicalPlan::UnionAll(v) => v + .pairs + .iter() + .map(|(l, r)| format!("{} <- {}", l, r)) + .join(", "), + _ => String::new(), + }) + } + + pub fn get_labels(&self) -> Result>> { + Ok(match self { + PhysicalPlan::TableScan(v) => { + let output_schema = v.output_schema()?; + let source_schema = v.source.source_info.schema(); + let columns_name = format!( + "Columns ({} / {})", + output_schema.num_fields(), + source_schema.num_fields() + ); + HashMap::from([ + (String::from("Full table name"), vec![format!( + "{}.{}", + v.source.catalog_info.name_ident.catalog_name, + v.source.source_info.desc() + )]), + (columns_name, v.name_mapping.keys().cloned().collect()), + (String::from("Total partitions"), vec![ + v.source.statistics.partitions_total.to_string(), + ]), + ]) + } + PhysicalPlan::Filter(v) => HashMap::from([( + String::from("Filter condition"), + v.predicates + .iter() + .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .collect(), + )]), + PhysicalPlan::Limit(v) => match v.limit { + Some(limit) => HashMap::from([ + (String::from("Number of rows"), vec![limit.to_string()]), + (String::from("Offset"), vec![v.offset.to_string()]), + ]), + None => HashMap::from([(String::from("Offset"), vec![v.offset.to_string()])]), + }, + 
PhysicalPlan::EvalScalar(v) => HashMap::from([( + String::from("List of Expressions"), + v.exprs + .iter() + .map(|(x, _)| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .collect(), + )]), + PhysicalPlan::Project(v) => HashMap::from([( + String::from("List of Expressions"), + v.output_schema()? + .fields + .iter() + .map(|x| x.name()) + .cloned() + .collect(), + )]), + PhysicalPlan::AggregatePartial(v) => HashMap::from([ + ( + String::from("Grouping keys"), + v.group_by.iter().map(|x| x.to_string()).collect(), + ), + ( + String::from("Aggregate Functions"), + v.agg_funcs + .iter() + .map(|x| { + format!( + "{}({})", + x.sig.name, + x.arg_indices + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + ) + }) + .collect(), + ), + ]), + PhysicalPlan::AggregateFinal(v) => HashMap::from([ + ( + String::from("Grouping keys"), + v.group_by.iter().map(|x| x.to_string()).collect(), + ), + ( + String::from("Aggregate Functions"), + v.agg_funcs + .iter() + .map(|x| { + format!( + "{}({})", + x.sig.name, + x.arg_indices + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + ) + }) + .collect(), + ), + ]), + PhysicalPlan::HashJoin(v) => HashMap::from([ + (String::from("Join Type"), vec![v.join_type.to_string()]), + ( + String::from("Join Build Side Keys"), + v.build_keys + .iter() + .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .collect(), + ), + ( + String::from("Join Probe Side Keys"), + v.probe_keys + .iter() + .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .collect(), + ), + ( + String::from("Join Conditions"), + v.non_equi_conditions + .iter() + .map(|x| x.as_expr(&BUILTIN_FUNCTIONS).sql_display()) + .collect(), + ), + ]), + _ => HashMap::new(), + }) + } } diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 753f8fecc074..b44c0096b73e 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -16,7 +16,6 @@ use std::collections::HashMap; use std::sync::Arc; use databend_common_catalog::table_context::TableContext; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::FunctionContext; @@ -112,10 +111,6 @@ impl PhysicalPlanBuilder { } RelOperator::AddRowNumber(_) => self.build_add_row_number(s_expr, required).await, RelOperator::Udf(udf) => self.build_udf(s_expr, udf, required, stat_info).await, - _ => Err(ErrorCode::Internal(format!( - "Unsupported physical plan: {:?}", - s_expr.plan() - ))), } } } diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs index c79d9305b2c8..089712cbbe0d 100644 --- a/src/query/sql/src/executor/physical_plan_visitor.rs +++ b/src/query/sql/src/executor/physical_plan_visitor.rs @@ -209,6 +209,7 @@ pub trait PhysicalPlanReplacer { partition_by: plan.partition_by.clone(), order_by: plan.order_by.clone(), window_frame: plan.window_frame.clone(), + limit: plan.limit, })) } @@ -234,7 +235,9 @@ pub trait PhysicalPlanReplacer { need_hold_hash_table: plan.need_hold_hash_table, stat_info: plan.stat_info.clone(), probe_keys_rt: plan.probe_keys_rt.clone(), + enable_bloom_runtime_filter: plan.enable_bloom_runtime_filter, broadcast: plan.broadcast, + single_to_inner: plan.single_to_inner.clone(), })) } diff --git a/src/query/sql/src/executor/physical_plans/physical_hash_join.rs b/src/query/sql/src/executor/physical_plans/physical_hash_join.rs index 31be0ba434c7..33f65539ba5c 100644 
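The get_desc and get_labels accessors added to PhysicalPlan above return, respectively, a one-line operator description and a map of labelled attribute lists. A minimal usage sketch, assuming an already-built plan node (the printing format and the helper name are illustrative only; Result is databend_common_exception::Result as imported in this file):

fn print_plan_summary(plan: &PhysicalPlan) -> Result<()> {
    // One-line description, e.g. "LIMIT 10 OFFSET 0" for a Limit node.
    println!("desc: {}", plan.get_desc()?);

    // Labelled attributes, e.g. "Filter condition" -> ["t.a > 1"] for a Filter node.
    for (label, values) in plan.get_labels()? {
        println!("{}: [{}]", label, values.join(", "));
    }
    Ok(())
}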
--- a/src/query/sql/src/executor/physical_plans/physical_hash_join.rs +++ b/src/query/sql/src/executor/physical_plans/physical_hash_join.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; + +use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::type_check::check_cast; @@ -27,14 +30,17 @@ use databend_common_functions::BUILTIN_FUNCTIONS; use crate::executor::explain::PlanStatsInfo; use crate::executor::physical_plans::Exchange; +use crate::executor::physical_plans::FragmentKind; use crate::executor::PhysicalPlan; use crate::executor::PhysicalPlanBuilder; use crate::optimizer::ColumnSet; +use crate::optimizer::RelExpr; use crate::optimizer::SExpr; use crate::plans::Join; use crate::plans::JoinType; use crate::ColumnEntry; use crate::IndexType; +use crate::MetadataRef; use crate::ScalarExpr; use crate::TypeCheck; @@ -69,10 +75,15 @@ pub struct HashJoin { // Only used for explain pub stat_info: Option, - // probe keys for runtime filter - pub probe_keys_rt: Vec>>, + // probe keys for runtime filter, and record the index of table that used in probe keys. + pub probe_keys_rt: Vec, IndexType)>>, + // If enable bloom runtime filter + pub enable_bloom_runtime_filter: bool, // Under cluster, mark if the join is broadcast join. pub broadcast: bool, + // When left/right single join converted to inner join, record the original join type + // and do some special processing during runtime. + pub single_to_inner: Option, } impl HashJoin { @@ -93,6 +104,16 @@ impl PhysicalPlanBuilder { ) -> Result { let mut probe_side = Box::new(self.build(s_expr.child(0)?, required.0).await?); let mut build_side = Box::new(self.build(s_expr.child(1)?, required.1).await?); + + let mut is_broadcast = false; + // Check if join is broadcast join + if let PhysicalPlan::Exchange(Exchange { + kind: FragmentKind::Expansive, + .. + }) = build_side.as_ref() + { + is_broadcast = true; + } // Unify the data types of the left and right exchange keys. if let ( PhysicalPlan::Exchange(Exchange { @@ -177,6 +198,7 @@ impl PhysicalPlanBuilder { let mut right_join_conditions = Vec::new(); let mut left_join_conditions_rt = Vec::new(); let mut probe_to_build_index = Vec::new(); + let mut table_index = None; for (left_condition, right_condition) in join .left_conditions .iter() @@ -189,25 +211,38 @@ impl PhysicalPlanBuilder { .type_check(probe_schema.as_ref())? .project_column_ref(|index| probe_schema.index_of(&index.to_string()).unwrap()); - let left_expr_for_runtime_filter = - if left_condition.used_columns().iter().all(|idx| { - // Runtime filter only support column in base table. It's possible to use a wrong derived column with - // the same name as a base table column, so we need to check if the column is a base table column. - matches!( - self.metadata.read().column(*idx), - ColumnEntry::BaseTableColumn(_) - ) - }) && matches!(probe_side, box PhysicalPlan::TableScan(_)) - { - Some( + let left_expr_for_runtime_filter = if left_condition.used_columns().iter().all(|idx| { + // Runtime filter only support column in base table. It's possible to use a wrong derived column with + // the same name as a base table column, so we need to check if the column is a base table column. 
+ matches!( + self.metadata.read().column(*idx), + ColumnEntry::BaseTableColumn(_) + ) + }) { + if let Some(column_idx) = left_condition.used_columns().iter().next() { + // Safe to unwrap because we have checked the column is a base table column. + if table_index.is_none() { + table_index = Some( + self.metadata + .read() + .column(*column_idx) + .table_index() + .unwrap(), + ); + } + Some(( left_condition .as_raw_expr() .type_check(&*self.metadata.read())? .project_column_ref(|col| col.column_name.clone()), - ) + table_index.unwrap(), + )) } else { None - }; + } + } else { + None + }; if join.join_type == JoinType::Inner { if let (ScalarExpr::BoundColumnRef(left), ScalarExpr::BoundColumnRef(right)) = @@ -266,7 +301,10 @@ impl PhysicalPlanBuilder { )?; let left_expr_for_runtime_filter = left_expr_for_runtime_filter - .map(|expr| check_cast(expr.span(), false, expr, &common_ty, &BUILTIN_FUNCTIONS)) + .map(|(expr, idx)| { + check_cast(expr.span(), false, expr, &common_ty, &BUILTIN_FUNCTIONS) + .map(|casted_expr| (casted_expr, idx)) + }) .transpose()?; let (left_expr, _) = @@ -274,13 +312,17 @@ impl PhysicalPlanBuilder { let (right_expr, _) = ConstantFolder::fold(&right_expr, &self.func_ctx, &BUILTIN_FUNCTIONS); - let left_expr_for_runtime_filter = left_expr_for_runtime_filter - .map(|expr| ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS).0); + let left_expr_for_runtime_filter = left_expr_for_runtime_filter.map(|(expr, idx)| { + ( + ConstantFolder::fold(&expr, &self.func_ctx, &BUILTIN_FUNCTIONS).0, + idx, + ) + }); left_join_conditions.push(left_expr.as_remote_expr()); right_join_conditions.push(right_expr.as_remote_expr()); left_join_conditions_rt - .push(left_expr_for_runtime_filter.map(|expr| expr.as_remote_expr())); + .push(left_expr_for_runtime_filter.map(|(expr, idx)| (expr.as_remote_expr(), idx))); } let mut probe_projections = ColumnSet::new(); @@ -461,7 +503,37 @@ impl PhysicalPlanBuilder { output_schema, need_hold_hash_table: join.need_hold_hash_table, stat_info: Some(stat_info), - broadcast: join.broadcast, + broadcast: is_broadcast, + single_to_inner: join.single_to_inner.clone(), + enable_bloom_runtime_filter: adjust_bloom_runtime_filter( + self.ctx.clone(), + &self.metadata, + table_index, + s_expr, + ) + .await?, })) } } + +// Check if enable bloom runtime filter +async fn adjust_bloom_runtime_filter( + ctx: Arc, + metadata: &MetadataRef, + table_index: Option, + s_expr: &SExpr, +) -> Result { + if let Some(table_index) = table_index { + let table = metadata.read().table(table_index).table(); + if let Some(stats) = table.table_statistics(ctx.clone()).await? { + if let Some(num_rows) = stats.num_rows { + let join_cardinality = RelExpr::with_s_expr(s_expr) + .derive_cardinality()? + .cardinality; + // If the filtered data reduces to less than 1/1000 of the original dataset, we will enable bloom runtime filter. 
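// For example (illustrative numbers only): with num_rows = 1_000_000 the threshold is
// 1_000_000 / 1000 = 1000, so an estimated join_cardinality of 800.0 enables the bloom
// runtime filter, while an estimate of 5000.0 leaves it disabled.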
+ return Ok(join_cardinality <= (num_rows / 1000) as f64); + } + } + } + Ok(false) +} diff --git a/src/query/sql/src/executor/physical_plans/physical_window.rs b/src/query/sql/src/executor/physical_plans/physical_window.rs index b7639998d4c8..cf2bf05bcec1 100644 --- a/src/query/sql/src/executor/physical_plans/physical_window.rs +++ b/src/query/sql/src/executor/physical_plans/physical_window.rs @@ -55,6 +55,7 @@ pub struct Window { pub partition_by: Vec, pub order_by: Vec, pub window_frame: WindowFuncFrame, + pub limit: Option, } impl Window { @@ -357,6 +358,7 @@ impl PhysicalPlanBuilder { partition_by: partition_items, order_by: order_by_items, window_frame: w.frame.clone(), + limit: w.limit, })) } } diff --git a/src/query/sql/src/executor/profile.rs b/src/query/sql/src/executor/profile.rs deleted file mode 100644 index 189ac0e68534..000000000000 --- a/src/query/sql/src/executor/profile.rs +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use databend_common_exception::Result; -use databend_common_functions::BUILTIN_FUNCTIONS; -use databend_common_profile::AggregateAttribute; -use databend_common_profile::AggregateExpandAttribute; -use databend_common_profile::CteScanAttribute; -use databend_common_profile::EvalScalarAttribute; -use databend_common_profile::ExchangeAttribute; -use databend_common_profile::FilterAttribute; -use databend_common_profile::JoinAttribute; -use databend_common_profile::LimitAttribute; -use databend_common_profile::OperatorAttribute; -use databend_common_profile::OperatorProfile; -use databend_common_profile::OperatorType; -use databend_common_profile::ProcessorProfiles; -use databend_common_profile::ProjectSetAttribute; -use databend_common_profile::QueryProfile; -use databend_common_profile::SortAttribute; -use databend_common_profile::TableScanAttribute; -use databend_common_profile::UdfAttribute; -use databend_common_profile::WindowAttribute; -use itertools::Itertools; - -use crate::executor::format::pretty_display_agg_desc; -use crate::executor::physical_plans::FragmentKind; -use crate::executor::physical_plans::WindowFunction; -use crate::executor::PhysicalPlan; -use crate::planner::Metadata; -use crate::MetadataRef; - -pub struct ProfileHelper; - -impl ProfileHelper { - pub fn build_query_profile( - query_id: &str, - metadata: &MetadataRef, - plan: &PhysicalPlan, - profs: &ProcessorProfiles, - ) -> Result { - let mut plan_node_profs = vec![]; - let metadata = metadata.read().clone(); - flatten_plan_node_profile(&metadata, plan, profs, &mut plan_node_profs)?; - - Ok(QueryProfile::new(query_id.to_string(), plan_node_profs)) - } -} - -fn flatten_plan_node_profile( - metadata: &Metadata, - plan: &PhysicalPlan, - profs: &ProcessorProfiles, - plan_node_profs: &mut Vec, -) -> Result<()> { - match plan { - PhysicalPlan::TableScan(scan) => { - let table = metadata.table(scan.table_index).clone(); - let qualified_name = format!("{}.{}", table.database(), table.name()); - let proc_prof = 
profs.get(&scan.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: scan.plan_id, - operator_type: OperatorType::TableScan, - children: vec![], - execution_info: proc_prof.into(), - attribute: OperatorAttribute::TableScan(TableScanAttribute { qualified_name }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::CteScan(scan) => { - let prof = OperatorProfile { - id: scan.plan_id, - operator_type: OperatorType::CteScan, - children: vec![], - execution_info: Default::default(), - attribute: OperatorAttribute::CteScan(CteScanAttribute { - cte_idx: scan.cte_idx.0, - }), - }; - plan_node_profs.push(prof) - } - PhysicalPlan::ConstantTableScan(scan) => { - let proc_prof = profs.get(&scan.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: scan.plan_id, - operator_type: OperatorType::ConstantTableScan, - children: vec![], - execution_info: proc_prof.into(), - attribute: OperatorAttribute::Empty, - }; - plan_node_profs.push(prof); - } - PhysicalPlan::Filter(filter) => { - flatten_plan_node_profile(metadata, &filter.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&filter.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: filter.plan_id, - operator_type: OperatorType::Filter, - children: vec![filter.input.get_id()], - execution_info: proc_prof.into(), - attribute: OperatorAttribute::Filter(FilterAttribute { - predicate: filter - .predicates - .iter() - .map(|pred| pred.as_expr(&BUILTIN_FUNCTIONS).sql_display()) - .join(" AND "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::Project(project) => { - flatten_plan_node_profile(metadata, &project.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&project.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: project.plan_id, - operator_type: OperatorType::Project, - children: vec![project.input.get_id()], - execution_info: proc_prof.into(), - attribute: OperatorAttribute::Empty, - }; - plan_node_profs.push(prof); - } - PhysicalPlan::EvalScalar(eval) => { - flatten_plan_node_profile(metadata, &eval.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&eval.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: eval.plan_id, - operator_type: OperatorType::EvalScalar, - execution_info: proc_prof.into(), - children: vec![eval.input.get_id()], - attribute: OperatorAttribute::EvalScalar(EvalScalarAttribute { - scalars: eval - .exprs - .iter() - .map(|(expr, _)| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) - .join(", "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::ProjectSet(project_set) => { - flatten_plan_node_profile(metadata, &project_set.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&project_set.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: project_set.plan_id, - operator_type: OperatorType::ProjectSet, - execution_info: proc_prof.into(), - children: vec![project_set.input.get_id()], - attribute: OperatorAttribute::ProjectSet(ProjectSetAttribute { - functions: project_set - .srf_exprs - .iter() - .map(|(expr, _)| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) - .join(", "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::AggregateExpand(expand) => { - flatten_plan_node_profile(metadata, &expand.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&expand.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: expand.plan_id, - operator_type: OperatorType::AggregateExpand, - execution_info: 
proc_prof.into(), - children: vec![expand.input.get_id()], - attribute: OperatorAttribute::AggregateExpand(AggregateExpandAttribute { - group_keys: expand - .grouping_sets - .sets - .iter() - .map(|columns| { - format!( - "[{}]", - columns - .iter() - .map(|column| metadata.column(*column).name()) - .join(", ") - ) - }) - .join(", "), - aggr_exprs: "".to_string(), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::AggregatePartial(agg_partial) => { - flatten_plan_node_profile(metadata, &agg_partial.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&agg_partial.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: agg_partial.plan_id, - operator_type: OperatorType::Aggregate, - execution_info: proc_prof.into(), - children: vec![agg_partial.input.get_id()], - attribute: OperatorAttribute::Aggregate(AggregateAttribute { - group_keys: agg_partial - .group_by - .iter() - .map(|column| metadata.column(*column).name()) - .join(", "), - functions: agg_partial - .agg_funcs - .iter() - .map(|desc| pretty_display_agg_desc(desc, metadata)) - .join(", "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::AggregateFinal(agg_final) => { - flatten_plan_node_profile(metadata, &agg_final.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&agg_final.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: agg_final.plan_id, - operator_type: OperatorType::Aggregate, - execution_info: proc_prof.into(), - children: vec![agg_final.input.get_id()], - attribute: OperatorAttribute::Aggregate(AggregateAttribute { - group_keys: agg_final - .group_by - .iter() - .map(|column| metadata.column(*column).name()) - .join(", "), - functions: agg_final - .agg_funcs - .iter() - .map(|desc| pretty_display_agg_desc(desc, metadata)) - .join(", "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::Window(window) => { - flatten_plan_node_profile(metadata, &window.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&window.plan_id).copied().unwrap_or_default(); - let partition_by = window - .partition_by - .iter() - .map(|&index| { - let name = metadata.column(index).name(); - Ok(name) - }) - .collect::>>()? - .join(", "); - - let order_by = window - .order_by - .iter() - .map(|v| { - let name = metadata.column(v.order_by).name(); - Ok(name) - }) - .collect::>>()? 
- .join(", "); - - let frame = window.window_frame.to_string(); - - let func = match &window.func { - WindowFunction::Aggregate(agg) => pretty_display_agg_desc(agg, metadata), - func => format!("{}", func), - }; - let prof = OperatorProfile { - id: window.plan_id, - operator_type: OperatorType::Window, - children: vec![window.input.get_id()], - execution_info: proc_prof.into(), - attribute: OperatorAttribute::Window(WindowAttribute { - functions: format!( - "{} OVER (PARTITION BY {} ORDER BY {} {})", - func, partition_by, order_by, frame - ), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::Sort(sort) => { - flatten_plan_node_profile(metadata, &sort.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&sort.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: sort.plan_id, - operator_type: OperatorType::Sort, - execution_info: proc_prof.into(), - children: vec![sort.input.get_id()], - attribute: OperatorAttribute::Sort(SortAttribute { - sort_keys: sort - .order_by - .iter() - .map(|desc| { - format!( - "{} {}", - metadata.column(desc.order_by).name(), - if desc.asc { "ASC" } else { "DESC" } - ) - }) - .join(", "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::Limit(limit) => { - flatten_plan_node_profile(metadata, &limit.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&limit.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: limit.plan_id, - operator_type: OperatorType::Limit, - execution_info: proc_prof.into(), - children: vec![limit.input.get_id()], - attribute: OperatorAttribute::Limit(LimitAttribute { - limit: limit.limit.unwrap_or_default(), - offset: limit.offset, - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::RowFetch(fetch) => { - flatten_plan_node_profile(metadata, &fetch.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&fetch.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: fetch.plan_id, - operator_type: OperatorType::RowFetch, - execution_info: proc_prof.into(), - children: vec![fetch.input.get_id()], - attribute: OperatorAttribute::Empty, - }; - plan_node_profs.push(prof); - } - PhysicalPlan::HashJoin(hash_join) => { - flatten_plan_node_profile(metadata, &hash_join.probe, profs, plan_node_profs)?; - flatten_plan_node_profile(metadata, &hash_join.build, profs, plan_node_profs)?; - let proc_prof = profs.get(&hash_join.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: hash_join.plan_id, - operator_type: OperatorType::Join, - execution_info: proc_prof.into(), - children: vec![hash_join.probe.get_id(), hash_join.build.get_id()], - attribute: OperatorAttribute::Join(JoinAttribute { - join_type: hash_join.join_type.to_string(), - equi_conditions: hash_join - .probe_keys - .iter() - .zip(hash_join.build_keys.iter()) - .map(|(l, r)| { - format!( - "{} = {}", - l.as_expr(&BUILTIN_FUNCTIONS).sql_display(), - r.as_expr(&BUILTIN_FUNCTIONS).sql_display(), - ) - }) - .join(" AND "), - non_equi_conditions: hash_join - .non_equi_conditions - .iter() - .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) - .join(" AND "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::RangeJoin(range_join) => { - flatten_plan_node_profile(metadata, &range_join.left, profs, plan_node_profs)?; - flatten_plan_node_profile(metadata, &range_join.right, profs, plan_node_profs)?; - let proc_prof = profs.get(&range_join.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: range_join.plan_id, - 
operator_type: OperatorType::Join, - children: vec![range_join.left.get_id(), range_join.right.get_id()], - execution_info: proc_prof.into(), - attribute: OperatorAttribute::Join(JoinAttribute { - join_type: range_join.join_type.to_string(), - equi_conditions: range_join - .conditions - .iter() - .map(|expr| { - format!( - "{} {} {}", - expr.left_expr.as_expr(&BUILTIN_FUNCTIONS).sql_display(), - expr.operator, - expr.right_expr.as_expr(&BUILTIN_FUNCTIONS).sql_display(), - ) - }) - .join(" AND "), - non_equi_conditions: range_join - .other_conditions - .iter() - .map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS).sql_display()) - .join(" AND "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::Exchange(exchange) => { - flatten_plan_node_profile(metadata, &exchange.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&exchange.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: exchange.plan_id, - operator_type: OperatorType::Exchange, - execution_info: proc_prof.into(), - children: vec![exchange.input.get_id()], - attribute: OperatorAttribute::Exchange(ExchangeAttribute { - exchange_mode: match exchange.kind { - FragmentKind::Init => "Init".to_string(), - FragmentKind::Normal => "Hash".to_string(), - FragmentKind::Expansive => "Broadcast".to_string(), - FragmentKind::Merge => "Merge".to_string(), - }, - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::UnionAll(union) => { - flatten_plan_node_profile(metadata, &union.left, profs, plan_node_profs)?; - flatten_plan_node_profile(metadata, &union.right, profs, plan_node_profs)?; - let proc_prof = profs.get(&union.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: union.plan_id, - operator_type: OperatorType::UnionAll, - execution_info: proc_prof.into(), - children: vec![union.left.get_id(), union.right.get_id()], - attribute: OperatorAttribute::Empty, - }; - plan_node_profs.push(prof); - } - PhysicalPlan::DistributedInsertSelect(select) => { - flatten_plan_node_profile(metadata, &select.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&select.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: select.plan_id, - operator_type: OperatorType::Insert, - execution_info: proc_prof.into(), - children: vec![], - attribute: OperatorAttribute::Empty, - }; - plan_node_profs.push(prof); - } - PhysicalPlan::ExchangeSource(source) => { - let proc_prof = profs.get(&source.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: source.plan_id, - operator_type: OperatorType::Exchange, - execution_info: proc_prof.into(), - children: vec![], - attribute: OperatorAttribute::Empty, - }; - plan_node_profs.push(prof); - } - PhysicalPlan::ExchangeSink(sink) => { - flatten_plan_node_profile(metadata, &sink.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&sink.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: sink.plan_id, - operator_type: OperatorType::Exchange, - execution_info: proc_prof.into(), - children: vec![], - attribute: OperatorAttribute::Empty, - }; - plan_node_profs.push(prof); - } - PhysicalPlan::Udf(udf) => { - flatten_plan_node_profile(metadata, &udf.input, profs, plan_node_profs)?; - let proc_prof = profs.get(&udf.plan_id).copied().unwrap_or_default(); - let prof = OperatorProfile { - id: udf.plan_id, - operator_type: OperatorType::Udf, - execution_info: proc_prof.into(), - children: vec![udf.input.get_id()], - attribute: OperatorAttribute::Udf(UdfAttribute { - scalars: udf - .udf_funcs - 
.iter() - .map(|func| { - let arg_exprs = func.arg_exprs.join(", "); - format!("{}({})", func.func_name, arg_exprs) - }) - .join(", "), - }), - }; - plan_node_profs.push(prof); - } - PhysicalPlan::MaterializedCte(_) => todo!(), - PhysicalPlan::DeleteSource(_) - | PhysicalPlan::CommitSink(_) - | PhysicalPlan::CopyIntoTable(_) - | PhysicalPlan::ReplaceAsyncSourcer(_) - | PhysicalPlan::MergeInto(_) - | PhysicalPlan::MergeIntoAddRowNumber(_) - | PhysicalPlan::MergeIntoAppendNotMatched(_) - | PhysicalPlan::MergeIntoSource(_) - | PhysicalPlan::ReplaceDeduplicate(_) - | PhysicalPlan::ReplaceInto(_) - | PhysicalPlan::CompactSource(_) - | PhysicalPlan::ReclusterSource(_) - | PhysicalPlan::ReclusterSink(_) - | PhysicalPlan::UpdateSource(_) => unreachable!(), - } - - Ok(()) -} diff --git a/src/query/sql/src/lib.rs b/src/query/sql/src/lib.rs index b9ce4e6ed454..d66b138eb680 100644 --- a/src/query/sql/src/lib.rs +++ b/src/query/sql/src/lib.rs @@ -19,6 +19,7 @@ #![feature(try_blocks)] #![feature(extend_one)] #![feature(lazy_cell)] +#![feature(if_let_guard)] pub mod evaluator; pub mod executor; diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index 452bcf0e9084..d1869a138c57 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -161,7 +161,7 @@ impl<'a> Binder { ConstantFolder::fold(&expr, &self.ctx.get_function_context()?, &BUILTIN_FUNCTIONS); match new_expr { Expr::Constant { scalar, .. } => { - let value = String::from_utf8(scalar.into_string().unwrap())?; + let value = scalar.into_string().unwrap(); if variable.to_lowercase().as_str() == "timezone" { let tz = value.trim_matches(|c| c == '\'' || c == '\"'); tz.parse::().map_err(|_| { diff --git a/src/query/sql/src/planner/binder/ddl/task.rs b/src/query/sql/src/planner/binder/ddl/task.rs index f84dc9a8f114..ec748a625b06 100644 --- a/src/query/sql/src/planner/binder/ddl/task.rs +++ b/src/query/sql/src/planner/binder/ddl/task.rs @@ -93,6 +93,7 @@ impl Binder { after, when_condition, sql, + session_parameters, } = stmt; if (schedule_opts.is_none() && after.is_empty()) || (schedule_opts.is_some() && !after.is_empty()) @@ -114,6 +115,7 @@ impl Binder { after: after.clone(), when_condition: when_condition.clone(), comment: comments.clone(), + session_parameters: session_parameters.clone(), sql: sql.clone(), }; Ok(Plan::CreateTask(Box::new(plan))) @@ -135,12 +137,14 @@ impl Binder { schedule, suspend_task_after_num_failures, comments, + session_parameters, } = options { if warehouse.is_none() && schedule.is_none() && suspend_task_after_num_failures.is_none() && comments.is_none() + && session_parameters.is_none() { return Err(ErrorCode::SyntaxException( "alter task must set at least one option".to_string(), diff --git a/src/query/sql/src/planner/binder/join.rs b/src/query/sql/src/planner/binder/join.rs index 336187445b43..be9a8ff7c0a8 100644 --- a/src/query/sql/src/planner/binder/join.rs +++ b/src/query/sql/src/planner/binder/join.rs @@ -209,6 +209,7 @@ impl Binder { } let right_prop = RelExpr::with_s_expr(&right_child).derive_relational_prop()?; + let mut is_lateral = false; if !right_prop.outer_columns.is_empty() { // If there are outer columns in right child, then the join is a correlated lateral join let mut decorrelator = SubqueryRewriter::new(self.metadata.clone()); @@ -229,6 +230,7 @@ impl Binder { if join_type == JoinType::Cross { join_type = JoinType::Inner; } + is_lateral = true; } let logical_join = Join { @@ -239,7 +241,8 @@ impl Binder { 
marker_index: None, from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral, + single_to_inner: None, }; Ok(SExpr::create_binary( Arc::new(logical_join.into()), diff --git a/src/query/sql/src/planner/binder/project.rs b/src/query/sql/src/planner/binder/project.rs index e5a8c12b7239..efa59140cffd 100644 --- a/src/query/sql/src/planner/binder/project.rs +++ b/src/query/sql/src/planner/binder/project.rs @@ -130,6 +130,7 @@ impl Binder { projection_index: Some(column_binding.index), data_type, outer_columns, + contain_agg: None, }) } else { item.scalar.clone() diff --git a/src/query/sql/src/planner/binder/scalar_common.rs b/src/query/sql/src/planner/binder/scalar_common.rs index 695dc75f5c6b..c7da9957fba6 100644 --- a/src/query/sql/src/planner/binder/scalar_common.rs +++ b/src/query/sql/src/planner/binder/scalar_common.rs @@ -22,7 +22,6 @@ use crate::optimizer::RelationalProperty; use crate::plans::walk_expr; use crate::plans::BoundColumnRef; use crate::plans::CastExpr; -use crate::plans::ComparisonOp; use crate::plans::FunctionCall; use crate::plans::ScalarExpr; use crate::plans::Visitor; @@ -116,7 +115,7 @@ pub enum JoinPredicate<'a> { Both { left: &'a ScalarExpr, right: &'a ScalarExpr, - op: ComparisonOp, + is_equal_op: bool, }, Other(&'a ScalarExpr), } @@ -127,10 +126,6 @@ impl<'a> JoinPredicate<'a> { left_prop: &RelationalProperty, right_prop: &RelationalProperty, ) -> Self { - if contain_subquery(scalar) { - return Self::Other(scalar); - } - if scalar.used_columns().is_empty() { return Self::ALL(scalar); } @@ -144,20 +139,28 @@ impl<'a> JoinPredicate<'a> { } if let ScalarExpr::FunctionCall(func) = scalar { - if let Some(op) = ComparisonOp::try_from_func_name(func.func_name.as_str()) { - let left = &func.arguments[0]; - let right = &func.arguments[1]; - if satisfied_by(left, left_prop) && satisfied_by(right, right_prop) { - return Self::Both { left, right, op }; - } + if func.arguments.len() != 2 { + return Self::Other(scalar); + } - if satisfied_by(right, left_prop) && satisfied_by(left, right_prop) { - return Self::Both { - left: right, - right: left, - op, - }; - } + let is_equal_op = func.func_name.as_str() == "eq"; + let left = &func.arguments[0]; + let right = &func.arguments[1]; + + if satisfied_by(left, left_prop) && satisfied_by(right, right_prop) { + return Self::Both { + left, + right, + is_equal_op, + }; + } + + if satisfied_by(right, left_prop) && satisfied_by(left, right_prop) { + return Self::Both { + left: right, + right: left, + is_equal_op, + }; } } diff --git a/src/query/sql/src/planner/binder/setting.rs b/src/query/sql/src/planner/binder/setting.rs index d2a505b05944..51923f9d025a 100644 --- a/src/query/sql/src/planner/binder/setting.rs +++ b/src/query/sql/src/planner/binder/setting.rs @@ -58,7 +58,7 @@ impl Binder { ConstantFolder::fold(&expr, &self.ctx.get_function_context()?, &BUILTIN_FUNCTIONS); match new_expr { databend_common_expression::Expr::Constant { scalar, .. 
} => { - let value = String::from_utf8(scalar.into_string().unwrap())?; + let value = scalar.into_string().unwrap(); let vars = vec![VarValue { is_global, variable, diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index abcd91f14244..6fcc6634d2e7 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -100,6 +100,7 @@ use crate::optimizer::SExpr; use crate::planner::semantic::normalize_identifier; use crate::planner::semantic::TypeChecker; use crate::plans::CteScan; +use crate::plans::DummyTableScan; use crate::plans::EvalScalar; use crate::plans::FunctionCall; use crate::plans::RelOperator; @@ -135,24 +136,11 @@ impl Binder { } } } - let catalog = CATALOG_DEFAULT; - let database = "system"; - let tenant = self.ctx.get_tenant(); - let table_meta = self - .resolve_data_source(tenant.as_str(), catalog, database, "one", &None) - .await?; - let table_index = self.metadata.write().add_table( - CATALOG_DEFAULT.to_owned(), - database.to_string(), - table_meta, - None, - false, - false, - false, - ); - - self.bind_base_table(bind_context, database, table_index, None) - .await + let bind_context = BindContext::with_parent(Box::new(bind_context.clone())); + Ok(( + SExpr::create_leaf(Arc::new(DummyTableScan.into())), + bind_context, + )) } fn check_view_dep(bind_context: &BindContext, database: &str, view_name: &str) -> Result<()> { @@ -1494,8 +1482,7 @@ impl Binder { // copy from common-storages-fuse to avoid cyclic dependency. fn string_value(value: &Scalar) -> Result { match value { - Scalar::String(val) => String::from_utf8(val.clone()) - .map_err(|e| ErrorCode::BadArguments(format!("invalid string. {}", e))), + Scalar::String(val) => Ok(val.clone()), other => Err(ErrorCode::BadArguments(format!( "Expected a string value, but found a '{}'.", other diff --git a/src/query/sql/src/planner/binder/window.rs b/src/query/sql/src/planner/binder/window.rs index 304de507fc27..d44cfb8bb988 100644 --- a/src/query/sql/src/planner/binder/window.rs +++ b/src/query/sql/src/planner/binder/window.rs @@ -60,6 +60,7 @@ impl Binder { partition_by: window_info.partition_by_items.clone(), order_by: window_info.order_by_items.clone(), frame: window_info.frame.clone(), + limit: None, }; Ok(SExpr::create_unary( diff --git a/src/query/sql/src/planner/format/display_rel_operator.rs b/src/query/sql/src/planner/format/display_rel_operator.rs index 13c7835f9ce5..73dfb39382ec 100644 --- a/src/query/sql/src/planner/format/display_rel_operator.rs +++ b/src/query/sql/src/planner/format/display_rel_operator.rs @@ -71,7 +71,6 @@ impl Display for FormatContext { RelOperator::Limit(_) => write!(f, "Limit"), RelOperator::Exchange(op) => format_exchange(f, metadata, op), RelOperator::UnionAll(_) => write!(f, "Union"), - RelOperator::Pattern(_) => write!(f, "Pattern"), RelOperator::DummyTableScan(_) => write!(f, "DummyTableScan"), RelOperator::Window(_) => write!(f, "WindowFunc"), RelOperator::ProjectSet(_) => write!(f, "ProjectSet"), diff --git a/src/query/sql/src/planner/mod.rs b/src/query/sql/src/planner/mod.rs index 1e6ff878462f..3380ecc260bb 100644 --- a/src/query/sql/src/planner/mod.rs +++ b/src/query/sql/src/planner/mod.rs @@ -39,6 +39,7 @@ pub use bloom_index::BloomIndexColumns; pub use expression_parser::*; pub use format::format_scalar; pub use metadata::*; +pub use planner::get_query_kind; pub use planner::PlanExtras; pub use planner::Planner; pub use plans::insert::InsertInputSource; diff --git 
a/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs b/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs index b9750498bac7..4cb8645be82e 100644 --- a/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs +++ b/src/query/sql/src/planner/optimizer/cascades/tasks/optimize_expr.rs @@ -26,6 +26,7 @@ use crate::optimizer::cascades::tasks::SharedCounter; use crate::optimizer::cascades::CascadesOptimizer; use crate::optimizer::cost::Cost; use crate::optimizer::cost::CostContext; +use crate::optimizer::extract::Matcher; use crate::optimizer::Distribution; use crate::optimizer::DistributionEnforcer; use crate::optimizer::Enforcer; @@ -33,8 +34,6 @@ use crate::optimizer::PatternExtractor; use crate::optimizer::RelExpr; use crate::optimizer::RequiredProperty; use crate::optimizer::SExpr; -use crate::plans::PatternPlan; -use crate::plans::RelOp; use crate::plans::RelOperator; use crate::IndexType; @@ -401,19 +400,10 @@ impl OptimizeExprTask { return Ok(OptimizeExprEvent::OptimizingSelf); } - let mut extractor = PatternExtractor::create(); + let mut extractor = PatternExtractor::new(); let enforcer_child = Arc::new( extractor - .extract( - &optimizer.memo, - m_expr, - &SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - )), - )? + .extract(&optimizer.memo, m_expr, &Matcher::Leaf)? .pop() .ok_or_else(|| { ErrorCode::Internal(format!("Cannot find child of m_expr: {:?}", m_expr.plan)) diff --git a/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs b/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs index bf5d319d4db3..b55045ce8675 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/decorrelate.rs @@ -25,16 +25,15 @@ use crate::binder::Visibility; use crate::optimizer::decorrelate::subquery_rewriter::FlattenInfo; use crate::optimizer::decorrelate::subquery_rewriter::SubqueryRewriter; use crate::optimizer::decorrelate::subquery_rewriter::UnnestResult; +use crate::optimizer::extract::Matcher; use crate::optimizer::ColumnSet; use crate::optimizer::RelExpr; use crate::optimizer::SExpr; use crate::plans::BoundColumnRef; -use crate::plans::ComparisonOp; use crate::plans::Filter; use crate::plans::FunctionCall; use crate::plans::Join; use crate::plans::JoinType; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::ScalarExpr; use crate::plans::SubqueryExpr; @@ -87,63 +86,34 @@ impl SubqueryRewriter { // EvalScalar // \ // Get - let patterns = vec![ - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - ), - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - )), - ), + let matchers = vec![ + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + 
children: vec![], + }], + }], + }, + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }], + }, ]; let mut matched = false; - for pattern in patterns { - if subquery.subquery.match_pattern(&pattern) { + for matcher in matchers { + if matcher.matches(&subquery.subquery) { matched = true; break; } @@ -195,9 +165,12 @@ impl SubqueryRewriter { } JoinPredicate::Both { - left, right, op, .. + left, + right, + is_equal_op, + .. } => { - if op == ComparisonOp::Equal { + if is_equal_op { left_conditions.push(left.clone()); right_conditions.push(right.clone()); } else { @@ -221,7 +194,8 @@ impl SubqueryRewriter { marker_index: None, from_correlated_subquery: true, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, }; // Rewrite plan to semi-join. @@ -288,15 +262,30 @@ impl SubqueryRewriter { &mut right_conditions, &mut left_conditions, )?; + + let mut join_type = JoinType::LeftSingle; + if subquery.contain_agg.unwrap() { + let rel_expr = RelExpr::with_s_expr(&subquery.subquery); + let has_precise_cardinality = rel_expr + .derive_cardinality()? + .statistics + .precise_cardinality + .is_some(); + if has_precise_cardinality { + join_type = JoinType::Left; + } + } + let join_plan = Join { left_conditions, right_conditions, non_equi_conditions: vec![], - join_type: JoinType::LeftSingle, + join_type, marker_index: None, from_correlated_subquery: true, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, }; let s_expr = SExpr::create_binary( Arc::new(join_plan.into()), @@ -344,7 +333,8 @@ impl SubqueryRewriter { marker_index: Some(marker_index), from_correlated_subquery: true, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, }; let s_expr = SExpr::create_binary( Arc::new(join_plan.into()), @@ -407,7 +397,8 @@ impl SubqueryRewriter { marker_index: Some(marker_index), from_correlated_subquery: true, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, } .into(); Ok(( @@ -441,6 +432,7 @@ impl SubqueryRewriter { Box::from(column_entry.data_type()), Visibility::Visible, ) + .table_index(column_entry.table_index()) .build(), }); let derive_column = self.derived_columns.get(correlated_column).unwrap(); @@ -453,6 +445,7 @@ impl SubqueryRewriter { Box::from(column_entry.data_type()), Visibility::Visible, ) + .table_index(column_entry.table_index()) .build(), }); left_conditions.push(left_column); diff --git a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs index 9a3058ff7105..ee98642ae48f 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/flatten_plan.rs @@ -132,7 +132,8 @@ impl SubqueryRewriter { marker_index: None, from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, } .into(); @@ -460,7 +461,8 @@ impl SubqueryRewriter { marker_index: join.marker_index, from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, } .into(), ), diff --git a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs 
b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs index eb26f4d551f7..b2f4eeddcf73 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs @@ -478,7 +478,8 @@ impl SubqueryRewriter { marker_index: None, from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, } .into(); Ok(( @@ -547,7 +548,8 @@ impl SubqueryRewriter { marker_index: Some(marker_index), from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, } .into(); let s_expr = SExpr::create_binary( @@ -577,7 +579,8 @@ impl SubqueryRewriter { marker_index: None, from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, } .into(); diff --git a/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs b/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs index 54bf1bd8b1c4..3ad4c230d99e 100644 --- a/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs +++ b/src/query/sql/src/planner/optimizer/distributed/distributed_merge.rs @@ -16,21 +16,21 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::SExpr; use crate::plans::AddRowNumber; use crate::plans::Exchange::Broadcast; use crate::plans::Join; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; pub struct MergeSourceOptimizer { - pub merge_source_pattern: SExpr, + pub merge_source_matcher: Matcher, } impl MergeSourceOptimizer { pub fn create() -> Self { Self { - merge_source_pattern: Self::merge_source_pattern(), + merge_source_matcher: Self::merge_source_matcher(), } } @@ -68,14 +68,13 @@ impl MergeSourceOptimizer { let mut join: Join = join_s_expr.plan().clone().try_into()?; join.need_hold_hash_table = true; - join.broadcast = true; let mut join_s_expr = join_s_expr.replace_plan(Arc::new(RelOperator::Join(join))); join_s_expr = join_s_expr.replace_children(new_join_children); Ok(s_expr.replace_children(vec![Arc::new(join_s_expr)])) } // for right outer join (source as build) - fn merge_source_pattern() -> SExpr { + fn merge_source_matcher() -> Matcher { // Input: // Exchange(Merge) // | @@ -97,49 +96,21 @@ impl MergeSourceOptimizer { // | AddRowNumber // | | // * * - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Exchange, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Exchange, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Exchange, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )), - ) + Matcher::MatchOp { + op_type: RelOp::Exchange, + children: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![ + Matcher::MatchOp { + op_type: RelOp::Exchange, + children: vec![Matcher::Leaf], + }, + Matcher::MatchOp { + op_type: RelOp::Exchange, + children: vec![Matcher::Leaf], + }, + ], + }], + } } } diff --git a/src/query/sql/src/planner/optimizer/distributed/sort_and_limit.rs 
b/src/query/sql/src/planner/optimizer/distributed/sort_and_limit.rs index 526253e9f68c..2dc99aac4034 100644 --- a/src/query/sql/src/planner/optimizer/distributed/sort_and_limit.rs +++ b/src/query/sql/src/planner/optimizer/distributed/sort_and_limit.rs @@ -16,30 +16,30 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::SExpr; use crate::plans::Exchange; use crate::plans::Limit; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; use crate::plans::Sort; pub struct SortAndLimitPushDownOptimizer { - sort_pattern: SExpr, - limit_pattern: SExpr, + sort_matcher: Matcher, + limit_matcher: Matcher, } impl SortAndLimitPushDownOptimizer { pub fn create() -> Self { Self { - sort_pattern: Self::sort_pattern(), - limit_pattern: Self::limit_pattern(), + sort_matcher: Self::sort_matcher(), + limit_matcher: Self::limit_matcher(), } } /// `limit` is already pushed down to `Sort`, /// so the TopN scenario is already contained in this pattern. - fn sort_pattern() -> SExpr { + fn sort_matcher() -> Matcher { // Input: // Sort // \ @@ -54,31 +54,16 @@ impl SortAndLimitPushDownOptimizer { // Sort (after_exchange = false) // \ // * - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Exchange, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - ) + Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Exchange, + children: vec![Matcher::Leaf], + }], + } } - fn limit_pattern() -> SExpr { + fn limit_matcher() -> Matcher { // Input: // Limit // \ @@ -93,28 +78,13 @@ impl SortAndLimitPushDownOptimizer { // Limit // \ // * - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Limit, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Exchange, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - ) + Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::Exchange, + children: vec![Matcher::Leaf], + }], + } } pub fn optimize(&self, s_expr: &SExpr) -> Result { @@ -129,7 +99,7 @@ impl SortAndLimitPushDownOptimizer { } fn apply_sort(&self, s_expr: &SExpr) -> Result { - if !s_expr.match_pattern(&self.sort_pattern) { + if !self.sort_matcher.matches(s_expr) { return Ok(s_expr.clone()); } @@ -153,7 +123,7 @@ impl SortAndLimitPushDownOptimizer { } fn apply_limit(&self, s_expr: &SExpr) -> Result { - if !s_expr.match_pattern(&self.limit_pattern) { + if !self.limit_matcher.matches(s_expr) { return Ok(s_expr.clone()); } diff --git a/src/query/sql/src/planner/optimizer/extract.rs b/src/query/sql/src/planner/optimizer/extract.rs new file mode 100644 index 000000000000..b6996076489c --- /dev/null +++ b/src/query/sql/src/planner/optimizer/extract.rs @@ -0,0 +1,276 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; + +use crate::optimizer::group::Group; +use crate::optimizer::m_expr::MExpr; +use crate::optimizer::memo::Memo; +use crate::optimizer::SExpr; +use crate::plans::Operator; +use crate::plans::RelOp; +use crate::plans::RelOperator; + +/// A matcher used to describe a pattern to be matched. +pub enum Matcher { + /// A pattern to match an operator with children. + MatchOp { + /// The type of the operator to match. + op_type: RelOp, + children: Vec, + }, + /// A pattern to match an operator with a predicate. + MatchFn { + /// The function to match the operator. + predicate: Box bool + 'static>, + children: Vec, + }, + /// A leaf pattern to match any node. + Leaf, +} + +impl Matcher { + /// Check if the `SExpr` can be matched by the `Matcher`. + pub fn matches(&self, s_expr: &SExpr) -> bool { + match self { + Matcher::MatchOp { op_type, children } => { + if s_expr.plan().rel_op() != *op_type { + return false; + } + + if s_expr.plan().arity() != children.len() { + return false; + } + + for (child_expr, child_matcher) in s_expr.children.iter().zip(children.iter()) { + if !child_matcher.matches(child_expr) { + return false; + } + } + + true + } + + Matcher::MatchFn { + predicate, + children, + } => { + if !predicate(s_expr.plan()) { + return false; + } + + if s_expr.plan().arity() != children.len() { + return false; + } + + for (child_expr, child_matcher) in s_expr.children.iter().zip(children.iter()) { + if !child_matcher.matches(child_expr) { + return false; + } + } + + true + } + + Matcher::Leaf => true, + } + } + + /// Check if the `RelOperator` can be matched by the `Matcher`. + pub fn matches_op(&self, op: &RelOperator) -> bool { + match self { + Matcher::MatchOp { op_type, children } => { + op.rel_op() == *op_type && op.arity() == children.len() + } + + Matcher::MatchFn { + predicate, + children, + } => predicate(op) && op.arity() == children.len(), + + Matcher::Leaf => true, + } + } +} + +/// A helper to extract `SExpr`s from `Memo` that match the given pattern. +pub struct PatternExtractor {} + +impl Default for PatternExtractor { + fn default() -> Self { + Self::new() + } +} + +impl PatternExtractor { + pub fn new() -> Self { + PatternExtractor {} + } + + pub fn extract( + &mut self, + memo: &Memo, + m_expr: &MExpr, + matcher: &Matcher, + ) -> Result> { + match matcher { + Matcher::MatchOp { children, .. 
} => { + if !matcher.matches_op(&m_expr.plan) { + return Ok(vec![]); + } + + let mut children_results = Vec::with_capacity(m_expr.arity()); + for (child_index, child_matcher) in m_expr.children.iter().zip(children.iter()) { + let child_group = memo.group(*child_index)?; + let result = self.extract_group(memo, child_group, child_matcher)?; + children_results.push(result); + } + + Self::generate_expression_with_children(memo, m_expr, children_results) + } + + Matcher::MatchFn { + predicate, + children, + } => { + if !predicate(&m_expr.plan) { + return Ok(vec![]); + } + + let mut children_results = Vec::with_capacity(m_expr.arity()); + for (child_index, child_matcher) in m_expr.children.iter().zip(children.iter()) { + let child_group = memo.group(*child_index)?; + let result = self.extract_group(memo, child_group, child_matcher)?; + children_results.push(result); + } + + Self::generate_expression_with_children(memo, m_expr, children_results) + } + + Matcher::Leaf => { + // Expand the pattern node to a complete `SExpr` if it's a leaf node. + let child = Self::expand_pattern(memo, m_expr)?; + Ok(vec![child]) + } + } + } + + fn extract_group( + &mut self, + memo: &Memo, + group: &Group, + matcher: &Matcher, + ) -> Result> { + let mut results = vec![]; + for m_expr in group.m_exprs.iter() { + let result = self.extract(memo, m_expr, matcher)?; + results.extend(result.into_iter()); + } + + Ok(results) + } + + fn generate_expression_with_children( + memo: &Memo, + m_expr: &MExpr, + candidates: Vec>, + ) -> Result> { + let mut results = vec![]; + + // Initialize cursors + let mut cursors: Vec = vec![]; + for candidate in candidates.iter() { + if candidate.is_empty() { + // Every child should have at least one candidate + return Ok(results); + } + cursors.push(0); + } + + if cursors.is_empty() { + results.push(SExpr::create( + m_expr.plan.clone(), + vec![], + Some(m_expr.group_index), + Some(memo.group(m_expr.group_index)?.relational_prop.clone()), + Some(memo.group(m_expr.group_index)?.stat_info.clone()), + )); + return Ok(results); + } + + 'LOOP: loop { + let mut children = vec![]; + for (index, cursor) in cursors.iter().enumerate() { + children.push(Arc::new(candidates[index][*cursor].clone())); + } + results.push(SExpr::create( + m_expr.plan.clone(), + children, + Some(m_expr.group_index), + Some(memo.group(m_expr.group_index)?.relational_prop.clone()), + Some(memo.group(m_expr.group_index)?.stat_info.clone()), + )); + + let mut shifted = false; + // Shift cursor + for i in (0..cursors.len()).rev() { + if !shifted { + // Shift cursor + cursors[i] += 1; + shifted = true; + } + + if i == 0 && cursors[0] > candidates[0].len() - 1 { + // Candidates are exhausted + break 'LOOP; + } else if i > 0 && cursors[i] > candidates[i].len() - 1 { + // Shift previous children + cursors[i] = 0; + cursors[i - 1] += 1; + continue; + } else { + break; + } + } + } + + Ok(results) + } + + /// Expand a `Pattern` node to an arbitrary `SExpr` with `m_expr` as the root. + /// Since we don't care about the actual content of the `Pattern` node, we will + /// choose the first `MExpr` in each group to construct the `SExpr`. 
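The cursor loop in `generate_expression_with_children` above enumerates the Cartesian product of the per-child candidate lists. A standalone sketch of that enumeration, using string candidates instead of `SExpr`s (the `cartesian` helper and its types are illustrative, not part of the patch; the patch writes the carry logic differently but enumerates the same combinations):

    fn cartesian<'a>(candidates: &[Vec<&'a str>]) -> Vec<Vec<&'a str>> {
        let mut results = vec![];
        if candidates.iter().any(|c| c.is_empty()) {
            // Every child must contribute at least one candidate.
            return results;
        }
        let mut cursors = vec![0usize; candidates.len()];
        loop {
            // Materialize the combination selected by the current cursors.
            results.push(
                cursors
                    .iter()
                    .enumerate()
                    .map(|(i, &c)| candidates[i][c])
                    .collect(),
            );
            // Advance the rightmost cursor, carrying leftwards when a list is exhausted.
            let mut i = candidates.len();
            loop {
                if i == 0 {
                    return results;
                }
                i -= 1;
                cursors[i] += 1;
                if cursors[i] < candidates[i].len() {
                    break;
                }
                cursors[i] = 0;
            }
        }
    }

    fn main() {
        let combos = cartesian(&[vec!["a1", "a2"], vec!["b1"]]);
        assert_eq!(combos, vec![vec!["a1", "b1"], vec!["a2", "b1"]]);
    }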
+ fn expand_pattern(memo: &Memo, m_expr: &MExpr) -> Result { + let mut children = Vec::with_capacity(m_expr.arity()); + for child in m_expr.children.iter() { + let child_group = memo.group(*child)?; + let child_m_expr = child_group + .m_exprs + .first() + .ok_or_else(|| ErrorCode::Internal(format!("No MExpr in group {child}")))?; + children.push(Arc::new(Self::expand_pattern(memo, child_m_expr)?)); + } + + Ok(SExpr::create( + m_expr.plan.clone(), + children, + Some(m_expr.group_index), + Some(memo.group(m_expr.group_index)?.relational_prop.clone()), + Some(memo.group(m_expr.group_index)?.stat_info.clone()), + )) + } +} diff --git a/src/query/sql/src/planner/optimizer/filter/deduplicate_join_condition.rs b/src/query/sql/src/planner/optimizer/filter/deduplicate_join_condition.rs new file mode 100644 index 000000000000..45ef4d9bb247 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/filter/deduplicate_join_condition.rs @@ -0,0 +1,134 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::sync::Arc; + +use databend_common_exception::Result; + +use crate::optimizer::SExpr; +use crate::plans::Join; +use crate::plans::JoinType; +use crate::plans::RelOperator; +use crate::ScalarExpr; + +// The DeduplicateJoinConditionOptimizer will use the Union-Find algorithm to remove duplicate join conditions, +// for example: select * from t1, t2, t3 where t1.id = t2.id and t1.id = t3.id and t2.id = t3.id, its join tree is: +// +// Join [t1.id = t3.id, t2.id = t3.id(removed)] +// / \ +// t3 \ +// Join: [t1.id = t2.id] +// / \ +// t1 t2 +// +// Join condition `t2.id = t3.id` is duplicate, so we can remove it. 
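A minimal standalone sketch of that Union-Find deduplication, using column indexes in place of `ScalarExpr`s; the `find` helper, the index numbering, and `main` are illustrative assumptions rather than code from this patch, but the union/find bookkeeping mirrors what `DeduplicateJoinConditionOptimizer` does per join condition:

    fn find(parent: &mut Vec<usize>, x: usize) -> usize {
        let p = parent[x];
        if p == x {
            return x;
        }
        let root = find(parent, p);
        parent[x] = root; // path compression, like the optimizer's `find`
        root
    }

    fn main() {
        // 0 = t1.id, 1 = t2.id, 2 = t3.id
        let mut parent: Vec<usize> = (0..3).collect();
        let conditions = [(0, 1), (0, 2), (1, 2)];
        let mut kept = vec![];
        for &(l, r) in conditions.iter() {
            let (rl, rr) = (find(&mut parent, l), find(&mut parent, r));
            if rl != rr {
                parent[rr] = rl; // union the two equivalence classes
                kept.push((l, r));
            }
            // Otherwise both sides already share a root, so the condition is implied and dropped.
        }
        // `t2.id = t3.id` adds no information: only the first two conditions survive.
        assert_eq!(kept, vec![(0, 1), (0, 2)]);
    }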
+pub struct DeduplicateJoinConditionOptimizer { + pub scalar_expr_index: HashMap, + pub parent: HashMap, + pub num_scalar_expr: usize, +} + +impl DeduplicateJoinConditionOptimizer { + pub fn new() -> Self { + DeduplicateJoinConditionOptimizer { + scalar_expr_index: HashMap::new(), + parent: HashMap::new(), + num_scalar_expr: 0, + } + } + + pub fn run(mut self, s_expr: &SExpr) -> Result { + self.deduplicate(s_expr) + } + + pub fn deduplicate(&mut self, s_expr: &SExpr) -> Result { + match s_expr.plan.as_ref() { + RelOperator::Join(join) if join.join_type == JoinType::Inner => { + self.deduplicate_join_conditions(s_expr, join) + } + _ => self.deduplicate_children(s_expr), + } + } + + fn deduplicate_join_conditions(&mut self, s_expr: &SExpr, join: &Join) -> Result { + debug_assert!(join.join_type == JoinType::Inner); + + let left = self.deduplicate(s_expr.child(0)?)?; + let right = self.deduplicate(s_expr.child(1)?)?; + let mut join = join.clone(); + let mut new_left_conditions = Vec::new(); + let mut new_right_conditions = Vec::new(); + for (left_condition, right_condition) in join + .left_conditions + .iter() + .zip(join.right_conditions.iter()) + { + let left_index = self.get_scalar_expr_index(left_condition); + let right_index = self.get_scalar_expr_index(right_condition); + let left_parent_index = self.find(left_index); + let right_parent_index = self.find(right_index); + if left_parent_index != right_parent_index { + *self.parent.get_mut(&right_parent_index).unwrap() = left_parent_index; + new_left_conditions.push(left_condition.clone()); + new_right_conditions.push(right_condition.clone()); + } + } + if new_left_conditions.len() != join.left_conditions.len() { + join.left_conditions = new_left_conditions; + join.right_conditions = new_right_conditions; + } + let s_expr = s_expr.replace_plan(Arc::new(RelOperator::Join(join))); + Ok(s_expr.replace_children(vec![Arc::new(left), Arc::new(right)])) + } + + pub fn deduplicate_children(&mut self, s_expr: &SExpr) -> Result { + let mut children = Vec::with_capacity(s_expr.children().len()); + for child in s_expr.children() { + let child = self.deduplicate(child)?; + children.push(Arc::new(child)); + } + Ok(s_expr.replace_children(children)) + } + + fn get_scalar_expr_index(&mut self, scalar_expr: &ScalarExpr) -> usize { + match self.scalar_expr_index.get(scalar_expr) { + Some(index) => *index, + None => { + let index = self.num_scalar_expr; + self.scalar_expr_index.insert(scalar_expr.clone(), index); + self.num_scalar_expr += 1; + index + } + } + } + + fn find(&mut self, index: usize) -> usize { + match self.parent.get(&index) { + Some(parent_index) => { + if index != *parent_index { + let new_parent_index = self.find(*parent_index); + self.parent.insert(index, new_parent_index); + new_parent_index + } else { + index + } + } + None => { + self.parent.insert(index, index); + index + } + } + } +} diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_infer_filter.rs b/src/query/sql/src/planner/optimizer/filter/infer_filter.rs similarity index 58% rename from src/query/sql/src/planner/optimizer/rule/rewrite/rule_infer_filter.rs rename to src/query/sql/src/planner/optimizer/filter/infer_filter.rs index de60d5c79e9f..8df451b523b3 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_infer_filter.rs +++ b/src/query/sql/src/planner/optimizer/filter/infer_filter.rs @@ -13,7 +13,7 @@ // limitations under the License. 
use std::collections::HashMap; -use std::sync::Arc; +use std::collections::HashSet; use databend_common_exception::Result; use databend_common_expression::types::DataType; @@ -26,144 +26,197 @@ use crate::optimizer::rule::constant::check_float_range; use crate::optimizer::rule::constant::check_int_range; use crate::optimizer::rule::constant::check_uint_range; use crate::optimizer::rule::constant::remove_trivial_type_cast; -use crate::optimizer::rule::Rule; -use crate::optimizer::rule::TransformResult; -use crate::optimizer::RuleID; -use crate::optimizer::SExpr; use crate::plans::ComparisonOp; use crate::plans::ConstantExpr; -use crate::plans::Filter; use crate::plans::FunctionCall; -use crate::plans::PatternPlan; -use crate::plans::RelOp; use crate::plans::ScalarExpr; +use crate::plans::VisitorMut; +use crate::ColumnSet; -// The rule tries to infer new predicates from existing predicates, for example: +// The InferFilterOptimizer tries to infer new predicates from existing predicates, for example: // 1. [A > 1 and A > 5] => [A > 5], [A > 1 and A <= 1 => false], [A = 1 and A < 10] => [A = 1] // 2. [A = 10 and A = B] => [B = 10] -// TODO(Dousir9): [A = B and A = C] => [B = C] -pub struct RuleInferFilter { - id: RuleID, - patterns: Vec, +// 3. [A = B and A = C] => [B = C] +pub struct InferFilterOptimizer<'a> { + // All ScalarExprs. + exprs: Vec, + // The index of ScalarExpr in `exprs`. + expr_index: HashMap, + // The equal ScalarExprs of each ScalarExpr. + expr_equal_to: Vec>, + // The predicates of each ScalarExpr. + expr_predicates: Vec>, + // If the whole predicates is false. + is_falsy: bool, + // The `join_prop` is used for filter push down join. + join_prop: Option>, } -impl RuleInferFilter { - pub fn new() -> Self { +impl<'a> InferFilterOptimizer<'a> { + pub fn new(join_prop: Option>) -> Self { Self { - id: RuleID::InferFilter, - // Filter - // \ - // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], + exprs: vec![], + expr_index: HashMap::new(), + expr_equal_to: vec![], + expr_predicates: vec![], + is_falsy: false, + join_prop, } } -} -#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Debug)] -pub struct Predicate { - op: ComparisonOp, - constant: ConstantExpr, -} + pub fn run(mut self, mut predicates: Vec) -> Result> { + // Remove trivial type cast. + for predicate in predicates.iter_mut() { + if let ScalarExpr::FunctionCall(func) = predicate { + if ComparisonOp::try_from_func_name(&func.func_name).is_some() { + let (left, right) = remove_trivial_type_cast( + func.arguments[0].clone(), + func.arguments[1].clone(), + ); + if left != func.arguments[0] { + func.arguments[0] = left; + } + if right != func.arguments[1] { + func.arguments[1] = right; + } + } + } + } -pub struct PredicateSet { - exprs: Vec, - num_exprs: usize, - expr_to_idx: HashMap, - equal_exprs: Vec>, - predicates: Vec>, - is_merged: bool, - is_falsy: bool, -} + // Process each predicate, add it to the optimizer if it can be used to infer new predicates, + // otherwise, add it to the remaining predicates. 
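To make rule 1 above concrete, here is a tiny self-contained sketch that folds comparison predicates on a single column into an inclusive integer interval, with an empty interval signalling a falsy conjunction; the `Interval` type and `apply` helper are illustrative only and are not how `merge_predicate` below is implemented:

    #[derive(Clone, Copy, Debug, PartialEq)]
    struct Interval {
        lo: i64, // inclusive lower bound
        hi: i64, // inclusive upper bound
    }

    fn apply(iv: Interval, op: &str, c: i64) -> Interval {
        match op {
            ">" => Interval { lo: iv.lo.max(c + 1), ..iv },
            ">=" => Interval { lo: iv.lo.max(c), ..iv },
            "<" => Interval { hi: iv.hi.min(c - 1), ..iv },
            "<=" => Interval { hi: iv.hi.min(c), ..iv },
            "=" => Interval { lo: iv.lo.max(c), hi: iv.hi.min(c) },
            _ => iv,
        }
    }

    fn main() {
        let full = Interval { lo: i64::MIN, hi: i64::MAX };
        // [A > 1 AND A > 5] => [A > 5]
        assert_eq!(apply(apply(full, ">", 1), ">", 5), Interval { lo: 6, hi: i64::MAX });
        // [A > 1 AND A <= 1] => false: the interval is empty
        let falsy = apply(apply(full, ">", 1), "<=", 1);
        assert!(falsy.lo > falsy.hi);
        // [A = 1 AND A < 10] => [A = 1]
        assert_eq!(apply(apply(full, "=", 1), "<", 10), Interval { lo: 1, hi: 1 });
    }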
+ let mut remaining_predicates = vec![]; + for predicate in predicates.into_iter() { + if let ScalarExpr::FunctionCall(func) = &predicate { + if let Some(op) = ComparisonOp::try_from_func_name(&func.func_name) { + match ( + func.arguments[0].has_one_column_ref(), + func.arguments[1].has_one_column_ref(), + ) { + (true, true) => { + if op == ComparisonOp::Equal { + self.add_equal_expr(&func.arguments[0], &func.arguments[1]); + } else { + remaining_predicates.push(predicate); + } + } + (true, false) + if let ScalarExpr::ConstantExpr(constant) = &func.arguments[1] => + { + let (is_adjusted, constant) = adjust_scalar( + constant.value.clone(), + func.arguments[0].data_type()?, + ); + if is_adjusted { + self.add_expr_predicate(&func.arguments[0], Predicate { + op, + constant, + }); + } else { + remaining_predicates.push(predicate); + } + } + (false, true) + if let ScalarExpr::ConstantExpr(constant) = &func.arguments[0] => + { + let (is_adjusted, constant) = adjust_scalar( + constant.value.clone(), + func.arguments[1].data_type()?, + ); + if is_adjusted { + self.add_expr_predicate(&func.arguments[1], Predicate { + op: op.reverse(), + constant, + }); + } else { + remaining_predicates.push(predicate); + } + } + _ => remaining_predicates.push(predicate), + } + } else { + remaining_predicates.push(predicate); + } + } else { + remaining_predicates.push(predicate); + } + } -enum MergeResult { - All, - Left, - Right, - None, -} + let mut new_predicates = vec![]; + if !self.is_falsy { + // Derive new predicates from existing predicates, `derive_predicates` may change is_falsy to true. + new_predicates = self.derive_predicates(); + } -impl PredicateSet { - fn new() -> Self { - Self { - exprs: vec![], - num_exprs: 0, - expr_to_idx: HashMap::new(), - equal_exprs: vec![], - predicates: vec![], - is_merged: false, - is_falsy: false, + if self.is_falsy { + new_predicates = vec![ + ConstantExpr { + span: None, + value: Scalar::Boolean(false), + } + .into(), + ]; + } else { + // Derive new predicates from remaining predicates. 
+ new_predicates.extend(self.derive_remaining_predicates(remaining_predicates)); } + + Ok(new_predicates) } fn add_expr( &mut self, expr: &ScalarExpr, - predicates: Vec, - equal_exprs: Vec, + expr_predicates: Vec, + expr_equal_to: Vec, ) { + self.expr_index.insert(expr.clone(), self.exprs.len()); self.exprs.push(expr.clone()); - self.expr_to_idx.insert(expr.clone(), self.num_exprs); - self.predicates.push(predicates); - self.equal_exprs.push(equal_exprs); - self.num_exprs += 1; + self.expr_predicates.push(expr_predicates); + self.expr_equal_to.push(expr_equal_to); } - fn add_equal(&mut self, left: &ScalarExpr, right: &ScalarExpr) { - match self.expr_to_idx.get(left) { - Some(idx) => { - let equal_exprs = &mut self.equal_exprs[*idx]; - equal_exprs.push(right.clone()); - } + pub fn add_equal_expr(&mut self, left: &ScalarExpr, right: &ScalarExpr) { + match self.expr_index.get(left) { + Some(index) => self.expr_equal_to[*index].push(right.clone()), None => self.add_expr(left, vec![], vec![right.clone()]), }; - if self.expr_to_idx.get(right).is_none() { - self.add_expr(right, vec![], vec![]); - } + + match self.expr_index.get(right) { + Some(index) => self.expr_equal_to[*index].push(left.clone()), + None => self.add_expr(right, vec![], vec![left.clone()]), + }; } - fn add_predicate(&mut self, left: &ScalarExpr, right: Predicate) { - match self.expr_to_idx.get(left) { - Some(idx) => { - let predicates = &mut self.predicates[*idx]; + fn add_expr_predicate(&mut self, expr: &ScalarExpr, new_predicate: Predicate) { + match self.expr_index.get(expr) { + Some(index) => { + let predicates = &mut self.expr_predicates[*index]; for predicate in predicates.iter_mut() { - match Self::merge(predicate, &right) { + match Self::merge_predicate(predicate, &new_predicate) { MergeResult::None => { self.is_falsy = true; - self.is_merged = true; return; } MergeResult::Left => { - self.is_merged = true; return; } MergeResult::Right => { - *predicate = right; - self.is_merged = true; + *predicate = new_predicate; return; } MergeResult::All => (), } } - predicates.push(right); + predicates.push(new_predicate); + } + None => { + self.add_expr(expr, vec![new_predicate], vec![]); } - None => self.add_expr(left, vec![right], vec![]), }; } - fn merge(left: &Predicate, right: &Predicate) -> MergeResult { + fn merge_predicate(left: &Predicate, right: &Predicate) -> MergeResult { match left.op { ComparisonOp::Equal => match right.op { ComparisonOp::Equal => match left.constant == right.constant { @@ -323,60 +376,216 @@ impl PredicateSet { } } - fn derive_predicates(&mut self) -> (bool, Vec) { - let mut is_updated = self.is_merged; + fn derive_predicates(&mut self) -> Vec { let mut result = vec![]; - let num_exprs = self.num_exprs; + let num_exprs = self.exprs.len(); + + // Using the Union-Find algorithm to construct the equal ScalarExpr index sets. 
+ let mut equal_index_sets: HashMap> = HashMap::new(); let mut parents = vec![0; num_exprs]; for (i, parent) in parents.iter_mut().enumerate().take(num_exprs) { *parent = i; } - for (left_idx, equal_exprs) in self.equal_exprs.iter().enumerate() { - for expr in equal_exprs.iter() { - let right_idx = self.expr_to_idx.get(expr).unwrap(); - Self::union(&mut parents, left_idx, *right_idx); + for (left_index, expr_equal_to) in self.expr_equal_to.iter().enumerate() { + for expr in expr_equal_to.iter() { + let right_index = self.expr_index.get(expr).unwrap(); + Self::union(&mut parents, left_index, *right_index); } } - let mut old_predicates_set = self.predicates.clone(); - for predicates in old_predicates_set.iter_mut() { - predicates.sort(); - } - for idx in 0..num_exprs { - let parent_idx = Self::find(&mut parents, idx); - if idx != parent_idx { - let expr = self.exprs[parent_idx].clone(); - let predicates = self.predicates[idx].clone(); + for index in 0..num_exprs { + let parent_index = Self::find(&mut parents, index); + match equal_index_sets.get_mut(&parent_index) { + Some(equal_index_set) => { + equal_index_set.insert(index); + } + None => { + equal_index_sets.insert(parent_index, HashSet::from([index])); + } + } + if index != parent_index { + // Add the predicates to the parent ScalarExpr. + let expr = self.exprs[parent_index].clone(); + let predicates = self.expr_predicates[index].clone(); for predicate in predicates { - self.add_predicate(&expr, predicate); + self.add_expr_predicate(&expr, predicate); } } } - for predicates in self.predicates.iter_mut() { - predicates.sort(); - } - for (scalar, idx) in self.expr_to_idx.iter() { - let parent_idx = Self::find(&mut parents, *idx); - let old_predicates = &old_predicates_set[*idx]; - let parent_predicates = &self.predicates[parent_idx]; - if old_predicates.len() != parent_predicates.len() { - is_updated = true; - } - for (i, predicate) in parent_predicates.iter().enumerate() { - if i < old_predicates.len() && &old_predicates[i] != predicate { - is_updated = true; - } + + // Construct predicates for each ScalarExpr. + for expr in self.exprs.iter() { + let index = self.expr_index.get(expr).unwrap(); + let parent_index = Self::find(&mut parents, *index); + let parent_predicates = &self.expr_predicates[parent_index]; + for predicate in parent_predicates.iter() { result.push(ScalarExpr::FunctionCall(FunctionCall { span: None, func_name: String::from(predicate.op.to_func_name()), params: vec![], arguments: vec![ - scalar.clone(), + expr.clone(), ScalarExpr::ConstantExpr(predicate.constant.clone()), ], })); } } - (is_updated | self.is_falsy, result) + + // Construct equal condition predicates for each equal ScalarExpr index set. 
+ for index in 0..num_exprs { + let parent_index = Self::find(&mut parents, index); + if index == parent_index { + if let Some(equal_index_set) = equal_index_sets.get(&parent_index) { + let mut equal_indexes = equal_index_set.iter().copied().collect::>(); + equal_indexes.sort(); + let equal_indexes_len = equal_indexes.len(); + for i in 0..equal_indexes_len { + for j in i + 1..equal_indexes_len { + result.push(ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: String::from(ComparisonOp::Equal.to_func_name()), + params: vec![], + arguments: vec![ + self.exprs[equal_indexes[i]].clone(), + self.exprs[equal_indexes[j]].clone(), + ], + })); + } + } + } + } + } + + result + } + + fn derive_remaining_predicates(&self, predicates: Vec) -> Vec { + // The ReplaceScalarExpr is used to replace the ScalarExpr of a predicate. + struct ReplaceScalarExpr<'a> { + // The index of ScalarExpr in `exprs`. + expr_index: &'a HashMap, + // The equal ScalarExprs of each ScalarExpr. + expr_equal_to: &'a Vec>, + // The columns used by the predicate. + column_set: HashSet, + // If the predicate can be replaced to generate a new predicate. + can_replace: bool, + } + + impl<'a> ReplaceScalarExpr<'a> { + fn reset(&mut self) { + self.column_set.clear(); + self.can_replace = true; + } + } + + impl<'a> VisitorMut<'_> for ReplaceScalarExpr<'a> { + fn visit(&mut self, expr: &mut ScalarExpr) -> Result<()> { + if let Some(index) = self.expr_index.get(expr) { + let equal_to = &self.expr_equal_to[*index]; + if !equal_to.is_empty() { + let used_columns = expr.used_columns(); + for column in used_columns { + self.column_set.insert(column); + } + *expr = equal_to[0].clone(); + return Ok(()); + } + } + match expr { + ScalarExpr::FunctionCall(expr) => self.visit_function_call(expr), + ScalarExpr::CastExpr(expr) => self.visit_cast_expr(expr), + ScalarExpr::ConstantExpr(_) => Ok(()), + ScalarExpr::BoundColumnRef(_) + | ScalarExpr::WindowFunction(_) + | ScalarExpr::AggregateFunction(_) + | ScalarExpr::LambdaFunction(_) + | ScalarExpr::SubqueryExpr(_) + | ScalarExpr::UDFServerCall(_) + | ScalarExpr::UDFLambdaCall(_) => { + // Can not replace `BoundColumnRef` or can not replace unsupported ScalarExpr. + self.can_replace = false; + Ok(()) + } + } + } + } + + let mut replace = ReplaceScalarExpr { + expr_index: &self.expr_index, + expr_equal_to: &self.expr_equal_to, + column_set: HashSet::new(), + can_replace: true, + }; + + let mut result_predicates = Vec::with_capacity(predicates.len()); + for predicate in predicates { + replace.reset(); + let mut new_predicate = predicate.clone(); + replace.visit(&mut new_predicate).unwrap(); + if !replace.can_replace { + result_predicates.push(predicate); + continue; + } + + let mut can_replace = false; + if let Some(join_prop) = &self.join_prop { + let mut has_left = false; + let mut has_right = false; + for column in replace.column_set.iter() { + if join_prop.left_columns.contains(column) { + has_left = true; + } else if join_prop.right_columns.contains(column) { + has_right = true; + } + } + // We only derive new predicates when the predicate contains columns only from one side of the join. 
+ if has_left && !has_right || !has_left && has_right { + can_replace = true; + } + } else if replace.column_set.len() == 1 { + can_replace = true; + } + + if !can_replace { + result_predicates.push(predicate); + continue; + } + + if new_predicate != predicate { + result_predicates.push(new_predicate); + } + + result_predicates.push(predicate); + } + + result_predicates + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Debug)] +struct Predicate { + op: ComparisonOp, + constant: ConstantExpr, +} + +enum MergeResult { + All, + Left, + Right, + None, +} + +pub struct JoinProperty<'a> { + left_columns: &'a ColumnSet, + right_columns: &'a ColumnSet, +} + +impl<'a> JoinProperty<'a> { + pub fn new(left_columns: &'a ColumnSet, right_columns: &'a ColumnSet) -> Self { + Self { + left_columns, + right_columns, + } } } @@ -489,127 +698,3 @@ pub fn adjust_scalar(scalar: Scalar, data_type: DataType) -> (bool, ConstantExpr value: scalar, }) } - -impl Rule for RuleInferFilter { - fn id(&self) -> RuleID { - self.id - } - - fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { - let filter: Filter = s_expr.plan().clone().try_into()?; - let mut predicates = filter.predicates; - let mut new_predicates = vec![]; - let mut is_rewritten = false; - let mut predicate_set = PredicateSet::new(); - for predicate in predicates.iter_mut() { - if let ScalarExpr::FunctionCall(func) = predicate { - if ComparisonOp::try_from_func_name(&func.func_name).is_some() { - let (left, right) = remove_trivial_type_cast( - func.arguments[0].clone(), - func.arguments[1].clone(), - ); - if left != func.arguments[0] { - is_rewritten = true; - func.arguments[0] = left; - } - if right != func.arguments[1] { - is_rewritten = true; - func.arguments[1] = right; - } - } - } - } - for predicate in predicates.into_iter() { - if let ScalarExpr::FunctionCall(func) = &predicate { - if let Some(op) = ComparisonOp::try_from_func_name(&func.func_name) { - match ( - func.arguments[0].is_column_ref(), - func.arguments[1].is_column_ref(), - ) { - (true, true) => { - if op == ComparisonOp::Equal { - predicate_set.add_equal(&func.arguments[0], &func.arguments[1]); - } - new_predicates.push(predicate); - } - (true, false) => { - if let ScalarExpr::ConstantExpr(constant) = &func.arguments[1] { - let (is_adjusted, constant) = adjust_scalar( - constant.value.clone(), - func.arguments[0].data_type()?, - ); - if is_adjusted { - predicate_set.add_predicate(&func.arguments[0], Predicate { - op, - constant, - }); - } else { - new_predicates.push(predicate); - } - } else { - new_predicates.push(predicate); - } - } - (false, true) => { - if let ScalarExpr::ConstantExpr(constant) = &func.arguments[0] { - let (is_adjusted, constant) = adjust_scalar( - constant.value.clone(), - func.arguments[1].data_type()?, - ); - if is_adjusted { - predicate_set.add_predicate(&func.arguments[1], Predicate { - op: op.reverse(), - constant, - }); - } else { - new_predicates.push(predicate); - } - } else { - new_predicates.push(predicate); - } - } - (false, false) => { - new_predicates.push(predicate); - } - } - } else { - new_predicates.push(predicate); - } - } else { - new_predicates.push(predicate); - } - } - is_rewritten |= predicate_set.is_merged; - if !predicate_set.is_falsy { - // `derive_predicates` may change is_falsy to true. 
- let (is_merged, infer_predicates) = predicate_set.derive_predicates(); - is_rewritten |= is_merged; - new_predicates.extend(infer_predicates); - } - if predicate_set.is_falsy { - new_predicates = vec![ - ConstantExpr { - span: None, - value: Scalar::Boolean(false), - } - .into(), - ]; - } - if is_rewritten { - state.add_result(SExpr::create_unary( - Arc::new( - Filter { - predicates: new_predicates, - } - .into(), - ), - Arc::new(s_expr.child(0)?.clone()), - )); - } - Ok(()) - } - - fn patterns(&self) -> &Vec { - &self.patterns - } -} diff --git a/src/query/sql/src/planner/optimizer/filter/mod.rs b/src/query/sql/src/planner/optimizer/filter/mod.rs new file mode 100644 index 000000000000..012cd32ead4f --- /dev/null +++ b/src/query/sql/src/planner/optimizer/filter/mod.rs @@ -0,0 +1,24 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod deduplicate_join_condition; +mod infer_filter; +mod normalize_disjunctive_filter; +mod pull_up_filter; + +pub use deduplicate_join_condition::DeduplicateJoinConditionOptimizer; +pub use infer_filter::InferFilterOptimizer; +pub use infer_filter::JoinProperty; +pub use normalize_disjunctive_filter::NormalizeDisjunctiveFilterOptimizer; +pub use pull_up_filter::PullUpFilterOptimizer; diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_disjunctive_filter.rs b/src/query/sql/src/planner/optimizer/filter/normalize_disjunctive_filter.rs similarity index 66% rename from src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_disjunctive_filter.rs rename to src/query/sql/src/planner/optimizer/filter/normalize_disjunctive_filter.rs index cddcf2c0a43c..062740d7d20d 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_disjunctive_filter.rs +++ b/src/query/sql/src/planner/optimizer/filter/normalize_disjunctive_filter.rs @@ -12,24 +12,43 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - use databend_common_exception::Result; use databend_common_expression::Scalar; use itertools::Itertools; use crate::binder::split_conjunctions; -use crate::optimizer::rule::Rule; -use crate::optimizer::rule::TransformResult; -use crate::optimizer::RuleID; -use crate::optimizer::SExpr; use crate::plans::ConstantExpr; -use crate::plans::Filter; use crate::plans::FunctionCall; -use crate::plans::PatternPlan; -use crate::plans::RelOp; use crate::plans::ScalarExpr; +// The NormalizeDisjunctiveFilterOptimizer tries to apply the inverse OR distributive law to the predicate. +// (A AND B) OR (A AND C) => A AND (B OR C) +// It'll find all OR expressions and extract the common terms. 
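A small standalone sketch of that factoring on string atoms; the `factor_common` helper and its types are illustrative and are not the `PredicateScalar` machinery used below:

    use std::collections::HashSet;

    // Pull the AND terms shared by every OR branch in front of the OR,
    // keeping what remains of each branch as the residual disjunction.
    fn factor_common<'a>(branches: Vec<Vec<&'a str>>) -> (Vec<&'a str>, Vec<Vec<&'a str>>) {
        let mut common = branches[0].clone(); // assumes at least one branch
        for branch in branches.iter().skip(1) {
            let terms: HashSet<&str> = branch.iter().copied().collect();
            common.retain(|t| terms.contains(t));
        }
        let residual = branches
            .into_iter()
            .map(|b| b.into_iter().filter(|t| !common.contains(t)).collect())
            .collect();
        (common, residual)
    }

    fn main() {
        // (A AND B) OR (A AND C)  =>  A AND (B OR C)
        let (common, residual) = factor_common(vec![vec!["A", "B"], vec!["A", "C"]]);
        assert_eq!(common, vec!["A"]);
        assert_eq!(residual, vec![vec!["B"], vec!["C"]]);
    }

The real optimizer performs the same extraction over `PredicateScalar::And`/`PredicateScalar::Or` trees and, when no common term exists, leaves the original OR untouched.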
+#[derive(Default)] +pub struct NormalizeDisjunctiveFilterOptimizer {} + +impl NormalizeDisjunctiveFilterOptimizer { + pub fn new() -> Self { + NormalizeDisjunctiveFilterOptimizer::default() + } +} + +impl NormalizeDisjunctiveFilterOptimizer { + pub fn run(self, predicates: Vec) -> Result> { + let mut rewritten_predicates = Vec::with_capacity(predicates.len()); + for predicate in predicates.iter() { + let predicate_scalar = predicate_scalar(predicate); + let rewritten_predicate_scalar = rewrite_predicate_ors(predicate_scalar); + rewritten_predicates.push(normalize_predicate_scalar(rewritten_predicate_scalar)); + } + let mut split_predicates: Vec = Vec::with_capacity(rewritten_predicates.len()); + for predicate in rewritten_predicates.iter() { + split_predicates.extend_from_slice(&split_conjunctions(predicate)); + } + Ok(split_predicates) + } +} + #[derive(Clone, PartialEq, Eq, Hash, Debug)] enum PredicateScalar { And(Vec), @@ -37,51 +56,43 @@ enum PredicateScalar { Other(Box), } -fn predicate_scalar(scalar: &ScalarExpr) -> (bool, PredicateScalar) { +fn predicate_scalar(scalar: &ScalarExpr) -> PredicateScalar { match scalar { ScalarExpr::FunctionCall(func) if func.func_name == "and" => { let mut and_args = vec![]; - let mut is_rewritten = false; for argument in func.arguments.iter() { // Recursively flatten the AND expressions. - let (rewritten, predicate) = predicate_scalar(argument); - is_rewritten |= rewritten; + let predicate = predicate_scalar(argument); if let PredicateScalar::And(args) = predicate { and_args.extend(args); } else { and_args.push(predicate); } } - let original_len = and_args.len(); and_args = and_args .into_iter() .unique() .collect::>(); - is_rewritten |= original_len != and_args.len(); - (is_rewritten, PredicateScalar::And(and_args)) + PredicateScalar::And(and_args) } ScalarExpr::FunctionCall(func) if func.func_name == "or" => { let mut or_args = vec![]; - let mut is_rewritten = false; for argument in func.arguments.iter() { // Recursively flatten the OR expressions. - let (rewritten, predicate) = predicate_scalar(argument); - is_rewritten |= rewritten; + let predicate = predicate_scalar(argument); if let PredicateScalar::Or(args) = predicate { or_args.extend(args); } else { or_args.push(predicate); } } - let original_len = or_args.len(); or_args = or_args .into_iter() .unique() .collect::>(); - is_rewritten |= original_len != or_args.len(); - (is_rewritten, PredicateScalar::Or(or_args)) + PredicateScalar::Or(or_args) } - _ => (false, PredicateScalar::Other(Box::from(scalar.clone()))), + _ => PredicateScalar::Other(Box::from(scalar.clone())), } } @@ -119,112 +130,36 @@ fn normalize_predicate_scalar(predicate_scalar: PredicateScalar) -> ScalarExpr { } } -// The rule tries to apply the inverse OR distributive law to the predicate. -// (A AND B) OR (A AND C) => A AND (B OR C) -// It'll find all OR expressions and extract the common terms. 
-pub struct RuleNormalizeDisjunctiveFilter { - id: RuleID, - patterns: Vec, -} - -impl RuleNormalizeDisjunctiveFilter { - pub fn new() -> Self { - Self { - id: RuleID::NormalizeDisjunctiveFilter, - // Filter - // \ - // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], - } - } -} - -impl Rule for RuleNormalizeDisjunctiveFilter { - fn id(&self) -> RuleID { - self.id - } - - fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { - let filter: Filter = s_expr.plan().clone().try_into()?; - let predicates = filter.predicates; - let mut rewritten_predicates = Vec::with_capacity(predicates.len()); - let mut is_rewritten = false; - for predicate in predicates.iter() { - let (rewritten, predicate_scalar) = predicate_scalar(predicate); - is_rewritten |= rewritten; - let (rewritten_predicate_scalar, rewritten) = rewrite_predicate_ors(predicate_scalar); - is_rewritten |= rewritten; - rewritten_predicates.push(normalize_predicate_scalar(rewritten_predicate_scalar)); - } - let mut split_predicates: Vec = Vec::with_capacity(rewritten_predicates.len()); - for predicate in rewritten_predicates.iter() { - split_predicates.extend_from_slice(&split_conjunctions(predicate)); - } - if is_rewritten { - state.add_result(SExpr::create_unary( - Arc::new( - Filter { - predicates: split_predicates, - } - .into(), - ), - Arc::new(s_expr.child(0)?.clone()), - )); - } - Ok(()) - } - - fn patterns(&self) -> &Vec { - &self.patterns - } -} - -fn rewrite_predicate_ors(predicate: PredicateScalar) -> (PredicateScalar, bool) { +fn rewrite_predicate_ors(predicate: PredicateScalar) -> PredicateScalar { match predicate { PredicateScalar::Or(args) => { let mut or_args = Vec::with_capacity(args.len()); for arg in args.iter() { - or_args.push(rewrite_predicate_ors(arg.clone()).0); + or_args.push(rewrite_predicate_ors(arg.clone())); } process_duplicate_or_exprs(or_args) } PredicateScalar::And(args) => { let mut and_args = Vec::with_capacity(args.len()); for arg in args.iter() { - and_args.push(rewrite_predicate_ors(arg.clone()).0); + and_args.push(rewrite_predicate_ors(arg.clone())); } - (PredicateScalar::And(and_args), false) + PredicateScalar::And(and_args) } - PredicateScalar::Other(_) => (predicate, false), + PredicateScalar::Other(_) => predicate, } } // Apply the inverse OR distributive law. -fn process_duplicate_or_exprs(mut or_args: Vec) -> (PredicateScalar, bool) { +fn process_duplicate_or_exprs(mut or_args: Vec) -> PredicateScalar { if or_args.is_empty() { - return ( - PredicateScalar::Other(Box::from(ScalarExpr::ConstantExpr(ConstantExpr { - span: None, - value: Scalar::Boolean(false), - }))), - false, - ); + return PredicateScalar::Other(Box::from(ScalarExpr::ConstantExpr(ConstantExpr { + span: None, + value: Scalar::Boolean(false), + }))); } if or_args.len() == 1 { - return (or_args[0].clone(), false); + return or_args[0].clone(); } // choose the shortest AND expression let mut shortest_exprs: Vec = vec![]; @@ -259,7 +194,7 @@ fn process_duplicate_or_exprs(mut or_args: Vec) -> (PredicateSc } if exist_exprs.is_empty() { - return (PredicateScalar::Or(or_args), false); + return PredicateScalar::Or(or_args); } // Rebuild the OR predicate. 
@@ -297,8 +232,8 @@ fn process_duplicate_or_exprs(mut or_args: Vec) -> (PredicateSc } if exist_exprs.len() == 1 { - (exist_exprs[0].clone(), true) + exist_exprs[0].clone() } else { - (PredicateScalar::And(exist_exprs), true) + PredicateScalar::And(exist_exprs) } } diff --git a/src/query/sql/src/planner/optimizer/filter/pull_up_filter.rs b/src/query/sql/src/planner/optimizer/filter/pull_up_filter.rs new file mode 100644 index 000000000000..9b07d034a3f0 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/filter/pull_up_filter.rs @@ -0,0 +1,248 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_exception::Result; + +use crate::binder::split_conjunctions; +use crate::optimizer::filter::InferFilterOptimizer; +use crate::optimizer::filter::NormalizeDisjunctiveFilterOptimizer; +use crate::optimizer::SExpr; +use crate::plans::EvalScalar; +use crate::plans::Filter; +use crate::plans::FunctionCall; +use crate::plans::Join; +use crate::plans::JoinType; +use crate::plans::RelOperator; +use crate::plans::ScalarItem; +use crate::plans::WindowFuncType; +use crate::MetadataRef; +use crate::ScalarExpr; + +// The PullUpFilterOptimizer will pull up filters to the top of the plan tree and infer new filters. 
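A rough illustration of that pull-up on a toy plan enum; the `Plan` type and `pull_up` function are illustrative only, not the planner's `SExpr`/`RelOperator` machinery, and here every node is treated as transparent, while the real pass only pulls predicates through Filter, eligible Join and EvalScalar nodes and re-applies them locally elsewhere. Filters met while descending are removed in place and their predicates collected, then re-applied once at the top where they can be run through the infer and normalize passes:

    enum Plan {
        Scan(&'static str),
        Filter { predicates: Vec<&'static str>, input: Box<Plan> },
        CrossJoin(Box<Plan>, Box<Plan>),
    }

    fn pull_up(plan: Plan, collected: &mut Vec<&'static str>) -> Plan {
        match plan {
            Plan::Filter { predicates, input } => {
                // Drop the Filter node and remember its predicates.
                collected.extend(predicates);
                pull_up(*input, collected)
            }
            Plan::CrossJoin(l, r) => Plan::CrossJoin(
                Box::new(pull_up(*l, collected)),
                Box::new(pull_up(*r, collected)),
            ),
            leaf => leaf,
        }
    }

    fn main() {
        let plan = Plan::CrossJoin(
            Box::new(Plan::Filter {
                predicates: vec!["t1.a > 1"],
                input: Box::new(Plan::Scan("t1")),
            }),
            Box::new(Plan::Scan("t2")),
        );
        let mut predicates = vec![];
        let rewritten = pull_up(plan, &mut predicates);
        // All collected predicates can now be re-applied once, above the whole tree.
        assert_eq!(predicates, vec!["t1.a > 1"]);
        let _result = Plan::Filter { predicates, input: Box::new(rewritten) };
    }

In the actual optimizer the equality conditions of an inner join are likewise turned into predicates and the join is rewritten to a cross join before the collected predicates are re-attached by `finish`.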
+pub struct PullUpFilterOptimizer { + pub predicates: Vec, + pub metadata: MetadataRef, +} + +impl PullUpFilterOptimizer { + pub fn new(metadata: MetadataRef) -> Self { + PullUpFilterOptimizer { + predicates: vec![], + metadata, + } + } + + pub fn run(mut self, s_expr: &SExpr) -> Result { + let mut s_expr = self.pull_up(s_expr)?; + s_expr = self.finish(s_expr)?; + Ok(s_expr) + } + + pub fn finish(self, s_expr: SExpr) -> Result { + if self.predicates.is_empty() { + Ok(s_expr) + } else { + let predicates = InferFilterOptimizer::new(None).run(self.predicates)?; + let predicates = NormalizeDisjunctiveFilterOptimizer::new().run(predicates)?; + let filter = Filter { predicates }; + Ok(SExpr::create_unary( + Arc::new(filter.into()), + Arc::new(s_expr), + )) + } + } + + pub fn pull_up(&mut self, s_expr: &SExpr) -> Result { + match s_expr.plan.as_ref() { + RelOperator::Filter(filter) => self.pull_up_filter(s_expr, filter), + RelOperator::Join(join) if !join.is_lateral => self.pull_up_join(s_expr, join), + RelOperator::EvalScalar(eval_scalar) => self.pull_up_eval_scalar(s_expr, eval_scalar), + RelOperator::MaterializedCte(_) => Ok(s_expr.clone()), + _ => self.pull_up_others(s_expr), + } + } + + fn pull_up_filter(&mut self, s_expr: &SExpr, filter: &Filter) -> Result { + let child = self.pull_up(s_expr.child(0)?)?; + for predicate in filter.predicates.iter() { + self.predicates.extend(split_conjunctions(predicate)); + } + Ok(child) + } + + fn pull_up_join(&mut self, s_expr: &SExpr, join: &Join) -> Result { + let (left_need_pull_up, right_need_pull_up) = match join.join_type { + JoinType::Inner | JoinType::Cross => (true, true), + JoinType::Left | JoinType::LeftSingle | JoinType::LeftSemi | JoinType::LeftAnti => { + (true, false) + } + JoinType::Right | JoinType::RightSingle | JoinType::RightSemi | JoinType::RightAnti => { + (false, true) + } + _ => (false, false), + }; + let mut left_pull_up = PullUpFilterOptimizer::new(self.metadata.clone()); + let mut right_pull_up = PullUpFilterOptimizer::new(self.metadata.clone()); + let mut left = left_pull_up.pull_up(s_expr.child(0)?)?; + let mut right = right_pull_up.pull_up(s_expr.child(1)?)?; + if left_need_pull_up { + for predicate in left_pull_up.predicates { + self.predicates.extend(split_conjunctions(&predicate)); + } + } else { + left = left_pull_up.finish(left)?; + } + if right_need_pull_up { + for predicate in right_pull_up.predicates { + self.predicates.extend(split_conjunctions(&predicate)); + } + } else { + right = right_pull_up.finish(right)?; + } + let mut join = join.clone(); + if left_need_pull_up && right_need_pull_up { + for (left_condition, right_condition) in join + .left_conditions + .iter() + .zip(join.right_conditions.iter()) + { + let predicate = ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: "eq".to_string(), + params: vec![], + arguments: vec![left_condition.clone(), right_condition.clone()], + }); + self.predicates.push(predicate); + } + for predicate in join.non_equi_conditions.iter() { + self.predicates.extend(split_conjunctions(predicate)); + } + join.left_conditions.clear(); + join.right_conditions.clear(); + join.non_equi_conditions.clear(); + join.join_type = JoinType::Cross; + } + let s_expr = s_expr.replace_plan(Arc::new(RelOperator::Join(join))); + Ok(s_expr.replace_children(vec![Arc::new(left), Arc::new(right)])) + } + + fn pull_up_eval_scalar(&mut self, s_expr: &SExpr, eval_scalar: &EvalScalar) -> Result { + let child = self.pull_up(s_expr.child(0)?)?; + let mut eval_scalar = eval_scalar.clone(); + for 
predicate in self.predicates.iter_mut() { + Self::replace_predicate(predicate, &mut eval_scalar.items, &self.metadata)?; + } + let s_expr = s_expr.replace_plan(Arc::new(RelOperator::EvalScalar(eval_scalar))); + Ok(s_expr.replace_children(vec![Arc::new(child)])) + } + + pub fn pull_up_others(&mut self, s_expr: &SExpr) -> Result { + let mut children = Vec::with_capacity(s_expr.children().len()); + for child in s_expr.children() { + let child = PullUpFilterOptimizer::new(self.metadata.clone()).run(child)?; + children.push(Arc::new(child)); + } + Ok(s_expr.replace_children(children)) + } + + fn replace_predicate( + predicate: &mut ScalarExpr, + items: &mut Vec, + metadata: &MetadataRef, + ) -> Result<()> { + match predicate { + ScalarExpr::BoundColumnRef(column) => { + for item in items.iter() { + if item.index == column.column.index { + return Ok(()); + } + if let ScalarExpr::BoundColumnRef(item_column) = &item.scalar { + if item_column.column.index == column.column.index { + column.column.index = item.index; + return Ok(()); + } + } + } + + let new_index = metadata.write().add_derived_column( + column.column.column_name.clone(), + *column.column.data_type.clone(), + ); + let new_column = column.clone(); + items.push(ScalarItem { + scalar: ScalarExpr::BoundColumnRef(new_column), + index: new_index, + }); + column.column.index = new_index; + } + ScalarExpr::WindowFunction(window) => { + match &mut window.func { + WindowFuncType::Aggregate(agg) => { + for arg in agg.args.iter_mut() { + Self::replace_predicate(arg, items, metadata)?; + } + } + WindowFuncType::LagLead(ll) => { + Self::replace_predicate(&mut ll.arg, items, metadata)?; + if let Some(default) = ll.default.as_mut() { + Self::replace_predicate(default, items, metadata)?; + } + } + WindowFuncType::NthValue(func) => { + Self::replace_predicate(&mut func.arg, items, metadata)?; + } + _ => (), + }; + + for window_partition_by in window.partition_by.iter_mut() { + Self::replace_predicate(window_partition_by, items, metadata)?; + } + + for window_order_by in window.order_by.iter_mut() { + Self::replace_predicate(&mut window_order_by.expr, items, metadata)?; + } + } + ScalarExpr::AggregateFunction(agg_func) => { + for arg in agg_func.args.iter_mut() { + Self::replace_predicate(arg, items, metadata)?; + } + } + ScalarExpr::FunctionCall(func) => { + for arg in func.arguments.iter_mut() { + Self::replace_predicate(arg, items, metadata)?; + } + } + ScalarExpr::LambdaFunction(lambda_func) => { + for arg in lambda_func.args.iter_mut() { + Self::replace_predicate(arg, items, metadata)?; + } + } + ScalarExpr::CastExpr(cast) => { + Self::replace_predicate(&mut cast.argument, items, metadata)?; + } + ScalarExpr::UDFServerCall(udf) => { + for arg in udf.arguments.iter_mut() { + Self::replace_predicate(arg, items, metadata)?; + } + } + ScalarExpr::UDFLambdaCall(udf) => { + Self::replace_predicate(&mut udf.scalar, items, metadata)?; + } + _ => (), + } + Ok(()) + } +} diff --git a/src/query/sql/src/planner/optimizer/format.rs b/src/query/sql/src/planner/optimizer/format.rs index 662a8d61b50b..5a3c0bb71766 100644 --- a/src/query/sql/src/planner/optimizer/format.rs +++ b/src/query/sql/src/planner/optimizer/format.rs @@ -60,7 +60,6 @@ pub fn display_rel_op(rel_op: &RelOperator) -> String { Exchange::MergeSort => "MergeSort".to_string(), }) } - RelOperator::Pattern(_) => "Pattern".to_string(), RelOperator::DummyTableScan(_) => "DummyTableScan".to_string(), RelOperator::ProjectSet(_) => "ProjectSet".to_string(), RelOperator::Window(_) => 
"WindowFunc".to_string(), diff --git a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs index dd84bc13cb42..599f6e335ed8 100644 --- a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs +++ b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs @@ -178,6 +178,12 @@ impl DPhpy { } // Add join conditions for condition_pair in op.left_conditions.iter().zip(op.right_conditions.iter()) { + let left_used_tables = condition_pair.0.used_tables()?; + let right_used_tables = condition_pair.1.used_tables()?; + if left_used_tables.is_empty() || right_used_tables.is_empty() { + is_inner_join = false; + break; + } join_conditions.push((condition_pair.0.clone(), condition_pair.1.clone())); } if !op.non_equi_conditions.is_empty() { @@ -249,7 +255,7 @@ impl DPhpy { self.join_relations.push(JoinRelation::new(&new_s_expr)); Ok((new_s_expr, true)) } - RelOperator::Exchange(_) | RelOperator::AddRowNumber(_) | RelOperator::Pattern(_) => { + RelOperator::Exchange(_) | RelOperator::AddRowNumber(_) => { unreachable!() } RelOperator::DummyTableScan(_) @@ -709,6 +715,9 @@ impl DPhpy { RelOperator::Join(_) => { new_s_expr.plan = join_expr.plan.clone(); new_s_expr.children = join_expr.children.clone(); + if self.filters.is_empty() { + return Ok(new_s_expr); + } // Add filters to `new_s_expr`, then push down filters if possible let mut predicates = vec![]; for filter in self.filters.iter() { @@ -760,9 +769,9 @@ impl DPhpy { let rule = RuleFactory::create_rule(RuleID::PushDownFilterJoin, self.metadata.clone())?; let mut state = TransformResult::new(); if rule - .patterns() + .matchers() .iter() - .any(|pattern| s_expr.match_pattern(pattern)) + .any(|matcher| matcher.matches(&s_expr)) && !s_expr.applied_rule(&rule.id()) { s_expr.set_applied_rule(&rule.id()); diff --git a/src/query/sql/src/planner/optimizer/hyper_dp/join_node.rs b/src/query/sql/src/planner/optimizer/hyper_dp/join_node.rs index 191910e23275..b42cea78ff6b 100644 --- a/src/query/sql/src/planner/optimizer/hyper_dp/join_node.rs +++ b/src/query/sql/src/planner/optimizer/hyper_dp/join_node.rs @@ -87,7 +87,8 @@ impl JoinNode { marker_index: None, from_correlated_subquery: false, need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, }); let children = self .children diff --git a/src/query/profile/src/lib.rs b/src/query/sql/src/planner/optimizer/join/mod.rs similarity index 85% rename from src/query/profile/src/lib.rs rename to src/query/sql/src/planner/optimizer/join/mod.rs index 5c3fb77006e6..d7ea095997ba 100644 --- a/src/query/profile/src/lib.rs +++ b/src/query/sql/src/planner/optimizer/join/mod.rs @@ -12,10 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -mod mgr; -mod proc; -mod prof; +mod single_to_inner; -pub use mgr::QueryProfileManager; -pub use proc::*; -pub use prof::*; +pub use single_to_inner::SingleToInnerOptimizer; diff --git a/src/query/sql/src/planner/optimizer/join/single_to_inner.rs b/src/query/sql/src/planner/optimizer/join/single_to_inner.rs new file mode 100644 index 000000000000..85545c119952 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/join/single_to_inner.rs @@ -0,0 +1,52 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_exception::Result; + +use crate::optimizer::SExpr; +use crate::plans::JoinType; +use crate::plans::RelOperator; + +// The SingleToInnerOptimizer will convert some single join to inner join. +pub struct SingleToInnerOptimizer {} + +impl SingleToInnerOptimizer { + pub fn new() -> Self { + SingleToInnerOptimizer {} + } + + pub fn run(self, s_expr: &SExpr) -> Result { + Self::single_to_inner(s_expr) + } + + fn single_to_inner(s_expr: &SExpr) -> Result { + let s_expr = if let RelOperator::Join(join) = s_expr.plan.as_ref() { + let mut join = join.clone(); + if join.single_to_inner.is_some() { + join.join_type = JoinType::Inner; + } + s_expr.replace_plan(Arc::new(RelOperator::Join(join))) + } else { + s_expr.clone() + }; + let mut children = Vec::with_capacity(s_expr.arity()); + for child in s_expr.children() { + let child = Self::single_to_inner(child)?; + children.push(Arc::new(child)); + } + Ok(s_expr.replace_children(children)) + } +} diff --git a/src/query/sql/src/planner/optimizer/m_expr.rs b/src/query/sql/src/planner/optimizer/m_expr.rs index 498ab8691710..630bfd40fa64 100644 --- a/src/query/sql/src/planner/optimizer/m_expr.rs +++ b/src/query/sql/src/planner/optimizer/m_expr.rs @@ -18,13 +18,11 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use super::group::Group; +use crate::optimizer::extract::PatternExtractor; use crate::optimizer::memo::Memo; -use crate::optimizer::pattern_extractor::PatternExtractor; use crate::optimizer::rule::AppliedRules; use crate::optimizer::rule::RulePtr; use crate::optimizer::rule::TransformResult; -use crate::optimizer::SExpr; -use crate::plans::Operator; use crate::plans::RelOperator; use crate::IndexType; @@ -76,19 +74,6 @@ impl MExpr { memo.group(*group_index) } - /// Doesn't check if children are matched - pub fn match_pattern(&self, _memo: &Memo, pattern: &SExpr) -> bool { - if pattern.is_pattern() { - return true; - } - - if self.arity() != pattern.arity() { - return false; - } - - self.plan.rel_op() == pattern.plan().rel_op() - } - pub fn apply_rule( &self, memo: &Memo, @@ -99,8 +84,8 @@ impl MExpr { return Ok(()); } - let mut extractor = PatternExtractor::create(); - for pattern in rule.patterns() { + let mut extractor = PatternExtractor::new(); + for pattern in rule.matchers() { let exprs = extractor.extract(memo, self, pattern)?; for expr in exprs.iter() { rule.apply(expr, transform_state)?; diff --git a/src/query/sql/src/planner/optimizer/mod.rs b/src/query/sql/src/planner/optimizer/mod.rs index e2fad7131b44..5885fb62f0ad 100644 --- a/src/query/sql/src/planner/optimizer/mod.rs +++ b/src/query/sql/src/planner/optimizer/mod.rs @@ -16,14 +16,16 @@ mod cascades; mod cost; mod decorrelate; mod distributed; +mod extract; +mod filter; mod format; mod group; mod hyper_dp; +mod join; mod m_expr; mod memo; #[allow(clippy::module_inception)] mod optimizer; -mod pattern_extractor; mod property; mod rule; pub mod s_expr; @@ -32,6 +34,7 @@ mod util; pub use cascades::CascadesOptimizer; pub use decorrelate::FlattenInfo; pub use decorrelate::SubqueryRewriter; +pub 
use extract::PatternExtractor; pub use hyper_dp::DPhpy; pub use m_expr::MExpr; pub use memo::Memo; @@ -39,7 +42,6 @@ pub use optimizer::optimize; pub use optimizer::optimize_query; pub use optimizer::OptimizerContext; pub use optimizer::RecursiveOptimizer; -pub use pattern_extractor::PatternExtractor; pub use property::*; pub use rule::agg_index; pub use rule::try_push_down_filter_join; diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index f5f02c7c3dec..f4e89dbf3884 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -32,7 +32,10 @@ use crate::optimizer::cascades::CascadesOptimizer; use crate::optimizer::decorrelate::decorrelate_subquery; use crate::optimizer::distributed::optimize_distributed_query; use crate::optimizer::distributed::SortAndLimitPushDownOptimizer; +use crate::optimizer::filter::DeduplicateJoinConditionOptimizer; +use crate::optimizer::filter::PullUpFilterOptimizer; use crate::optimizer::hyper_dp::DPhpy; +use crate::optimizer::join::SingleToInnerOptimizer; use crate::optimizer::rule::TransformResult; use crate::optimizer::util::contains_local_table_scan; use crate::optimizer::RuleFactory; @@ -119,14 +122,13 @@ impl<'a> RecursiveOptimizer<'a> { fn apply_transform_rules(&self, s_expr: &SExpr, rules: &[RuleID]) -> Result { let mut s_expr = s_expr.clone(); - for rule_id in rules { let rule = RuleFactory::create_rule(*rule_id, self.ctx.metadata.clone())?; let mut state = TransformResult::new(); if rule - .patterns() + .matchers() .iter() - .any(|pattern| s_expr.match_pattern(pattern)) + .any(|matcher| matcher.matches(&s_expr)) && !s_expr.applied_rule(&rule.id()) { s_expr.set_applied_rule(&rule.id()); @@ -222,6 +224,9 @@ pub fn optimize_query(opt_ctx: OptimizerContext, mut s_expr: SExpr) -> Result Result Result { - s_expr = - RecursiveOptimizer::new(&[RuleID::EliminateEvalScalar], &opt_ctx).run(&s_expr)?; + let rules = if opt_ctx.enable_join_reorder { + [RuleID::EliminateEvalScalar, RuleID::CommuteJoin].as_slice() + } else { + [RuleID::EliminateEvalScalar].as_slice() + }; + + s_expr = RecursiveOptimizer::new(rules, &opt_ctx).run(&s_expr)?; // Push down sort and limit // TODO(leiysky): do this optimization in cascades optimizer @@ -340,6 +356,8 @@ fn optimize_merge_into(opt_ctx: OptimizerContext, plan: Box) -> Resul Arc::new(right_source), ])); + let join_op = Join::try_from(join_sexpr.plan().clone())?; + let non_equal_join = join_op.right_conditions.is_empty() && join_op.left_conditions.is_empty(); // before, we think source table is always the small table. // 1. for matched only, we use inner join // 2. for insert only, we use right anti join @@ -400,7 +418,7 @@ fn optimize_merge_into(opt_ctx: OptimizerContext, plan: Box) -> Resul { // distributed execution stargeties: // I. change join order is true, we use the `optimize_distributed_query`'s result. - // II. change join order is false and match_pattern and not enable spill, we use right outer join with rownumber distributed strategies. + // II. change join order is false and match_pattern and not enable spill and not non-equal-join, we use right outer join with rownumber distributed strategies. // III otherwise, use `merge_into_join_sexpr` as standalone execution(so if change join order is false,but doesn't match_pattern, we don't support distributed,in fact. case I // can take this at most time, if that's a hash shuffle, the I can take it. We think source is always very small). 
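The SingleToInnerOptimizer introduced earlier in this diff is imported into optimizer.rs above for use in the rewritten optimize_query pipeline: it walks the plan tree and flips any single join that was tagged with `single_to_inner` (the tag is set by the new outer-join elimination later in this diff) back into an inner join. A minimal self-contained sketch of that bottom-up rewrite, using stand-in types rather than the real SExpr/Join definitions:

#[derive(Clone, Debug, PartialEq)]
enum JoinType {
    Inner,
    LeftSingle,
    RightSingle,
}

#[derive(Clone, Debug)]
struct Node {
    join_type: Option<JoinType>,       // None for non-join operators
    single_to_inner: Option<JoinType>, // original single-join type, if tagged
    children: Vec<Node>,
}

fn single_to_inner(node: &Node) -> Node {
    let mut out = node.clone();
    // Rewrite this node: a tagged single join becomes an inner join.
    if out.join_type.is_some() && out.single_to_inner.is_some() {
        out.join_type = Some(JoinType::Inner);
    }
    // Recurse into the children and rebuild the tree.
    out.children = node.children.iter().map(single_to_inner).collect();
    out
}

fn main() {
    let plan = Node {
        join_type: Some(JoinType::LeftSingle),
        single_to_inner: Some(JoinType::LeftSingle),
        children: vec![],
    };
    assert_eq!(single_to_inner(&plan).join_type, Some(JoinType::Inner));
}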
// input is a Join_SExpr @@ -415,7 +433,10 @@ fn optimize_merge_into(opt_ctx: OptimizerContext, plan: Box) -> Resul .get_join_spilling_threshold()? == 0 && !change_join_order - && merge_into_join_sexpr.match_pattern(&merge_source_optimizer.merge_source_pattern) + && merge_source_optimizer + .merge_source_matcher + .matches(&merge_into_join_sexpr) + && !non_equal_join { ( merge_source_optimizer.optimize(&merge_into_join_sexpr)?, @@ -472,8 +493,7 @@ fn try_to_change_as_broadcast_join( if let RelOperator::Exchange(Exchange::Merge) = merge_into_join_sexpr.plan.as_ref() { let right_exchange = merge_into_join_sexpr.child(0)?.child(1)?; if let RelOperator::Exchange(Exchange::Broadcast) = right_exchange.plan.as_ref() { - let mut join: Join = merge_into_join_sexpr.child(0)?.plan().clone().try_into()?; - join.broadcast = true; + let join: Join = merge_into_join_sexpr.child(0)?.plan().clone().try_into()?; let join_s_expr = merge_into_join_sexpr .child(0)? .replace_plan(Arc::new(RelOperator::Join(join))); diff --git a/src/query/sql/src/planner/optimizer/pattern_extractor.rs b/src/query/sql/src/planner/optimizer/pattern_extractor.rs deleted file mode 100644 index ea3d4b8b418e..000000000000 --- a/src/query/sql/src/planner/optimizer/pattern_extractor.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use databend_common_exception::ErrorCode; -use databend_common_exception::Result; - -use crate::optimizer::group::Group; -use crate::optimizer::m_expr::MExpr; -use crate::optimizer::memo::Memo; -use crate::optimizer::SExpr; - -/// A helper to extract `SExpr`s from `Memo` that match the given pattern. -pub struct PatternExtractor {} - -impl PatternExtractor { - pub fn create() -> Self { - PatternExtractor {} - } - - pub fn extract(&mut self, memo: &Memo, m_expr: &MExpr, pattern: &SExpr) -> Result> { - if !m_expr.match_pattern(memo, pattern) { - return Ok(vec![]); - } - - if pattern.is_pattern() { - // Expand the pattern node to a complete `SExpr`. 
- let child = Self::expand_pattern(memo, m_expr)?; - return Ok(vec![child]); - } - - let pattern_children = pattern.children(); - - if m_expr.arity() != pattern_children.len() { - return Ok(vec![]); - } - - let mut children_results = vec![]; - for (i, child) in m_expr.children.iter().enumerate().take(m_expr.arity()) { - let pattern = &pattern_children[i]; - let child_group = memo.group(*child)?; - let result = self.extract_group(memo, child_group, pattern)?; - children_results.push(result); - } - - Self::generate_expression_with_children(memo, m_expr, children_results) - } - - fn extract_group(&mut self, memo: &Memo, group: &Group, pattern: &SExpr) -> Result> { - let mut results = vec![]; - for m_expr in group.m_exprs.iter() { - let result = self.extract(memo, m_expr, pattern)?; - results.extend(result.into_iter()); - } - - Ok(results) - } - - fn generate_expression_with_children( - memo: &Memo, - m_expr: &MExpr, - candidates: Vec>, - ) -> Result> { - let mut results = vec![]; - - // Initialize cursors - let mut cursors: Vec = vec![]; - for candidate in candidates.iter() { - if candidate.is_empty() { - // Every child should have at least one candidate - return Ok(results); - } - cursors.push(0); - } - - if cursors.is_empty() { - results.push(SExpr::create( - m_expr.plan.clone(), - vec![], - Some(m_expr.group_index), - Some(memo.group(m_expr.group_index)?.relational_prop.clone()), - Some(memo.group(m_expr.group_index)?.stat_info.clone()), - )); - return Ok(results); - } - - 'LOOP: loop { - let mut children = vec![]; - for (index, cursor) in cursors.iter().enumerate() { - children.push(Arc::new(candidates[index][*cursor].clone())); - } - results.push(SExpr::create( - m_expr.plan.clone(), - children, - Some(m_expr.group_index), - Some(memo.group(m_expr.group_index)?.relational_prop.clone()), - Some(memo.group(m_expr.group_index)?.stat_info.clone()), - )); - - let mut shifted = false; - // Shift cursor - for i in (0..cursors.len()).rev() { - if !shifted { - // Shift cursor - cursors[i] += 1; - shifted = true; - } - - if i == 0 && cursors[0] > candidates[0].len() - 1 { - // Candidates are exhausted - break 'LOOP; - } else if i > 0 && cursors[i] > candidates[i].len() - 1 { - // Shift previous children - cursors[i] = 0; - cursors[i - 1] += 1; - continue; - } else { - break; - } - } - } - - Ok(results) - } - - /// Expand a `Pattern` node to an arbitrary `SExpr` with `m_expr` as the root. - /// Since we don't care about the actual content of the `Pattern` node, we will - /// choose the first `MExpr` in each group to construct the `SExpr`. 
- fn expand_pattern(memo: &Memo, m_expr: &MExpr) -> Result { - let mut children = Vec::with_capacity(m_expr.arity()); - for child in m_expr.children.iter() { - let child_group = memo.group(*child)?; - let child_m_expr = child_group - .m_exprs - .first() - .ok_or_else(|| ErrorCode::Internal(format!("No MExpr in group {child}")))?; - children.push(Arc::new(Self::expand_pattern(memo, child_m_expr)?)); - } - - Ok(SExpr::create( - m_expr.plan.clone(), - children, - Some(m_expr.group_index), - Some(memo.group(m_expr.group_index)?.relational_prop.clone()), - Some(memo.group(m_expr.group_index)?.stat_info.clone()), - )) - } -} diff --git a/src/query/sql/src/planner/optimizer/rule/factory.rs b/src/query/sql/src/planner/optimizer/rule/factory.rs index e57b176e31c8..55426f0476fc 100644 --- a/src/query/sql/src/planner/optimizer/rule/factory.rs +++ b/src/query/sql/src/planner/optimizer/rule/factory.rs @@ -17,15 +17,13 @@ use databend_common_exception::Result; use super::rewrite::RuleCommuteJoin; use super::rewrite::RuleEliminateEvalScalar; use super::rewrite::RuleFoldCountAggregate; -use super::rewrite::RuleInferFilter; -use super::rewrite::RuleNormalizeDisjunctiveFilter; use super::rewrite::RuleNormalizeScalarFilter; use super::rewrite::RulePushDownFilterAggregate; use super::rewrite::RulePushDownFilterEvalScalar; use super::rewrite::RulePushDownFilterJoin; use super::rewrite::RulePushDownFilterWindow; use super::rewrite::RulePushDownLimitAggregate; -use super::rewrite::RulePushDownLimitExpression; +use super::rewrite::RulePushDownLimitEvalScalar; use super::rewrite::RulePushDownPrewhere; use super::rewrite::RuleTryApplyAggIndex; use crate::optimizer::rule::rewrite::RuleEliminateFilter; @@ -41,6 +39,7 @@ use crate::optimizer::rule::rewrite::RulePushDownLimitOuterJoin; use crate::optimizer::rule::rewrite::RulePushDownLimitScan; use crate::optimizer::rule::rewrite::RulePushDownLimitSort; use crate::optimizer::rule::rewrite::RulePushDownLimitUnion; +use crate::optimizer::rule::rewrite::RulePushDownLimitWindow; use crate::optimizer::rule::rewrite::RulePushDownSortScan; use crate::optimizer::rule::rewrite::RuleSemiToInnerJoin; use crate::optimizer::rule::rewrite::RuleSplitAggregate; @@ -58,10 +57,8 @@ impl RuleFactory { match id { RuleID::EliminateEvalScalar => Ok(Box::new(RuleEliminateEvalScalar::new())), RuleID::PushDownFilterUnion => Ok(Box::new(RulePushDownFilterUnion::new())), - RuleID::PushDownFilterEvalScalar => { - Ok(Box::new(RulePushDownFilterEvalScalar::new(metadata))) - } - RuleID::PushDownFilterJoin => Ok(Box::new(RulePushDownFilterJoin::new(metadata))), + RuleID::PushDownFilterEvalScalar => Ok(Box::new(RulePushDownFilterEvalScalar::new())), + RuleID::PushDownFilterJoin => Ok(Box::new(RulePushDownFilterJoin::new())), RuleID::PushDownFilterScan => Ok(Box::new(RulePushDownFilterScan::new(metadata))), RuleID::PushDownFilterSort => Ok(Box::new(RulePushDownFilterSort::new())), RuleID::PushDownFilterProjectSet => Ok(Box::new(RulePushDownFilterProjectSet::new())), @@ -69,8 +66,9 @@ impl RuleFactory { RuleID::PushDownLimitScan => Ok(Box::new(RulePushDownLimitScan::new())), RuleID::PushDownSortScan => Ok(Box::new(RulePushDownSortScan::new())), RuleID::PushDownLimitOuterJoin => Ok(Box::new(RulePushDownLimitOuterJoin::new())), - RuleID::PushDownLimitExpression => Ok(Box::new(RulePushDownLimitExpression::new())), + RuleID::PushDownLimitEvalScalar => Ok(Box::new(RulePushDownLimitEvalScalar::new())), RuleID::PushDownLimitSort => Ok(Box::new(RulePushDownLimitSort::new())), + RuleID::PushDownLimitWindow => 
Ok(Box::new(RulePushDownLimitWindow::new())), RuleID::PushDownLimitAggregate => Ok(Box::new(RulePushDownLimitAggregate::new())), RuleID::PushDownFilterAggregate => Ok(Box::new(RulePushDownFilterAggregate::new())), RuleID::PushDownFilterWindow => Ok(Box::new(RulePushDownFilterWindow::new())), @@ -81,10 +79,6 @@ impl RuleFactory { RuleID::NormalizeAggregate => Ok(Box::new(RuleNormalizeAggregate::new())), RuleID::SplitAggregate => Ok(Box::new(RuleSplitAggregate::new())), RuleID::FoldCountAggregate => Ok(Box::new(RuleFoldCountAggregate::new())), - RuleID::NormalizeDisjunctiveFilter => { - Ok(Box::new(RuleNormalizeDisjunctiveFilter::new())) - } - RuleID::InferFilter => Ok(Box::new(RuleInferFilter::new())), RuleID::CommuteJoin => Ok(Box::new(RuleCommuteJoin::new())), RuleID::CommuteJoinBaseTable => Ok(Box::new(RuleCommuteJoinBaseTable::new())), RuleID::LeftExchangeJoin => Ok(Box::new(RuleLeftExchangeJoin::new())), @@ -92,7 +86,7 @@ impl RuleFactory { RuleID::PushDownPrewhere => Ok(Box::new(RulePushDownPrewhere::new(metadata))), RuleID::TryApplyAggIndex => Ok(Box::new(RuleTryApplyAggIndex::new(metadata))), RuleID::EliminateSort => Ok(Box::new(RuleEliminateSort::new())), - RuleID::SemiToInnerJoin => Ok(Box::new(RuleSemiToInnerJoin::new(metadata))), + RuleID::SemiToInnerJoin => Ok(Box::new(RuleSemiToInnerJoin::new())), } } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/derive_filter.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/derive_filter.rs deleted file mode 100644 index b2d448218888..000000000000 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/derive_filter.rs +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::collections::HashMap; -use std::sync::Arc; - -use databend_common_exception::Result; - -use crate::optimizer::SExpr; -use crate::plans::walk_expr_mut; -use crate::plans::Filter; -use crate::plans::Join; -use crate::plans::JoinType; -use crate::plans::VisitorMut; -use crate::ScalarExpr; - -/// Derive filter to push down -pub fn try_derive_predicates( - s_expr: &SExpr, - join: Join, - mut left_push_down: Vec, - mut right_push_down: Vec, -) -> Result { - let join_expr = s_expr.child(0)?; - let mut left_child = join_expr.child(0)?.clone(); - let mut right_child = join_expr.child(1)?.clone(); - - if join.join_type == JoinType::Inner { - let mut new_left_push_down = vec![]; - let mut new_right_push_down = vec![]; - for predicate in left_push_down.iter() { - let used_columns = predicate.used_columns(); - let mut equi_conditions_map = HashMap::new(); - for (idx, left_condition) in join.left_conditions.iter().enumerate() { - if left_condition.used_columns().len() > 1 - || !left_condition.used_columns().is_subset(&used_columns) - { - continue; - } - equi_conditions_map.insert(left_condition, &join.right_conditions[idx]); - } - if used_columns.len() == equi_conditions_map.len() { - derive_predicate( - &mut equi_conditions_map, - predicate, - &mut new_right_push_down, - )?; - } - } - for predicate in right_push_down.iter() { - let used_columns = predicate.used_columns(); - let mut equi_conditions_map = HashMap::new(); - for (idx, right_condition) in join.right_conditions.iter().enumerate() { - if right_condition.used_columns().len() > 1 - || !right_condition.used_columns().is_subset(&used_columns) - { - continue; - } - equi_conditions_map.insert(right_condition, &join.left_conditions[idx]); - } - if used_columns.len() == equi_conditions_map.len() { - derive_predicate(&mut equi_conditions_map, predicate, &mut new_left_push_down)?; - } - } - left_push_down.extend(new_left_push_down); - right_push_down.extend(new_right_push_down); - } - - if !left_push_down.is_empty() { - left_child = SExpr::create_unary( - Arc::new( - Filter { - predicates: left_push_down, - } - .into(), - ), - Arc::new(left_child), - ); - } - - if !right_push_down.is_empty() { - right_child = SExpr::create_unary( - Arc::new( - Filter { - predicates: right_push_down, - } - .into(), - ), - Arc::new(right_child), - ); - } - Ok(SExpr::create_binary( - Arc::new(join.into()), - Arc::new(left_child), - Arc::new(right_child), - )) -} - -fn derive_predicate( - equi_conditions_map: &mut HashMap<&ScalarExpr, &ScalarExpr>, - predicate: &ScalarExpr, - new_push_down: &mut Vec, -) -> Result<()> { - let mut replaced_predicate = predicate.clone(); - replace_column(&mut replaced_predicate, equi_conditions_map); - if &replaced_predicate != predicate { - new_push_down.push(replaced_predicate); - } - Ok(()) -} - -fn replace_column( - scalar: &mut ScalarExpr, - equi_conditions_map: &mut HashMap<&ScalarExpr, &ScalarExpr>, -) { - struct ReplaceColumn<'a> { - equi_conditions_map: &'a HashMap<&'a ScalarExpr, &'a ScalarExpr>, - } - - impl<'a> VisitorMut<'_> for ReplaceColumn<'a> { - fn visit(&mut self, expr: &mut ScalarExpr) -> Result<()> { - if let Some(e) = self.equi_conditions_map.get(expr) { - *expr = (**e).clone(); - return Ok(()); - } else if let ScalarExpr::BoundColumnRef(col) = expr { - for (key, val) in self.equi_conditions_map.iter() { - if let ScalarExpr::BoundColumnRef(key_col) = key { - if key_col.column.index == col.column.index { - *expr = (**val).clone(); - return Ok(()); - } - } - } - } - - walk_expr_mut(self, expr) - } - } - - let 
mut replace_column = ReplaceColumn { - equi_conditions_map, - }; - - replace_column.visit(scalar).unwrap(); -} diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/outer_join_to_inner_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/outer_join_to_inner_join.rs deleted file mode 100644 index 3c02c76cb946..000000000000 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/outer_join_to_inner_join.rs +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#[cfg(feature = "z3-prove")] -use std::sync::Arc; - -use databend_common_exception::Result; - -use crate::optimizer::SExpr; -use crate::plans::Join; - -pub fn outer_to_inner(s_expr: &SExpr) -> Result { - let join: Join = s_expr.child(0)?.plan().clone().try_into()?; - let origin_join_type = join.join_type.clone(); - if !origin_join_type.is_outer_join() { - return Ok(s_expr.clone()); - } - - #[cfg(feature = "z3-prove")] - { - use crate::optimizer::RelExpr; - use crate::plans::Filter; - use crate::plans::JoinType; - - let mut join = join; - let filter: Filter = s_expr.plan().clone().try_into()?; - let constraint_set = crate::optimizer::ConstraintSet::new(&filter.predicates); - - let join_expr = RelExpr::with_s_expr(s_expr.child(0)?); - let left_columns = join_expr - .derive_relational_prop_child(0)? - .output_columns - .clone(); - let right_columns = join_expr - .derive_relational_prop_child(1)? - .output_columns - .clone(); - - let eliminate_left_null = left_columns - .iter() - .any(|col| constraint_set.is_null_reject(col)); - let eliminate_right_null = right_columns - .iter() - .any(|col| constraint_set.is_null_reject(col)); - - let new_join_type = match join.join_type { - JoinType::Left => { - if eliminate_right_null { - JoinType::Inner - } else { - JoinType::Left - } - } - JoinType::Right => { - if eliminate_left_null { - JoinType::Inner - } else { - JoinType::Right - } - } - JoinType::Full => { - if eliminate_left_null && eliminate_right_null { - JoinType::Inner - } else if eliminate_left_null { - JoinType::Left - } else if eliminate_right_null { - JoinType::Right - } else { - JoinType::Full - } - } - _ => unreachable!(), - }; - - join.join_type = new_join_type; - Ok(SExpr::create_unary( - Arc::new(filter.into()), - Arc::new(SExpr::create_binary( - Arc::new(join.into()), - Arc::new(s_expr.child(0)?.child(0)?.clone()), - Arc::new(s_expr.child(0)?.child(1)?.clone()), - )), - )) - } - - #[cfg(not(feature = "z3-prove"))] - Ok(s_expr.clone()) -} diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs index b2805ee5aa53..1a01c7ac98eb 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/mod.rs @@ -13,17 +13,15 @@ // limitations under the License. 
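The removed derive_filter.rs above derived extra pushdown predicates for inner joins by substituting one side of an equi-condition for the other; in the new code that duty falls to the InferFilterOptimizer invoked from try_push_down_filter_join further down (its implementation is outside this diff). A rough standalone sketch of the derivation, with toy expression types in place of ScalarExpr: given the join condition `t1.a = t2.b` and the filter `t1.a = 1`, the predicate `t2.b = 1` can be derived and pushed to the other side.

use std::collections::HashMap;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum Expr {
    Column(String),
    EqConst(Box<Expr>, i64), // column = constant
}

// Replace the column in a `col = const` predicate with its equi-join partner.
fn derive(predicate: &Expr, equi_map: &HashMap<Expr, Expr>) -> Option<Expr> {
    match predicate {
        Expr::EqConst(col, v) => equi_map
            .get(col.as_ref())
            .map(|partner| Expr::EqConst(Box::new(partner.clone()), *v)),
        _ => None,
    }
}

fn main() {
    let t1_a = Expr::Column("t1.a".into());
    let t2_b = Expr::Column("t2.b".into());

    // Join condition t1.a = t2.b, recorded as an equivalence.
    let mut equi_map = HashMap::new();
    equi_map.insert(t1_a.clone(), t2_b.clone());

    // Filter t1.a = 1 on the left side yields the derived filter t2.b = 1.
    let filter = Expr::EqConst(Box::new(t1_a), 1);
    assert_eq!(
        derive(&filter, &equi_map),
        Some(Expr::EqConst(Box::new(t2_b), 1))
    );
}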
pub mod agg_index; -mod filter_join; +mod push_down_filter_join; mod rule_commute_join; mod rule_eliminate_eval_scalar; mod rule_eliminate_filter; mod rule_eliminate_sort; mod rule_fold_count_aggregate; -mod rule_infer_filter; mod rule_merge_eval_scalar; mod rule_merge_filter; mod rule_normalize_aggregate; -mod rule_normalize_disjunctive_filter; mod rule_normalize_scalar; mod rule_push_down_filter_aggregate; mod rule_push_down_filter_eval_scalar; @@ -39,6 +37,7 @@ mod rule_push_down_limit_join; mod rule_push_down_limit_scan; mod rule_push_down_limit_sort; mod rule_push_down_limit_union; +mod rule_push_down_limit_window; mod rule_push_down_prewhere; mod rule_push_down_sort_scan; mod rule_semi_to_inner_join; @@ -50,11 +49,9 @@ pub use rule_eliminate_eval_scalar::RuleEliminateEvalScalar; pub use rule_eliminate_filter::RuleEliminateFilter; pub use rule_eliminate_sort::RuleEliminateSort; pub use rule_fold_count_aggregate::RuleFoldCountAggregate; -pub use rule_infer_filter::RuleInferFilter; pub use rule_merge_eval_scalar::RuleMergeEvalScalar; pub use rule_merge_filter::RuleMergeFilter; pub use rule_normalize_aggregate::RuleNormalizeAggregate; -pub use rule_normalize_disjunctive_filter::RuleNormalizeDisjunctiveFilter; pub use rule_normalize_scalar::RuleNormalizeScalarFilter; pub use rule_push_down_filter_aggregate::RulePushDownFilterAggregate; pub use rule_push_down_filter_eval_scalar::RulePushDownFilterEvalScalar; @@ -66,11 +63,12 @@ pub use rule_push_down_filter_sort::RulePushDownFilterSort; pub use rule_push_down_filter_union::RulePushDownFilterUnion; pub use rule_push_down_filter_window::RulePushDownFilterWindow; pub use rule_push_down_limit_aggregate::RulePushDownLimitAggregate; -pub use rule_push_down_limit_expression::RulePushDownLimitExpression; +pub use rule_push_down_limit_expression::RulePushDownLimitEvalScalar; pub use rule_push_down_limit_join::RulePushDownLimitOuterJoin; pub use rule_push_down_limit_scan::RulePushDownLimitScan; pub use rule_push_down_limit_sort::RulePushDownLimitSort; pub use rule_push_down_limit_union::RulePushDownLimitUnion; +pub use rule_push_down_limit_window::RulePushDownLimitWindow; pub use rule_push_down_prewhere::RulePushDownPrewhere; pub use rule_push_down_sort_scan::RulePushDownSortScan; pub use rule_semi_to_inner_join::RuleSemiToInnerJoin; diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/extract_or_predicates.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/extract_or_predicates.rs similarity index 100% rename from src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/extract_or_predicates.rs rename to src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/extract_or_predicates.rs diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/mark_join_to_semi_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/mark_join_to_semi_join.rs similarity index 86% rename from src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/mark_join_to_semi_join.rs rename to src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/mark_join_to_semi_join.rs index 028e4455666b..bf8a758de8ac 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/mark_join_to_semi_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/mark_join_to_semi_join.rs @@ -22,14 +22,14 @@ use crate::plans::Join; use crate::plans::JoinType; use crate::ScalarExpr; -pub fn convert_mark_to_semi_join(s_expr: &SExpr) -> Result 
{ +pub fn convert_mark_to_semi_join(s_expr: &SExpr) -> Result<(SExpr, bool)> { let mut filter: Filter = s_expr.plan().clone().try_into()?; let mut join: Join = s_expr.child(0)?.plan().clone().try_into()?; let has_disjunction = filter.predicates.iter().any( |predicate| matches!(predicate, ScalarExpr::FunctionCall(func) if func.func_name == "or"), ); if !join.join_type.is_mark_join() || has_disjunction { - return Ok(s_expr.clone()); + return Ok((s_expr.clone(), false)); } let mark_index = join.marker_index.unwrap(); @@ -47,7 +47,7 @@ pub fn convert_mark_to_semi_join(s_expr: &SExpr) -> Result { // Check if the argument is mark index, if so, we won't convert it to semi join if let ScalarExpr::BoundColumnRef(col) = &func.arguments[0] { if col.column.index == mark_index { - return Ok(s_expr.clone()); + return Ok((s_expr.clone(), false)); } } } @@ -57,7 +57,7 @@ pub fn convert_mark_to_semi_join(s_expr: &SExpr) -> Result { if !find_mark_index { // To be conservative, we do not convert - return Ok(s_expr.clone()); + return Ok((s_expr.clone(), false)); } join.join_type = match join.join_type { @@ -73,6 +73,8 @@ pub fn convert_mark_to_semi_join(s_expr: &SExpr) -> Result { Arc::new(s_join_expr.child(1)?.clone()), ); - result = SExpr::create_unary(Arc::new(filter.into()), Arc::new(result)); - Ok(result) + if !filter.predicates.is_empty() { + result = SExpr::create_unary(Arc::new(filter.into()), Arc::new(result)); + } + Ok((result, true)) } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/mod.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/mod.rs similarity index 87% rename from src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/mod.rs rename to src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/mod.rs index f35ee3d9e5ef..1f1070b49249 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/filter_join/mod.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/mod.rs @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -mod derive_filter; mod extract_or_predicates; mod mark_join_to_semi_join; mod outer_join_to_inner_join; -pub use derive_filter::try_derive_predicates; pub use extract_or_predicates::rewrite_predicates; pub use mark_join_to_semi_join::convert_mark_to_semi_join; -pub use outer_join_to_inner_join::outer_to_inner; +pub use outer_join_to_inner_join::can_filter_null; +pub use outer_join_to_inner_join::outer_join_to_inner_join; diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/outer_join_to_inner_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/outer_join_to_inner_join.rs new file mode 100644 index 000000000000..f9193afb2857 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/push_down_filter_join/outer_join_to_inner_join.rs @@ -0,0 +1,274 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
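The new outer_join_to_inner_join.rs that follows hinges on deciding whether a filter predicate is null-rejecting: can_filter_null substitutes Scalar::Null for the column references of one join side, evaluates the predicate over an empty block, and treats a FALSE or NULL result as proof that NULL-padded rows are filtered out, so the outer join can be tightened. A rough self-contained sketch of that test under SQL three-valued logic, with toy types standing in for Databend's Evaluator/ScalarExpr:

#[derive(Clone, Copy, Debug, PartialEq)]
enum Tri {
    True,
    False,
    Null,
}

enum Pred {
    GtConst(Option<i64>, i64), // col > constant, column already bound (None = NULL)
    IsNull(Option<i64>),       // col IS NULL
}

fn eval(pred: &Pred) -> Tri {
    match pred {
        Pred::GtConst(None, _) => Tri::Null, // NULL > c evaluates to NULL
        Pred::GtConst(Some(v), c) => {
            if v > c { Tri::True } else { Tri::False }
        }
        Pred::IsNull(col) => {
            if col.is_none() { Tri::True } else { Tri::False }
        }
    }
}

// The predicate is null-rejecting if it cannot keep a NULL-padded row.
fn rejects_null(pred_with_null_column: &Pred) -> bool {
    matches!(eval(pred_with_null_column), Tri::False | Tri::Null)
}

fn main() {
    // `t2.b > 1` filters NULL-padded rows: LEFT JOIN plus this filter behaves like an INNER JOIN.
    assert!(rejects_null(&Pred::GtConst(None, 1)));
    // `t2.b IS NULL` keeps them, so the outer join must be preserved.
    assert!(!rejects_null(&Pred::IsNull(None)));
}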
+ +use std::sync::Arc; + +use databend_common_exception::Result; +use databend_common_expression::DataBlock; +use databend_common_expression::DataSchema; +use databend_common_expression::Evaluator; +use databend_common_expression::Expr; +use databend_common_expression::FunctionContext; +use databend_common_expression::Scalar; +use databend_common_expression::Value; +use databend_common_functions::BUILTIN_FUNCTIONS; + +use crate::binder::JoinPredicate; +use crate::executor::cast_expr_to_non_null_boolean; +use crate::optimizer::RelExpr; +use crate::optimizer::SExpr; +use crate::plans::ConstantExpr; +use crate::plans::Filter; +use crate::plans::Join; +use crate::plans::JoinType; +use crate::ColumnSet; +use crate::ScalarExpr; +use crate::TypeCheck; + +pub fn outer_join_to_inner_join(s_expr: &SExpr) -> Result<(SExpr, bool)> { + let mut join: Join = s_expr.child(0)?.plan().clone().try_into()?; + if !join.join_type.is_outer_join() { + return Ok((s_expr.clone(), false)); + } + + let filter: Filter = s_expr.plan().clone().try_into()?; + let join_s_expr = s_expr.child(0)?; + let join_rel_expr = RelExpr::with_s_expr(join_s_expr); + + let mut can_filter_left_null = false; + let mut can_filter_right_null = false; + let left_prop = join_rel_expr.derive_relational_prop_child(0)?; + let right_prop = join_rel_expr.derive_relational_prop_child(1)?; + for predicate in &filter.predicates { + let pred = JoinPredicate::new(predicate, &left_prop, &right_prop); + match pred { + JoinPredicate::Left(_) + if can_filter_null( + predicate, + &left_prop.output_columns, + &right_prop.output_columns, + )? => + { + can_filter_left_null = true; + } + JoinPredicate::Right(_) + if can_filter_null( + predicate, + &left_prop.output_columns, + &right_prop.output_columns, + )? => + { + can_filter_right_null = true; + } + JoinPredicate::Both { .. } + if can_filter_null( + predicate, + &left_prop.output_columns, + &right_prop.output_columns, + )? => + { + can_filter_left_null = true; + can_filter_right_null = true; + } + _ => (), + } + } + + #[cfg(feature = "z3-prove")] + { + let constraint_set = crate::optimizer::ConstraintSet::new(&filter.predicates); + let left_columns = join_rel_expr + .derive_relational_prop_child(0)? + .output_columns + .clone(); + let right_columns = join_rel_expr + .derive_relational_prop_child(1)? 
+ .output_columns + .clone(); + can_filter_left_null |= left_columns + .iter() + .any(|col| constraint_set.is_null_reject(col)); + can_filter_right_null |= right_columns + .iter() + .any(|col| constraint_set.is_null_reject(col)); + } + + let original_join_type = join.join_type.clone(); + join.join_type = + eliminate_outer_join_type(join.join_type, can_filter_left_null, can_filter_right_null); + if join.join_type == original_join_type { + return Ok((s_expr.clone(), false)); + } + + if matches!( + original_join_type, + JoinType::LeftSingle | JoinType::RightSingle + ) { + join.join_type = original_join_type.clone(); + join.single_to_inner = Some(original_join_type); + } + + let result = SExpr::create_unary( + Arc::new(filter.into()), + Arc::new(SExpr::create_binary( + Arc::new(join.into()), + Arc::new(join_s_expr.child(0)?.clone()), + Arc::new(join_s_expr.child(1)?.clone()), + )), + ); + + Ok((result, true)) +} + +fn eliminate_outer_join_type( + join_type: JoinType, + can_filter_left_null: bool, + can_filter_right_null: bool, +) -> JoinType { + match join_type { + JoinType::Left | JoinType::LeftSingle if can_filter_right_null => JoinType::Inner, + JoinType::Right | JoinType::RightSingle if can_filter_left_null => JoinType::Inner, + JoinType::Full => { + if can_filter_left_null && can_filter_right_null { + JoinType::Inner + } else if can_filter_left_null { + JoinType::Left + } else if can_filter_right_null { + JoinType::Right + } else { + join_type + } + } + _ => join_type, + } +} + +pub fn can_filter_null( + predicate: &ScalarExpr, + left_output_columns: &ColumnSet, + right_output_columns: &ColumnSet, +) -> Result { + struct ReplaceColumnBindingsNull<'a> { + can_replace: bool, + left_output_columns: &'a ColumnSet, + right_output_columns: &'a ColumnSet, + } + + impl<'a> ReplaceColumnBindingsNull<'a> { + fn replace( + &mut self, + expr: &mut ScalarExpr, + column_set: &mut Option, + ) -> Result<()> { + if !self.can_replace { + return Ok(()); + } + match expr { + ScalarExpr::BoundColumnRef(column_ref) => { + if let Some(column_set) = column_set { + column_set.insert(column_ref.column.index); + } + *expr = ScalarExpr::ConstantExpr(ConstantExpr { + span: None, + value: Scalar::Null, + }); + Ok(()) + } + ScalarExpr::FunctionCall(func) => { + // If the function is `assume_not_null` or `remove_nullable`, we cannot replace + // the column bindings with `Scalar::Null`. 
+ if matches!( + func.func_name.as_str(), + "assume_not_null" | "remove_nullable" + ) { + self.can_replace = false; + return Ok(()); + } + + if func.func_name != "or" { + for expr in &mut func.arguments { + self.replace(expr, column_set)?; + } + return Ok(()); + } + + let mut children_columns_set = Some(ColumnSet::new()); + for expr in &mut func.arguments { + self.replace(expr, &mut children_columns_set)?; + } + + let mut has_left = false; + let mut has_right = false; + let children_columns_set = children_columns_set.unwrap(); + for column in children_columns_set.iter() { + if self.left_output_columns.contains(column) { + has_left = true; + } else if self.right_output_columns.contains(column) { + has_right = true; + } + } + if has_left && has_right { + self.can_replace = false; + return Ok(()); + } + + if let Some(column_set) = column_set { + *column_set = column_set.union(&children_columns_set).cloned().collect(); + } + + Ok(()) + } + ScalarExpr::CastExpr(cast) => self.replace(&mut cast.argument, column_set), + ScalarExpr::ConstantExpr(_) => Ok(()), + _ => { + self.can_replace = false; + Ok(()) + } + } + } + } + + // Replace the column bindings of predicate with `Scalar::Null` and evaluate the result. + let mut replace = ReplaceColumnBindingsNull { + can_replace: true, + left_output_columns, + right_output_columns, + }; + let mut null_scalar_expr = predicate.clone(); + replace.replace(&mut null_scalar_expr, &mut None).unwrap(); + if replace.can_replace { + let expr = convert_scalar_expr_to_expr(null_scalar_expr)?; + let func_ctx = &FunctionContext::default(); + let data_block = DataBlock::empty(); + let evaluator = Evaluator::new(&data_block, func_ctx, &BUILTIN_FUNCTIONS); + if let Value::Scalar(scalar) = evaluator.run(&expr)? { + // if null column can be filtered, return true. + if matches!(scalar, Scalar::Boolean(false) | Scalar::Null) { + return Ok(true); + } + } + } + Ok(false) +} + +// Convert `ScalarExpr` to `Expr`. +fn convert_scalar_expr_to_expr(scalar_expr: ScalarExpr) -> Result { + let schema = Arc::new(DataSchema::new(vec![])); + let remote_expr = scalar_expr + .type_check(schema.as_ref())? + .project_column_ref(|index| schema.index_of(&index.to_string()).unwrap()) + .as_remote_expr(); + let expr = remote_expr.as_expr(&BUILTIN_FUNCTIONS); + cast_expr_to_non_null_boolean(expr) +} diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_commute_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_commute_join.rs index aec744bd1ce6..5b18bd3e37d0 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_commute_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_commute_join.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RelExpr; @@ -23,7 +24,6 @@ use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Join; use crate::plans::JoinType; -use crate::plans::PatternPlan; use crate::plans::RelOp; /// Rule to apply commutativity of join operator. @@ -31,7 +31,7 @@ use crate::plans::RelOp; /// rule will help us measure which child is the better one. 
pub struct RuleCommuteJoin { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleCommuteJoin { @@ -42,16 +42,10 @@ impl RuleCommuteJoin { // LogicalJoin // | \ // * * - patterns: vec![SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_pattern_leaf()), - Arc::new(SExpr::create_pattern_leaf()), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], } } } @@ -84,6 +78,7 @@ impl Rule for RuleCommuteJoin { | JoinType::LeftAnti | JoinType::RightAnti | JoinType::LeftMark + | JoinType::RightMark ) } else if left_card == right_card { matches!( @@ -109,7 +104,7 @@ impl Rule for RuleCommuteJoin { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_eval_scalar.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_eval_scalar.rs index 95c89f07871a..6f0a3295c83d 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_eval_scalar.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_eval_scalar.rs @@ -12,21 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; use crate::optimizer::SExpr; use crate::plans::EvalScalar; -use crate::plans::PatternPlan; use crate::plans::RelOp; pub struct RuleEliminateEvalScalar { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleEliminateEvalScalar { @@ -36,20 +34,10 @@ impl RuleEliminateEvalScalar { // EvalScalar // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::Leaf], + }], } } } @@ -70,7 +58,7 @@ impl Rule for RuleEliminateEvalScalar { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs index f6c562c7fbd5..560d58203234 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs @@ -17,18 +17,18 @@ use std::sync::Arc; use databend_common_exception::Result; use itertools::Itertools; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; use crate::optimizer::SExpr; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::ScalarExpr; pub struct RuleEliminateFilter { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleEliminateFilter { @@ -38,20 +38,10 @@ impl RuleEliminateFilter { // Filter // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - 
))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::Leaf], + }], } } } @@ -103,7 +93,7 @@ impl Rule for RuleEliminateFilter { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_sort.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_sort.rs index 61b4b1bbaed3..ecf580fa5631 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_sort.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_sort.rs @@ -12,22 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; use crate::optimizer::RelExpr; use crate::optimizer::SExpr; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::Sort; pub struct RuleEliminateSort { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleEliminateSort { @@ -37,20 +35,10 @@ impl RuleEliminateSort { // Sort // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::Leaf], + }], } } } @@ -75,7 +63,7 @@ impl Rule for RuleEliminateSort { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_fold_count_aggregate.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_fold_count_aggregate.rs index 8874c0f6a478..c73f353bba36 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_fold_count_aggregate.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_fold_count_aggregate.rs @@ -18,6 +18,7 @@ use databend_common_exception::Result; use databend_common_expression::types::NumberScalar; use databend_common_expression::Scalar; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; @@ -28,14 +29,13 @@ use crate::plans::AggregateMode; use crate::plans::ConstantExpr; use crate::plans::DummyTableScan; use crate::plans::EvalScalar; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::ScalarExpr; /// Fold simple `COUNT(*)` aggregate with statistics information. 
pub struct RuleFoldCountAggregate { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleFoldCountAggregate { @@ -45,20 +45,10 @@ impl RuleFoldCountAggregate { // Aggregate // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::Leaf], + }], } } } @@ -127,7 +117,7 @@ impl Rule for RuleFoldCountAggregate { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_eval_scalar.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_eval_scalar.rs index b624b737d4fa..aabcb55c8e09 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_eval_scalar.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_eval_scalar.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; @@ -23,13 +24,12 @@ use crate::optimizer::ColumnSet; use crate::optimizer::RelExpr; use crate::optimizer::SExpr; use crate::plans::EvalScalar; -use crate::plans::PatternPlan; use crate::plans::RelOp; // Merge two adjacent `EvalScalar`s into one pub struct RuleMergeEvalScalar { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleMergeEvalScalar { @@ -41,28 +41,13 @@ impl RuleMergeEvalScalar { // EvalScalar // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -107,7 +92,7 @@ impl Rule for RuleMergeEvalScalar { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_filter.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_filter.rs index 57c816ea863f..bb9ac746f260 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_filter.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_merge_filter.rs @@ -16,18 +16,18 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; use crate::optimizer::SExpr; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::RelOp; // Merge two adjacent `Filter`s into one pub struct RuleMergeFilter { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleMergeFilter { @@ -39,28 +39,13 @@ impl RuleMergeFilter { // Filter // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - 
Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -89,7 +74,7 @@ impl Rule for RuleMergeFilter { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_aggregate.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_aggregate.rs index 1e7eaf08c4a8..bebbd7089415 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_aggregate.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_aggregate.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; @@ -23,7 +24,6 @@ use crate::optimizer::SExpr; use crate::plans::Aggregate; use crate::plans::BoundColumnRef; use crate::plans::EvalScalar; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::ScalarExpr; use crate::plans::ScalarItem; @@ -32,7 +32,7 @@ use crate::Visibility; pub struct RuleNormalizeAggregate { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleNormalizeAggregate { @@ -42,20 +42,10 @@ impl RuleNormalizeAggregate { // Aggregate // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::Leaf], + }], } } } @@ -195,7 +185,7 @@ impl Rule for RuleNormalizeAggregate { } } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_scalar.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_scalar.rs index ec29f7d55715..981a6638ef55 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_scalar.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_normalize_scalar.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use databend_common_exception::Result; use databend_common_expression::Scalar; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::constant::is_falsy; use crate::optimizer::rule::constant::is_true; use crate::optimizer::rule::Rule; @@ -24,7 +25,6 @@ use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::ConstantExpr; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::ScalarExpr; @@ -58,7 +58,7 @@ fn normalize_falsy_predicate(predicates: Vec) -> Vec { /// whole filter with FALSE pub struct RuleNormalizeScalarFilter { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleNormalizeScalarFilter { @@ -68,20 +68,10 @@ impl RuleNormalizeScalarFilter { // Filter // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::Leaf], + }], } } } 
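Throughout these rule rewrites, the PatternPlan-based SExpr patterns are replaced by the lighter Matcher from optimizer::extract: a MatchOp checks the operator type and recurses into its children, while Leaf matches any subtree. A minimal sketch of that shape, using simplified RelOp/SExpr stand-ins rather than the real definitions:

#[derive(Clone, Copy, Debug, PartialEq)]
enum RelOp {
    Filter,
    Join,
    Scan,
}

struct SExpr {
    op: RelOp,
    children: Vec<SExpr>,
}

enum Matcher {
    MatchOp { op_type: RelOp, children: Vec<Matcher> },
    Leaf,
}

impl Matcher {
    fn matches(&self, expr: &SExpr) -> bool {
        match self {
            Matcher::Leaf => true,
            Matcher::MatchOp { op_type, children } => {
                expr.op == *op_type
                    && expr.children.len() == children.len()
                    && children.iter().zip(&expr.children).all(|(m, c)| m.matches(c))
            }
        }
    }
}

fn main() {
    // Filter -> Join -> (Leaf, Leaf): the shape RulePushDownFilterJoin matches.
    let matcher = Matcher::MatchOp {
        op_type: RelOp::Filter,
        children: vec![Matcher::MatchOp {
            op_type: RelOp::Join,
            children: vec![Matcher::Leaf, Matcher::Leaf],
        }],
    };
    let expr = SExpr {
        op: RelOp::Filter,
        children: vec![SExpr {
            op: RelOp::Join,
            children: vec![
                SExpr { op: RelOp::Scan, children: vec![] },
                SExpr { op: RelOp::Scan, children: vec![] },
            ],
        }],
    };
    assert!(matcher.matches(&expr));
}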
@@ -114,7 +104,7 @@ impl Rule for RuleNormalizeScalarFilter { } } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_aggregate.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_aggregate.rs index 258805a70598..599122e2a97d 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_aggregate.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_aggregate.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RelExpr; @@ -21,9 +22,7 @@ use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Aggregate; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::RelOp; -use crate::plans::RelOp::Pattern; /// Input: Filter /// \ @@ -48,32 +47,21 @@ use crate::plans::RelOp::Pattern; /// * pub struct RulePushDownFilterAggregate { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownFilterAggregate { pub fn new() -> Self { Self { id: RuleID::PushDownFilterAggregate, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { plan_type: Pattern }.into(), - ))), - )), - )], + + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -140,7 +128,7 @@ impl Rule for RulePushDownFilterAggregate { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_eval_scalar.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_eval_scalar.rs index c293d334d95a..431803d0a88d 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_eval_scalar.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_eval_scalar.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; @@ -29,7 +30,6 @@ use crate::plans::FunctionCall; use crate::plans::LagLeadFunction; use crate::plans::LambdaFunc; use crate::plans::NthValueFunction; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::ScalarExpr; use crate::plans::ScalarItem; @@ -37,16 +37,14 @@ use crate::plans::UDFServerCall; use crate::plans::WindowFunc; use crate::plans::WindowFuncType; use crate::plans::WindowOrderBy; -use crate::MetadataRef; pub struct RulePushDownFilterEvalScalar { id: RuleID, - patterns: Vec, - _metadata: MetadataRef, + matchers: Vec, } impl RulePushDownFilterEvalScalar { - pub fn new(metadata: MetadataRef) -> Self { + pub fn new() -> Self { Self { id: RuleID::PushDownFilterEvalScalar, // Filter @@ -54,29 +52,13 @@ impl RulePushDownFilterEvalScalar { // EvalScalar // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( 
- PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )], - _metadata: metadata, + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::Leaf], + }], + }], } } @@ -293,7 +275,7 @@ impl Rule for RulePushDownFilterEvalScalar { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs index 32783bb75dc2..3bddc292c3a4 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_join.rs @@ -17,12 +17,15 @@ use std::sync::Arc; use databend_common_exception::Result; use crate::binder::JoinPredicate; +use crate::optimizer::extract::Matcher; +use crate::optimizer::filter::InferFilterOptimizer; +use crate::optimizer::filter::JoinProperty; use crate::optimizer::rule::constant::false_constant; use crate::optimizer::rule::constant::is_falsy; -use crate::optimizer::rule::rewrite::filter_join::convert_mark_to_semi_join; -use crate::optimizer::rule::rewrite::filter_join::outer_to_inner; -use crate::optimizer::rule::rewrite::filter_join::rewrite_predicates; -use crate::optimizer::rule::rewrite::filter_join::try_derive_predicates; +use crate::optimizer::rule::rewrite::push_down_filter_join::can_filter_null; +use crate::optimizer::rule::rewrite::push_down_filter_join::convert_mark_to_semi_join; +use crate::optimizer::rule::rewrite::push_down_filter_join::outer_join_to_inner_join; +use crate::optimizer::rule::rewrite::push_down_filter_join::rewrite_predicates; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RelExpr; @@ -30,58 +33,35 @@ use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::ComparisonOp; use crate::plans::Filter; +use crate::plans::FunctionCall; use crate::plans::Join; use crate::plans::JoinType; -use crate::plans::PatternPlan; +use crate::plans::Operator; use crate::plans::RelOp; use crate::plans::ScalarExpr; -use crate::MetadataRef; pub struct RulePushDownFilterJoin { id: RuleID, - patterns: Vec, - _metadata: MetadataRef, + matchers: Vec, } impl RulePushDownFilterJoin { - pub fn new(metadata: MetadataRef) -> Self { + pub fn new() -> Self { Self { id: RuleID::PushDownFilterJoin, // Filter // \ - // InnerJoin + // Join // | \ // | * // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )], - _metadata: metadata, + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], } } } @@ -93,38 +73,44 @@ impl Rule for RulePushDownFilterJoin { fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { // First, try to convert outer join to inner join 
- let mut s_expr = outer_to_inner(s_expr)?; + let (s_expr, outer_to_inner) = outer_join_to_inner_join(s_expr)?; + // Second, check if can convert mark join to semi join - s_expr = convert_mark_to_semi_join(&s_expr)?; + let (s_expr, mark_to_semi) = convert_mark_to_semi_join(&s_expr)?; + if s_expr.plan().rel_op() != RelOp::Filter { + state.add_result(s_expr); + return Ok(()); + } let filter: Filter = s_expr.plan().clone().try_into()?; if filter.predicates.is_empty() { state.add_result(s_expr); return Ok(()); } - // Finally, extract or predicates from Filter to push down them to join. - // For example: `select * from t1, t2 where (t1.a=1 and t2.b=2) or (t1.a=2 and t2.b=1)` - // The predicate will be rewritten to `((t1.a=1 and t2.b=2) or (t1.a=2 and t2.b=1)) and (t1.a=1 or t1.a=2) and (t2.b=2 or t2.b=1)` - // So `(t1.a=1 or t1.a=1), (t2.b=2 or t2.b=1)` may be pushed down join and reduce rows between join - let predicates = rewrite_predicates(&s_expr)?; - let (need_push, mut result) = try_push_down_filter_join(&s_expr, predicates)?; - if !need_push { + + // Finally, push down filter to join. + let (need_push, mut result) = try_push_down_filter_join(&s_expr)?; + if !need_push && !outer_to_inner && !mark_to_semi { return Ok(()); } + result.set_applied_rule(&self.id); state.add_result(result); Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } -pub fn try_push_down_filter_join( - s_expr: &SExpr, - predicates: Vec, -) -> Result<(bool, SExpr)> { +pub fn try_push_down_filter_join(s_expr: &SExpr) -> Result<(bool, SExpr)> { + // Extract or predicates from Filter to push down them to join. + // For example: `select * from t1, t2 where (t1.a=1 and t2.b=2) or (t1.a=2 and t2.b=1)` + // The predicate will be rewritten to `((t1.a=1 and t2.b=2) or (t1.a=2 and t2.b=1)) and (t1.a=1 or t1.a=2) and (t2.b=2 or t2.b=1)` + // So `(t1.a=1 or t1.a=1), (t2.b=2 or t2.b=1)` may be pushed down join and reduce rows between join + let predicates = rewrite_predicates(s_expr)?; + let join_expr = s_expr.child(0)?; let mut join: Join = join_expr.plan().clone().try_into()?; @@ -132,62 +118,67 @@ pub fn try_push_down_filter_join( let left_prop = rel_expr.derive_relational_prop_child(0)?; let right_prop = rel_expr.derive_relational_prop_child(1)?; + let original_predicates_count = predicates.len(); + let mut original_predicates = vec![]; let mut left_push_down = vec![]; let mut right_push_down = vec![]; - let mut original_predicates = vec![]; - - let mut need_push = false; - + let mut push_down_predicates = vec![]; + let mut non_equi_predicates = vec![]; for predicate in predicates.into_iter() { if is_falsy(&predicate) { - left_push_down = vec![false_constant()]; - right_push_down = vec![false_constant()]; - need_push = true; + push_down_predicates = vec![false_constant()]; break; } let pred = JoinPredicate::new(&predicate, &left_prop, &right_prop); match pred { JoinPredicate::ALL(_) => { - need_push = true; - left_push_down.push(predicate.clone()); - right_push_down.push(predicate.clone()); + push_down_predicates.push(predicate); } JoinPredicate::Left(_) => { if matches!( join.join_type, JoinType::Right | JoinType::RightSingle | JoinType::Full ) { - original_predicates.push(predicate); - continue; + if can_filter_null( + &predicate, + &left_prop.output_columns, + &right_prop.output_columns, + )? 
{ + left_push_down.push(predicate); + } else { + original_predicates.push(predicate); + } + } else { + left_push_down.push(predicate); } - need_push = true; - left_push_down.push(predicate); } JoinPredicate::Right(_) => { if matches!( join.join_type, JoinType::Left | JoinType::LeftSingle | JoinType::Full ) { - original_predicates.push(predicate); - continue; + if can_filter_null( + &predicate, + &left_prop.output_columns, + &right_prop.output_columns, + )? { + right_push_down.push(predicate); + } else { + original_predicates.push(predicate); + } + } else { + right_push_down.push(predicate); } - need_push = true; - right_push_down.push(predicate); } JoinPredicate::Other(_) => original_predicates.push(predicate), - - JoinPredicate::Both { left, right, op } => { - if op == ComparisonOp::Equal { - if matches!(join.join_type, JoinType::Inner | JoinType::Cross) { - join.join_type = JoinType::Inner; - join.left_conditions.push(left.clone()); - join.right_conditions.push(right.clone()); - need_push = true; + JoinPredicate::Both { is_equal_op, .. } => { + if matches!(join.join_type, JoinType::Inner | JoinType::Cross) { + if is_equal_op { + push_down_predicates.push(predicate); + } else { + non_equi_predicates.push(predicate); } - } else if matches!(join.join_type, JoinType::Inner | JoinType::Cross) { join.join_type = JoinType::Inner; - join.non_equi_conditions.push(predicate.clone()); - need_push = true; } else { original_predicates.push(predicate); } @@ -195,12 +186,110 @@ pub fn try_push_down_filter_join( } } - if !need_push { + if original_predicates.len() == original_predicates_count { return Ok((false, s_expr.clone())); } - // try to derive new predicate and push down filter - let mut result = try_derive_predicates(s_expr, join, left_push_down, right_push_down)?; + if !matches!(join.join_type, JoinType::Full) { + // Infer new predicate and push down filter. + for (left_condition, right_condition) in join + .left_conditions + .iter() + .zip(join.right_conditions.iter()) + { + push_down_predicates.push(ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: String::from(ComparisonOp::Equal.to_func_name()), + params: vec![], + arguments: vec![left_condition.clone(), right_condition.clone()], + })); + } + join.left_conditions.clear(); + join.right_conditions.clear(); + match join.join_type { + JoinType::Left | JoinType::LeftSingle => { + push_down_predicates.extend(left_push_down); + left_push_down = vec![]; + } + JoinType::Right | JoinType::RightSingle => { + push_down_predicates.extend(right_push_down); + right_push_down = vec![]; + } + _ => { + push_down_predicates.extend(left_push_down); + left_push_down = vec![]; + push_down_predicates.extend(right_push_down); + right_push_down = vec![]; + } + } + let join_prop = JoinProperty::new(&left_prop.output_columns, &right_prop.output_columns); + let infer_filter = InferFilterOptimizer::new(Some(join_prop)); + push_down_predicates = infer_filter.run(push_down_predicates)?; + } + + let mut all_push_down = vec![]; + for predicate in push_down_predicates.into_iter() { + if is_falsy(&predicate) { + left_push_down = vec![false_constant()]; + right_push_down = vec![false_constant()]; + break; + } + let pred = JoinPredicate::new(&predicate, &left_prop, &right_prop); + match pred { + JoinPredicate::ALL(_) => { + all_push_down.push(predicate); + } + JoinPredicate::Left(_) => { + left_push_down.push(predicate); + } + JoinPredicate::Right(_) => { + right_push_down.push(predicate); + } + JoinPredicate::Both { left, right, .. 
} => { + join.left_conditions.push(left.clone()); + join.right_conditions.push(right.clone()); + } + _ => original_predicates.push(predicate), + } + } + join.non_equi_conditions.extend(non_equi_predicates); + if !all_push_down.is_empty() { + left_push_down.extend(all_push_down.to_vec()); + right_push_down.extend(all_push_down); + } + + let mut left_child = join_expr.child(0)?.clone(); + let mut right_child = join_expr.child(1)?.clone(); + + if !left_push_down.is_empty() { + left_child = SExpr::create_unary( + Arc::new( + Filter { + predicates: left_push_down, + } + .into(), + ), + Arc::new(left_child), + ); + } + + if !right_push_down.is_empty() { + right_child = SExpr::create_unary( + Arc::new( + Filter { + predicates: right_push_down, + } + .into(), + ), + Arc::new(right_child), + ); + } + + let mut result = SExpr::create_binary( + Arc::new(join.into()), + Arc::new(left_child), + Arc::new(right_child), + ); if !original_predicates.is_empty() { result = SExpr::create_unary( @@ -213,5 +302,6 @@ pub fn try_push_down_filter_join( Arc::new(result), ); } - Ok((need_push, result)) + + Ok((true, result)) } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_project_set.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_project_set.rs index 8c492d73df96..d826d529a250 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_project_set.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_project_set.rs @@ -16,16 +16,15 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RelExpr; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::ProjectSet; use crate::plans::RelOp; -use crate::plans::RelOp::Pattern; /// Input: Filter /// \ @@ -50,32 +49,20 @@ use crate::plans::RelOp::Pattern; /// * pub struct RulePushDownFilterProjectSet { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownFilterProjectSet { pub fn new() -> Self { Self { id: RuleID::PushDownFilterProjectSet, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::ProjectSet, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { plan_type: Pattern }.into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::ProjectSet, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -134,7 +121,7 @@ impl Rule for RulePushDownFilterProjectSet { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs index 88c5e540ede4..9ca0e1d6505d 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use databend_common_exception::Result; use crate::binder::ColumnBindingBuilder; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; 
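// Illustrative sketch, not from this patch: the push-down step completed above in
// `try_push_down_filter_join` boils down to classifying each predicate by which
// side of the join its columns come from, then wrapping the matching child in a
// new Filter node. The types here are hypothetical stand-ins (plain column-id sets
// instead of ColumnSet/ScalarExpr); for outer joins the patch additionally
// requires `can_filter_null` to hold before pushing to the preserved side.
use std::collections::HashSet;

#[derive(Clone, Debug)]
struct Pred {
    used_columns: HashSet<u32>,
}

fn split_by_side(
    preds: Vec<Pred>,
    left_cols: &HashSet<u32>,
    right_cols: &HashSet<u32>,
) -> (Vec<Pred>, Vec<Pred>, Vec<Pred>) {
    let (mut left, mut right, mut keep) = (Vec::new(), Vec::new(), Vec::new());
    for p in preds {
        if p.used_columns.is_subset(left_cols) {
            left.push(p); // can be evaluated below the left child
        } else if p.used_columns.is_subset(right_cols) {
            right.push(p); // can be evaluated below the right child
        } else {
            keep.push(p); // references both sides, stays above the join
        }
    }
    (left, right, keep)
}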
use crate::optimizer::RuleID; @@ -29,7 +30,6 @@ use crate::plans::FunctionCall; use crate::plans::LagLeadFunction; use crate::plans::LambdaFunc; use crate::plans::NthValueFunction; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::Scan; use crate::plans::UDFServerCall; @@ -43,7 +43,7 @@ use crate::TableEntry; pub struct RulePushDownFilterScan { id: RuleID, - patterns: Vec, + matchers: Vec, metadata: MetadataRef, } @@ -54,20 +54,13 @@ impl RulePushDownFilterScan { // Filter // \ // LogicalGet - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], metadata, } } @@ -409,7 +402,7 @@ impl Rule for RulePushDownFilterScan { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_sort.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_sort.rs index b1e03e266b38..19bcc2b34fba 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_sort.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_sort.rs @@ -16,14 +16,13 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::RelOp; -use crate::plans::RelOp::Pattern; use crate::plans::Sort; /// Input: Filter @@ -39,32 +38,20 @@ use crate::plans::Sort; /// * pub struct RulePushDownFilterSort { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownFilterSort { pub fn new() -> Self { Self { id: RuleID::PushDownFilterSort, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { plan_type: Pattern }.into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -91,7 +78,7 @@ impl Rule for RulePushDownFilterSort { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs index afccc3e11ad8..ae9f77c8b0fa 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs @@ -18,13 +18,13 @@ use ahash::HashMap; use databend_common_exception::Result; use crate::binder::ColumnBindingBuilder; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::BoundColumnRef; use crate::plans::Filter; -use 
crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::ScalarExpr; use crate::plans::UnionAll; @@ -40,7 +40,7 @@ use crate::Visibility; // So it'll be efficient to push down `filter` to `union`, reduce the size of data to pull from table. pub struct RulePushDownFilterUnion { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownFilterUnion { @@ -52,34 +52,13 @@ impl RulePushDownFilterUnion { // UnionAll // / \ // ... ... - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::UnionAll, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::UnionAll, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], } } } @@ -124,8 +103,8 @@ impl Rule for RulePushDownFilterUnion { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_window.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_window.rs index 6c75e2134f09..0e8fceb77474 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_window.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_window.rs @@ -14,14 +14,13 @@ use std::sync::Arc; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::RelOp; -use crate::plans::RelOp::Pattern; use crate::plans::Window; /// Input: Filter @@ -49,32 +48,20 @@ use crate::plans::Window; /// note that only push down filter used in `Window.partition_by` columns pub struct RulePushDownFilterWindow { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownFilterWindow { pub fn new() -> Self { Self { id: RuleID::PushDownFilterWindow, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Window, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { plan_type: Pattern }.into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Window, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -140,7 +127,7 @@ impl Rule for RulePushDownFilterWindow { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_aggregate.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_aggregate.rs index 7cb786c6378b..d73fc600f3de 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_aggregate.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_aggregate.rs @@ -15,16 +15,14 @@ use std::cmp; use std::sync::Arc; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use 
crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Aggregate; use crate::plans::Limit; -use crate::plans::PatternPlan; use crate::plans::RelOp; -use crate::plans::RelOp::Aggregate as OpAggregate; -use crate::plans::RelOp::Pattern; use crate::plans::RelOperator; /// Input: Limit @@ -40,32 +38,20 @@ use crate::plans::RelOperator; /// * pub struct RulePushDownLimitAggregate { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownLimitAggregate { pub fn new() -> Self { Self { id: RuleID::PushDownLimitAggregate, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Limit, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: OpAggregate, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { plan_type: Pattern }.into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -99,7 +85,7 @@ impl Rule for RulePushDownLimitAggregate { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_expression.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_expression.rs index 9b53d386bd1a..c86354e5ace0 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_expression.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_expression.rs @@ -14,15 +14,14 @@ use std::sync::Arc; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::EvalScalar; use crate::plans::Limit; -use crate::plans::PatternPlan; use crate::plans::RelOp; -use crate::plans::RelOp::Pattern; use crate::plans::RelOperator; /// Input: Limit @@ -36,39 +35,27 @@ use crate::plans::RelOperator; /// limit /// \ /// * -pub struct RulePushDownLimitExpression { +pub struct RulePushDownLimitEvalScalar { id: RuleID, - patterns: Vec, + matchers: Vec, } -impl RulePushDownLimitExpression { +impl RulePushDownLimitEvalScalar { pub fn new() -> Self { Self { - id: RuleID::PushDownLimitExpression, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Limit, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { plan_type: Pattern }.into(), - ))), - )), - )], + id: RuleID::PushDownLimitEvalScalar, + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::Leaf], + }], + }], } } } -impl Rule for RulePushDownLimitExpression { +impl Rule for RulePushDownLimitEvalScalar { fn id(&self) -> RuleID { self.id } @@ -96,7 +83,7 @@ impl Rule for RulePushDownLimitExpression { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_join.rs index 104ff48c1b1f..b2d126d6a212 100644 --- 
a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_join.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; @@ -21,7 +22,6 @@ use crate::optimizer::SExpr; use crate::plans::Join; use crate::plans::JoinType; use crate::plans::Limit; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; @@ -41,41 +41,20 @@ use crate::plans::RelOperator; /// * pub struct RulePushDownLimitOuterJoin { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownLimitOuterJoin { pub fn new() -> Self { Self { id: RuleID::PushDownLimitOuterJoin, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Limit, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], } } } @@ -139,7 +118,7 @@ impl Rule for RulePushDownLimitOuterJoin { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_scan.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_scan.rs index 5b6705bcdd74..693558918fa4 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_scan.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_scan.rs @@ -17,12 +17,12 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Limit; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; use crate::plans::Scan; @@ -35,30 +35,22 @@ use crate::plans::Scan; /// Limit /// \ /// LogicalGet(padding limit) - pub struct RulePushDownLimitScan { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownLimitScan { pub fn new() -> Self { Self { id: RuleID::PushDownLimitScan, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Limit, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], } } } @@ -84,7 +76,7 @@ impl Rule for RulePushDownLimitScan { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_sort.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_sort.rs index 03011eb1c053..7a06c87c7ca7 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_sort.rs +++ 
b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_sort.rs @@ -15,17 +15,15 @@ use std::cmp; use std::sync::Arc; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Limit; -use crate::plans::PatternPlan; use crate::plans::RelOp; -use crate::plans::RelOp::Pattern; -use crate::plans::RelOp::Sort; use crate::plans::RelOperator; -use crate::plans::Sort as logsort; +use crate::plans::Sort; /// Input: Limit /// \ @@ -40,27 +38,20 @@ use crate::plans::Sort as logsort; /// * pub struct RulePushDownLimitSort { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownLimitSort { pub fn new() -> Self { Self { id: RuleID::PushDownLimitSort, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Limit, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new(PatternPlan { plan_type: Sort }.into()), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { plan_type: Pattern }.into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::Leaf], + }], + }], } } } @@ -79,7 +70,7 @@ impl Rule for RulePushDownLimitSort { if let Some(mut count) = limit.limit { count += limit.offset; let sort = s_expr.child(0)?; - let mut sort_limit: logsort = sort.plan().clone().try_into()?; + let mut sort_limit: Sort = sort.plan().clone().try_into()?; sort_limit.limit = Some(sort_limit.limit.map_or(count, |c| cmp::max(c, count))); let sort = SExpr::create_unary( Arc::new(RelOperator::Sort(sort_limit)), @@ -93,7 +84,7 @@ impl Rule for RulePushDownLimitSort { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_union.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_union.rs index d4a161c31bee..5ee5e801843e 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_union.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_union.rs @@ -17,18 +17,18 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Limit; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::UnionAll; pub struct RulePushDownLimitUnion { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownLimitUnion { @@ -40,34 +40,13 @@ impl RulePushDownLimitUnion { // UnionAll // / \ // ... ... 
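// Illustrative sketch, not from this patch: the limit that PushDownLimitSort (above)
// pads onto the child operator is `limit + offset`, merged with any limit the child
// already carries by keeping the larger of the two. A standalone version of that
// arithmetic (the function name is illustrative only):
use std::cmp;

fn padded_child_limit(parent_limit: u64, parent_offset: u64, child_limit: Option<u64>) -> u64 {
    // The parent may skip `offset` rows, so the child must still produce them.
    let count = parent_limit + parent_offset;
    child_limit.map_or(count, |c| cmp::max(c, count))
}

fn main() {
    // `... ORDER BY x LIMIT 10 OFFSET 5`: the Sort below only needs to keep 15 rows.
    assert_eq!(padded_child_limit(10, 5, None), 15);
    // If the child already keeps a larger limit, that bound wins.
    assert_eq!(padded_child_limit(10, 5, Some(20)), 20);
}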
- patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Limit, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::UnionAll, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::UnionAll, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], } } } @@ -121,7 +100,7 @@ impl Rule for RulePushDownLimitUnion { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_window.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_window.rs new file mode 100644 index 000000000000..3a65e684d0f6 --- /dev/null +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_limit_window.rs @@ -0,0 +1,125 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cmp; +use std::sync::Arc; + +use databend_common_exception::Result; + +use crate::optimizer::extract::Matcher; +use crate::optimizer::rule::Rule; +use crate::optimizer::rule::TransformResult; +use crate::optimizer::RuleID; +use crate::optimizer::SExpr; +use crate::plans::Limit; +use crate::plans::RelOp; +use crate::plans::RelOperator; +use crate::plans::Window as LogicalWindow; +use crate::plans::WindowFuncFrame; +use crate::plans::WindowFuncFrameBound; +use crate::plans::WindowFuncFrameUnits; +use crate::plans::WindowFuncType; + +/// Input: Limit +/// \ +/// Window +/// \ +/// * +/// +/// Output: Limit +/// \ +/// Window(padding limit) +/// \ +/// * +pub struct RulePushDownLimitWindow { + id: RuleID, + matchers: Vec, +} + +impl RulePushDownLimitWindow { + pub fn new() -> Self { + Self { + id: RuleID::PushDownLimitSort, + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Limit, + children: vec![Matcher::MatchOp { + op_type: RelOp::Window, + children: vec![Matcher::Leaf], + }], + }], + } + } +} + +impl Rule for RulePushDownLimitWindow { + fn id(&self) -> RuleID { + self.id + } + + fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { + let limit: Limit = s_expr.plan().clone().try_into()?; + if let Some(mut count) = limit.limit { + count += limit.offset; + let window = s_expr.child(0)?; + let mut window_limit: LogicalWindow = window.plan().clone().try_into()?; + if should_apply(window.child(0)?, &window_limit)? 
{ + window_limit.limit = Some(window_limit.limit.map_or(count, |c| cmp::max(c, count))); + let sort = SExpr::create_unary( + Arc::new(RelOperator::Window(window_limit)), + Arc::new(window.child(0)?.clone()), + ); + + let mut result = s_expr.replace_children(vec![Arc::new(sort)]); + result.set_applied_rule(&self.id); + state.add_result(result); + } + } + Ok(()) + } + + fn matchers(&self) -> &[Matcher] { + &self.matchers + } +} + +fn should_apply(child: &SExpr, window: &LogicalWindow) -> Result { + let child_window_exists = child_has_window(child)?; + // ranking functions are frame insensitive + if is_ranking_function(&window.function) { + Ok(!child_window_exists) + } else { + Ok(is_valid_frame(&window.frame) && !child_window_exists) + } +} + +fn is_ranking_function(func: &WindowFuncType) -> bool { + matches!( + func, + WindowFuncType::RowNumber | WindowFuncType::Rank | WindowFuncType::DenseRank + ) +} + +fn is_valid_frame(frame: &WindowFuncFrame) -> bool { + matches!(frame.units, WindowFuncFrameUnits::Rows) + && matches!(frame.start_bound, WindowFuncFrameBound::Preceding(_)) + && matches!(frame.end_bound, WindowFuncFrameBound::CurrentRow) +} + +fn child_has_window(child: &SExpr) -> Result { + match child.plan() { + RelOperator::Window(_) => Ok(true), + RelOperator::Scan(_) => Ok(false), // finish recursion + _ => child_has_window(child.child(0)?), + } +} diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_prewhere.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_prewhere.rs index a6f80c599e21..00eb07d22445 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_prewhere.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_prewhere.rs @@ -18,12 +18,12 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::TableSchemaRef; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::ColumnSet; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Filter; -use crate::plans::PatternPlan; use crate::plans::Prewhere; use crate::plans::RelOp; use crate::plans::ScalarExpr; @@ -34,7 +34,7 @@ use crate::Visibility; pub struct RulePushDownPrewhere { id: RuleID, - patterns: Vec, + matchers: Vec, metadata: MetadataRef, } @@ -42,20 +42,13 @@ impl RulePushDownPrewhere { pub fn new(metadata: MetadataRef) -> Self { Self { id: RuleID::PushDownPrewhere, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], metadata, } } @@ -164,10 +157,6 @@ impl Rule for RulePushDownPrewhere { self.id } - fn patterns(&self) -> &Vec { - &self.patterns - } - fn apply( &self, s_expr: &SExpr, @@ -178,4 +167,8 @@ impl Rule for RulePushDownPrewhere { state.add_result(result); Ok(()) } + + fn matchers(&self) -> &[Matcher] { + &self.matchers + } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_sort_scan.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_sort_scan.rs index 8348ce13979d..ecb340f56421 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_sort_scan.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_sort_scan.rs 
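// Illustrative sketch, not from this patch: the new RulePushDownLimitWindow above
// only pads a limit onto a Window when its check deems the first `count` output
// rows independent of later input, i.e. for ranking functions (ROW_NUMBER / RANK /
// DENSE_RANK) or for frames of the shape `ROWS ... PRECEDING .. CURRENT ROW`, and
// only when no further Window operator sits underneath. A simplified, self-contained
// version of that frame test (the types are stand-ins for the crate's
// WindowFuncFrame* types):
#[derive(PartialEq)]
enum FrameUnits {
    Rows,
    Range,
}

enum FrameBound {
    Preceding(Option<u64>),
    CurrentRow,
    Following(Option<u64>),
}

struct Frame {
    units: FrameUnits,
    start_bound: FrameBound,
    end_bound: FrameBound,
}

fn frame_is_limit_safe(frame: &Frame) -> bool {
    frame.units == FrameUnits::Rows
        && matches!(frame.start_bound, FrameBound::Preceding(_))
        && matches!(frame.end_bound, FrameBound::CurrentRow)
}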
@@ -17,11 +17,11 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; use crate::plans::Scan; @@ -35,30 +35,22 @@ use crate::plans::Sort; /// Sort /// \ /// LogicalGet(padding order_by and limit) - pub struct RulePushDownSortScan { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RulePushDownSortScan { pub fn new() -> Self { Self { id: RuleID::PushDownSortScan, - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], } } } @@ -86,7 +78,7 @@ impl Rule for RulePushDownSortScan { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs index 3f413823e05f..0c00281a1223 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs @@ -17,50 +17,34 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::plans::Join; use crate::plans::JoinType; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; use crate::IndexType; -use crate::MetadataRef; use crate::ScalarExpr; pub struct RuleSemiToInnerJoin { id: RuleID, - patterns: Vec, - _metadata: MetadataRef, + matchers: Vec, } impl RuleSemiToInnerJoin { - pub fn new(_metadata: MetadataRef) -> Self { + pub fn new() -> Self { Self { id: RuleID::SemiToInnerJoin, - patterns: vec![SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], - _metadata, + // Join + // | \ + // * * + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], } } } @@ -116,8 +100,8 @@ impl Rule for RuleSemiToInnerJoin { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } @@ -145,8 +129,7 @@ fn find_group_by_keys(child: &SExpr, group_by_keys: &mut HashSet) -> | RelOperator::Udf(_) | RelOperator::Scan(_) | RelOperator::CteScan(_) - | RelOperator::Join(_) - | RelOperator::Pattern(_) => {} + | RelOperator::Join(_) => {} } Ok(()) } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_split_aggregate.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_split_aggregate.rs index d29dfb7ad7f3..61b12b66aed9 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_split_aggregate.rs +++ 
b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_split_aggregate.rs @@ -16,19 +16,19 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::RuleID; use crate::optimizer::rule::TransformResult; use crate::optimizer::SExpr; use crate::plans::Aggregate; use crate::plans::AggregateMode; -use crate::plans::PatternPlan; use crate::plans::RelOp; // Split `Aggregate` into `FinalAggregate` and `PartialAggregate` pub struct RuleSplitAggregate { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleSplitAggregate { @@ -38,20 +38,10 @@ impl RuleSplitAggregate { // Aggregate // \ // * - patterns: vec![SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ))), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::Leaf], + }], } } } @@ -81,7 +71,7 @@ impl Rule for RuleSplitAggregate { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } } diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_try_apply_agg_index.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_try_apply_agg_index.rs index 0a3040eaccb0..f0cb62841e10 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_try_apply_agg_index.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_try_apply_agg_index.rs @@ -12,15 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - use databend_common_exception::Result; use super::agg_index; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::RuleID; use crate::optimizer::SExpr; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; use crate::IndexType; @@ -30,39 +28,27 @@ pub struct RuleTryApplyAggIndex { id: RuleID, metadata: MetadataRef, - patterns: Vec, + matchers: Vec, } impl RuleTryApplyAggIndex { - fn sorted_patterns() -> Vec { + fn sorted_matchers() -> Vec { vec![ // Expression // | // Sort // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }, // Expression // | // Sort @@ -70,36 +56,19 @@ impl RuleTryApplyAggIndex { // Filter // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: 
vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }], + }, // Expression // | // Sort @@ -109,52 +78,25 @@ impl RuleTryApplyAggIndex { // Expression // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }], + }], + }], + }, // Expression // | // Sort @@ -166,109 +108,58 @@ impl RuleTryApplyAggIndex { // Filter // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - )), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }], + }], + }], + }], + }, ] } - fn normal_patterns() -> Vec { + fn normal_matchers() -> Vec { vec![ // Expression // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }, // Expression // | // Filter // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + 
children: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }, // Expression // | // Aggregation @@ -276,44 +167,22 @@ impl RuleTryApplyAggIndex { // Expression // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }], + }], + }, // Expression // | // Aggregation @@ -323,65 +192,39 @@ impl RuleTryApplyAggIndex { // Filter // | // Scan - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Filter, - } - .into(), - ), - Arc::new(SExpr::create_leaf(Arc::new( - PatternPlan { - plan_type: RelOp::Scan, - } - .into(), - ))), - )), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }], + }], + }], + }], + }], + }, ] } - fn patterns() -> Vec { - let mut patterns = Self::normal_patterns(); - patterns.extend(Self::sorted_patterns()); + fn matchers() -> Vec { + let mut patterns = Self::normal_matchers(); + patterns.extend(Self::sorted_matchers()); patterns } + pub fn new(metadata: MetadataRef) -> Self { Self { id: RuleID::TryApplyAggIndex, metadata, - patterns: Self::patterns(), + matchers: Self::matchers(), } } } @@ -391,10 +234,6 @@ impl Rule for RuleTryApplyAggIndex { self.id } - fn patterns(&self) -> &Vec { - &self.patterns - } - fn apply( &self, s_expr: &SExpr, @@ -424,6 +263,10 @@ impl Rule for RuleTryApplyAggIndex { Ok(()) } + + fn matchers(&self) -> &[Matcher] { + &self.matchers + } } impl RuleTryApplyAggIndex { diff --git a/src/query/sql/src/planner/optimizer/rule/rule.rs b/src/query/sql/src/planner/optimizer/rule/rule.rs index a96a1d212164..30d98f42cd34 100644 --- a/src/query/sql/src/planner/optimizer/rule/rule.rs +++ b/src/query/sql/src/planner/optimizer/rule/rule.rs @@ -20,25 +20,25 @@ use databend_common_exception::Result; use num_derive::FromPrimitive; use num_derive::ToPrimitive; +use crate::optimizer::extract::Matcher; use 
crate::optimizer::rule::TransformResult; use crate::optimizer::SExpr; pub static DEFAULT_REWRITE_RULES: LazyLock> = LazyLock::new(|| { vec![ - RuleID::NormalizeDisjunctiveFilter, RuleID::NormalizeScalarFilter, RuleID::NormalizeAggregate, RuleID::EliminateFilter, RuleID::EliminateSort, RuleID::MergeFilter, - RuleID::InferFilter, RuleID::MergeEvalScalar, RuleID::PushDownFilterUnion, RuleID::PushDownFilterAggregate, RuleID::PushDownFilterWindow, RuleID::PushDownLimitUnion, - RuleID::PushDownLimitExpression, + RuleID::PushDownLimitEvalScalar, RuleID::PushDownLimitSort, + RuleID::PushDownLimitWindow, RuleID::PushDownLimitAggregate, RuleID::PushDownLimitOuterJoin, RuleID::PushDownLimitScan, @@ -63,7 +63,7 @@ pub trait Rule { fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()>; - fn patterns(&self) -> &Vec; + fn matchers(&self) -> &[Matcher]; fn transformation(&self) -> bool { true @@ -77,8 +77,6 @@ pub enum RuleID { // Rewrite rules NormalizeAggregate, NormalizeScalarFilter, - NormalizeDisjunctiveFilter, - InferFilter, PushDownFilterAggregate, PushDownFilterEvalScalar, PushDownFilterUnion, @@ -89,8 +87,9 @@ pub enum RuleID { PushDownFilterWindow, PushDownLimitUnion, PushDownLimitOuterJoin, - PushDownLimitExpression, + PushDownLimitEvalScalar, PushDownLimitSort, + PushDownLimitWindow, PushDownLimitAggregate, PushDownLimitScan, PushDownSortScan, @@ -123,12 +122,13 @@ impl Display for RuleID { RuleID::PushDownFilterProjectSet => write!(f, "PushDownFilterProjectSet"), RuleID::PushDownLimitUnion => write!(f, "PushDownLimitUnion"), RuleID::PushDownLimitOuterJoin => write!(f, "PushDownLimitOuterJoin"), - RuleID::PushDownLimitExpression => write!(f, "PushDownLimitExpression"), + RuleID::PushDownLimitEvalScalar => write!(f, "PushDownLimitEvalScalar"), RuleID::PushDownLimitSort => write!(f, "PushDownLimitSort"), RuleID::PushDownLimitAggregate => write!(f, "PushDownLimitAggregate"), RuleID::PushDownFilterAggregate => write!(f, "PushDownFilterAggregate"), RuleID::PushDownLimitScan => write!(f, "PushDownLimitScan"), RuleID::PushDownSortScan => write!(f, "PushDownSortScan"), + RuleID::PushDownLimitWindow => write!(f, "PushDownLimitWindow"), RuleID::PushDownFilterWindow => write!(f, "PushDownFilterWindow"), RuleID::EliminateEvalScalar => write!(f, "EliminateEvalScalar"), RuleID::EliminateFilter => write!(f, "EliminateFilter"), @@ -138,8 +138,6 @@ impl Display for RuleID { RuleID::NormalizeScalarFilter => write!(f, "NormalizeScalarFilter"), RuleID::NormalizeAggregate => write!(f, "NormalizeAggregate"), RuleID::SplitAggregate => write!(f, "SplitAggregate"), - RuleID::NormalizeDisjunctiveFilter => write!(f, "NormalizeDisjunctiveFilter"), - RuleID::InferFilter => write!(f, "InferFilter"), RuleID::FoldCountAggregate => write!(f, "FoldCountAggregate"), RuleID::PushDownPrewhere => write!(f, "PushDownPrewhere"), diff --git a/src/query/sql/src/planner/optimizer/rule/rule_set.rs b/src/query/sql/src/planner/optimizer/rule/rule_set.rs index c19d615dbd27..2b153f2d4d4d 100644 --- a/src/query/sql/src/planner/optimizer/rule/rule_set.rs +++ b/src/query/sql/src/planner/optimizer/rule/rule_set.rs @@ -82,6 +82,10 @@ impl AppliedRules { pub fn get(&self, id: &RuleID) -> bool { self.rules.contains(id) } + + pub fn clear(&mut self) { + self.rules = RuleSet::create(); + } } impl Hash for AppliedRules { diff --git a/src/query/sql/src/planner/optimizer/rule/transform/rule_commute_join_base_table.rs b/src/query/sql/src/planner/optimizer/rule/transform/rule_commute_join_base_table.rs index 
2cc32b9b9e63..3d32db9ab5bd 100644 --- a/src/query/sql/src/planner/optimizer/rule/transform/rule_commute_join_base_table.rs +++ b/src/query/sql/src/planner/optimizer/rule/transform/rule_commute_join_base_table.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use databend_common_exception::Result; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RuleID; @@ -23,14 +24,13 @@ use crate::optimizer::SExpr; use crate::plans::Join; use crate::plans::JoinType; use crate::plans::Operator; -use crate::plans::PatternPlan; use crate::plans::RelOp; /// Rule to apply commutativity of join operator. /// In opposite to RuleCommuteJoin, this rule only applies to base tables. pub struct RuleCommuteJoinBaseTable { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleCommuteJoinBaseTable { @@ -41,16 +41,10 @@ impl RuleCommuteJoinBaseTable { // LogicalJoin // | \ // * * - patterns: vec![SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_pattern_leaf()), - Arc::new(SExpr::create_pattern_leaf()), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], } } } @@ -104,10 +98,9 @@ impl Rule for RuleCommuteJoinBaseTable { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } - fn transformation(&self) -> bool { false } diff --git a/src/query/sql/src/planner/optimizer/rule/transform/rule_eager_aggregation.rs b/src/query/sql/src/planner/optimizer/rule/transform/rule_eager_aggregation.rs index 67b123bb20a7..6fa83d22d1fd 100644 --- a/src/query/sql/src/planner/optimizer/rule/transform/rule_eager_aggregation.rs +++ b/src/query/sql/src/planner/optimizer/rule/transform/rule_eager_aggregation.rs @@ -22,6 +22,7 @@ use databend_common_functions::aggregates::AggregateFunctionFactory; use crate::binder::wrap_cast; use crate::binder::ColumnBindingBuilder; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::ColumnSet; @@ -36,7 +37,6 @@ use crate::plans::EvalScalar; use crate::plans::FunctionCall; use crate::plans::Join; use crate::plans::JoinType; -use crate::plans::PatternPlan; use crate::plans::RelOp; use crate::plans::RelOperator; use crate::plans::ScalarItem; @@ -128,7 +128,7 @@ use crate::Visibility; pub struct RuleEagerAggregation { id: RuleID, - patterns: Vec, + matchers: Vec, metadata: MetadataRef, } @@ -142,7 +142,7 @@ impl RuleEagerAggregation { // Expression // | // * - patterns: vec![ + matchers: vec![ // Expression // | // Aggregate(final) @@ -152,40 +152,19 @@ impl RuleEagerAggregation { // Join // / \ // * * - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_pattern_leaf()), - Arc::new(SExpr::create_pattern_leaf()), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + 
op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], + }], + }, // Expression // | // Aggregate(final) @@ -197,48 +176,22 @@ impl RuleEagerAggregation { // Join // / \ // * * - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_pattern_leaf()), - Arc::new(SExpr::create_pattern_leaf()), - )), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], + }], + }], + }, // Expression // | // Sort @@ -250,48 +203,22 @@ impl RuleEagerAggregation { // Join // / \ // * * - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_pattern_leaf()), - Arc::new(SExpr::create_pattern_leaf()), - )), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], + }], + }], + }, // Expression // | // Sort @@ -305,56 +232,25 @@ impl RuleEagerAggregation { // Join // / \ // * * - SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Sort, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::Aggregate, - } - .into(), - ), - Arc::new(SExpr::create_unary( - Arc::new( - PatternPlan { - plan_type: RelOp::EvalScalar, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_pattern_leaf()), - Arc::new(SExpr::create_pattern_leaf()), - )), - )), - )), - )), - )), - ), + Matcher::MatchOp { + op_type: RelOp::EvalScalar, + children: vec![Matcher::MatchOp { + op_type: RelOp::Sort, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::Aggregate, + children: vec![Matcher::MatchOp { + op_type: RelOp::EvalScalar, + 
children: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }], + }], + }], + }], + }], + }, ], metadata, } @@ -372,8 +268,8 @@ impl Rule for RuleEagerAggregation { state: &mut TransformResult, ) -> databend_common_exception::Result<()> { let mut matched_idx = 0; - for (idx, pattern) in self.patterns.iter().enumerate() { - if s_expr.match_pattern(pattern) { + for (idx, matcher) in self.matchers.iter().enumerate() { + if matcher.matches(s_expr) { matched_idx = idx + 1; break; } @@ -1254,8 +1150,12 @@ impl Rule for RuleEagerAggregation { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers + } + + fn transformation(&self) -> bool { + false } } diff --git a/src/query/sql/src/planner/optimizer/rule/transform/rule_left_exchange_join.rs b/src/query/sql/src/planner/optimizer/rule/transform/rule_left_exchange_join.rs index 78069e63e084..8ae8e949c1b2 100644 --- a/src/query/sql/src/planner/optimizer/rule/transform/rule_left_exchange_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/transform/rule_left_exchange_join.rs @@ -20,15 +20,14 @@ use databend_common_exception::Result; use super::util::get_join_predicates; use crate::binder::JoinPredicate; +use crate::optimizer::extract::Matcher; use crate::optimizer::rule::Rule; use crate::optimizer::rule::TransformResult; use crate::optimizer::RelExpr; use crate::optimizer::RuleID; use crate::optimizer::SExpr; -use crate::plans::ComparisonOp; use crate::plans::Join; use crate::plans::JoinType; -use crate::plans::PatternPlan; use crate::plans::RelOp; /// Rule to apply swap on a left-deep join. @@ -48,7 +47,7 @@ use crate::plans::RelOp; /// t1 t3 pub struct RuleLeftExchangeJoin { id: RuleID, - patterns: Vec, + matchers: Vec, } impl RuleLeftExchangeJoin { @@ -63,25 +62,16 @@ impl RuleLeftExchangeJoin { // | \ // | * // * - patterns: vec![SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_binary( - Arc::new( - PatternPlan { - plan_type: RelOp::Join, - } - .into(), - ), - Arc::new(SExpr::create_pattern_leaf()), - Arc::new(SExpr::create_pattern_leaf()), - )), - Arc::new(SExpr::create_pattern_leaf()), - )], + matchers: vec![Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![ + Matcher::MatchOp { + op_type: RelOp::Join, + children: vec![Matcher::Leaf, Matcher::Leaf], + }, + Matcher::Leaf, + ], + }], } } } @@ -154,8 +144,12 @@ impl Rule for RuleLeftExchangeJoin { // TODO(leiysky): push down the predicate join_3.non_equi_conditions.push(pred.clone()); } - JoinPredicate::Both { left, right, op } => { - if op == ComparisonOp::Equal { + JoinPredicate::Both { + left, + right, + is_equal_op, + } => { + if is_equal_op { join_3.left_conditions.push(left.clone()); join_3.right_conditions.push(right.clone()); } else { @@ -183,8 +177,12 @@ impl Rule for RuleLeftExchangeJoin { // TODO(leiysky): push down the predicate join_4.non_equi_conditions.push(predicate.clone()); } - JoinPredicate::Both { left, right, op } => { - if op == ComparisonOp::Equal { + JoinPredicate::Both { + left, + right, + is_equal_op, + } => { + if is_equal_op { join_4.left_conditions.push(left.clone()); join_4.right_conditions.push(right.clone()); } else { @@ -229,8 +227,8 @@ impl Rule for RuleLeftExchangeJoin { Ok(()) } - fn patterns(&self) -> &Vec { - &self.patterns + fn matchers(&self) -> &[Matcher] { + &self.matchers } fn transformation(&self) -> bool { diff --git a/src/query/sql/src/planner/optimizer/s_expr.rs 
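// ============================================================================
// Illustrative aside (not part of the patch): a minimal, self-contained sketch
// of the Matcher shape the rewritten rules above construct. It assumes Matcher
// mirrors the SExpr::match_pattern helper removed later in this patch --
// compare operator kinds, require the same arity, recurse into children, and
// let Leaf accept any subtree. The real type lives in crate::optimizer::extract
// and may differ in detail.
#[derive(PartialEq)]
enum RelOp {
    Join,
    Scan,
}

enum Matcher {
    MatchOp { op_type: RelOp, children: Vec<Matcher> },
    Leaf,
}

struct SExpr {
    op: RelOp,
    children: Vec<SExpr>,
}

impl Matcher {
    fn matches(&self, expr: &SExpr) -> bool {
        match self {
            // A leaf matcher accepts any subtree, like the old pattern leaf.
            Matcher::Leaf => true,
            Matcher::MatchOp { op_type, children } => {
                expr.op == *op_type
                    && expr.children.len() == children.len()
                    && children
                        .iter()
                        .zip(expr.children.iter())
                        .all(|(m, c)| m.matches(c))
            }
        }
    }
}

fn main() {
    // Join with two arbitrary children, as in RuleCommuteJoinBaseTable above.
    let matcher = Matcher::MatchOp {
        op_type: RelOp::Join,
        children: vec![Matcher::Leaf, Matcher::Leaf],
    };
    let expr = SExpr {
        op: RelOp::Join,
        children: vec![
            SExpr { op: RelOp::Scan, children: vec![] },
            SExpr { op: RelOp::Scan, children: vec![] },
        ],
    };
    assert!(matcher.matches(&expr));
}
// ============================================================================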
b/src/query/sql/src/planner/optimizer/s_expr.rs index 6239ec3e2036..4a3cc626a24c 100644 --- a/src/query/sql/src/planner/optimizer/s_expr.rs +++ b/src/query/sql/src/planner/optimizer/s_expr.rs @@ -25,9 +25,6 @@ use crate::optimizer::rule::AppliedRules; use crate::optimizer::rule::RuleID; use crate::optimizer::StatInfo; use crate::plans::Exchange; -use crate::plans::Operator; -use crate::plans::PatternPlan; -use crate::plans::RelOp; use crate::plans::RelOperator; use crate::plans::Scan; use crate::plans::SubqueryExpr; @@ -98,21 +95,6 @@ impl SExpr { Self::create(plan, vec![], None, None, None) } - pub fn create_pattern_leaf() -> Self { - Self::create( - Arc::new( - PatternPlan { - plan_type: RelOp::Pattern, - } - .into(), - ), - vec![], - None, - None, - None, - ) - } - pub fn plan(&self) -> &RelOperator { &self.plan } @@ -132,37 +114,10 @@ impl SExpr { self.children.len() } - pub fn is_pattern(&self) -> bool { - matches!(self.plan.rel_op(), RelOp::Pattern) - } - pub fn original_group(&self) -> Option { self.original_group } - pub fn match_pattern(&self, pattern: &SExpr) -> bool { - if pattern.plan.rel_op() != RelOp::Pattern { - // Pattern is plan - if self.plan.rel_op() != pattern.plan.rel_op() { - return false; - } - - if self.arity() != pattern.arity() { - // Check if current expression has same arity with current pattern - return false; - } - - for (e, p) in self.children.iter().zip(pattern.children.iter()) { - // Check children - if !e.match_pattern(p) { - return false; - } - } - }; - - true - } - /// Replace children with given new `children`. /// Note that this method will keep the `applied_rules` of /// current `SExpr` unchanged. @@ -362,7 +317,6 @@ impl SExpr { | RelOperator::DummyTableScan(_) | RelOperator::CteScan(_) | RelOperator::AddRowNumber(_) - | RelOperator::Pattern(_) | RelOperator::MaterializedCte(_) | RelOperator::ConstantTableScan(_) => {} }; @@ -416,6 +370,21 @@ impl SExpr { add_internal_column_index_into_child(expr, column_index, table_index) } + + // The method will clear the applied rules of current SExpr and its children. + pub fn clear_applied_rules(&mut self) { + self.applied_rules.clear(); + let children = self + .children() + .iter() + .map(|child| { + let mut child = (**child).clone(); + child.clear_applied_rules(); + Arc::new(child) + }) + .collect::>(); + self.children = children; + } } fn find_subquery(rel_op: &RelOperator) -> bool { @@ -428,7 +397,6 @@ fn find_subquery(rel_op: &RelOperator) -> bool { | RelOperator::DummyTableScan(_) | RelOperator::CteScan(_) | RelOperator::AddRowNumber(_) - | RelOperator::Pattern(_) | RelOperator::MaterializedCte(_) | RelOperator::ConstantTableScan(_) => false, RelOperator::Join(op) => { diff --git a/src/query/sql/src/planner/planner.rs b/src/query/sql/src/planner/planner.rs index 2878bf93b8ea..0debd05b6491 100644 --- a/src/query/sql/src/planner/planner.rs +++ b/src/query/sql/src/planner/planner.rs @@ -93,12 +93,6 @@ impl Planner { // Step 2: Parse the SQL. let (mut stmt, format) = parse_sql(&tokens, sql_dialect)?; - if matches!(stmt, Statement::CopyIntoLocation(_)) { - // Indicate binder there is no need to collect column statistics for the binding table. - self.ctx - .attach_query_str(QueryKind::CopyIntoTable, String::new()); - } - self.replace_stmt(&mut stmt, sql_dialect); // Step 3: Bind AST with catalog, and generate a pure logical SExpr @@ -110,7 +104,14 @@ impl Planner { name_resolution_ctx, metadata.clone(), ); + + // Indicate binder there is no need to collect column statistics for the binding table. 
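// ============================================================================
// Illustrative aside (not part of the patch): the new SExpr::clear_applied_rules
// above walks an Arc-shared tree by cloning each child, clearing the clone,
// and swapping it back in. The same clone-and-clear walk over a simplified
// node type (names here are stand-ins, not the real SExpr):
use std::sync::Arc;

#[derive(Clone)]
struct Node {
    applied_rules: Vec<u32>,
    children: Vec<Arc<Node>>,
}

impl Node {
    fn clear_applied_rules(&mut self) {
        self.applied_rules.clear();
        let children: Vec<Arc<Node>> = self
            .children
            .iter()
            .map(|child| {
                // Arc gives no mutable access to shared data, so clone the
                // child, clear the clone, and wrap it in a fresh Arc.
                let mut child = (**child).clone();
                child.clear_applied_rules();
                Arc::new(child)
            })
            .collect();
        self.children = children;
    }
}
// ============================================================================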
+ self.ctx + .attach_query_str(get_query_kind(&stmt), stmt.to_mask_sql()); let plan = binder.bind(&stmt).await?; + // attach again to avoid the query kind is overwritten by the subquery + self.ctx + .attach_query_str(get_query_kind(&stmt), stmt.to_mask_sql()); // Step 4: Optimize the SExpr with optimizers, and generate optimized physical SExpr let opt_ctx = OptimizerContext::new(self.ctx.clone(), metadata.clone()) @@ -192,3 +193,19 @@ impl Planner { self.add_max_rows_limit(stmt); } } + +pub fn get_query_kind(stmt: &Statement) -> QueryKind { + match stmt { + Statement::Query { .. } => QueryKind::Query, + Statement::CopyIntoTable(_) => QueryKind::CopyIntoTable, + Statement::CopyIntoLocation(_) => QueryKind::CopyIntoLocation, + Statement::Explain { .. } => QueryKind::Explain, + Statement::Insert(_) => QueryKind::Insert, + Statement::Replace(_) + | Statement::Delete(_) + | Statement::MergeInto(_) + | Statement::OptimizeTable(_) + | Statement::Update(_) => QueryKind::Update, + _ => QueryKind::Other, + } +} diff --git a/src/query/sql/src/planner/plans/aggregate.rs b/src/query/sql/src/planner/plans/aggregate.rs index 9e62b698ddaa..7e1c6e6197dc 100644 --- a/src/query/sql/src/planner/plans/aggregate.rs +++ b/src/query/sql/src/planner/plans/aggregate.rs @@ -268,12 +268,13 @@ impl Operator for Aggregate { f64::min(res, cardinality) }; - let precise_cardinality = - if self.group_items.is_empty() && self.mode == AggregateMode::Final { - Some(1) - } else { - None - }; + let precise_cardinality = if self.group_items.is_empty() + && matches!(self.mode, AggregateMode::Final | AggregateMode::Initial) + { + Some(1) + } else { + None + }; Ok(Arc::new(StatInfo { cardinality, statistics: Statistics { diff --git a/src/query/sql/src/planner/plans/ddl/task.rs b/src/query/sql/src/planner/plans/ddl/task.rs index d3c48b624561..a1fd9c14c556 100644 --- a/src/query/sql/src/planner/plans/ddl/task.rs +++ b/src/query/sql/src/planner/plans/ddl/task.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
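// ============================================================================
// Illustrative aside (not part of the patch): get_query_kind above lets later
// stages branch on the statement class instead of re-parsing SQL. A
// self-contained sketch of that style of check, modelled on the
// need_stats_provider change to the parquet tables later in this patch
// (the enum values here are a pared-down stand-in):
enum QueryKind {
    Query,
    CopyIntoTable,
    CopyIntoLocation,
    Other,
}

// COPY only projects columns, so column statistics are not worth collecting.
fn need_stats_provider(kind: QueryKind) -> bool {
    !matches!(kind, QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation)
}

fn main() {
    assert!(need_stats_provider(QueryKind::Query));
    assert!(!need_stats_provider(QueryKind::CopyIntoLocation));
    assert!(need_stats_provider(QueryKind::Other));
}
// ============================================================================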
+use std::collections::BTreeMap; use std::sync::Arc; use databend_common_ast::ast::AlterTaskOptions; @@ -47,6 +48,7 @@ pub fn task_schema() -> DataSchemaRef { DataField::new("next_schedule_time", DataType::Timestamp.wrap_nullable()), DataField::new("last_committed_on", DataType::Timestamp), DataField::new("last_suspended_on", DataType::Timestamp.wrap_nullable()), + DataField::new("session_parameters", DataType::Variant.wrap_nullable()), ])) } @@ -69,6 +71,7 @@ pub fn task_run_schema() -> DataSchemaRef { DataField::new("completed_time", DataType::Timestamp.wrap_nullable()), DataField::new("scheduled_time", DataType::Timestamp), DataField::new("root_task_id", DataType::String), + DataField::new("session_parameters", DataType::Variant.wrap_nullable()), ])) } @@ -82,6 +85,7 @@ pub struct CreateTaskPlan { pub after: Vec, pub when_condition: Option, pub suspend_task_after_num_failures: Option, + pub session_parameters: BTreeMap, pub sql: String, pub comment: String, } diff --git a/src/query/sql/src/planner/plans/join.rs b/src/query/sql/src/planner/plans/join.rs index 8e0cb9213177..f047dc15e964 100644 --- a/src/query/sql/src/planner/plans/join.rs +++ b/src/query/sql/src/planner/plans/join.rs @@ -81,7 +81,14 @@ impl JoinType { } pub fn is_outer_join(&self) -> bool { - matches!(self, JoinType::Left | JoinType::Right | JoinType::Full) + matches!( + self, + JoinType::Left + | JoinType::Right + | JoinType::Full + | JoinType::LeftSingle + | JoinType::RightSingle + ) } pub fn is_mark_join(&self) -> bool { @@ -151,8 +158,10 @@ pub struct Join { // if we execute distributed merge into, we need to hold the // hash table to get not match data from source. pub need_hold_hash_table: bool, - // Under cluster, mark if the join is broadcast join. - pub broadcast: bool, + pub is_lateral: bool, + // When left/right single join converted to inner join, record the original join type + // and do some special processing during runtime. 
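// ============================================================================
// Illustrative aside (not part of the patch): the join.rs change above widens
// is_outer_join so left/right single joins are also treated as outer joins.
// A self-contained sketch of the predicate with a pared-down JoinType
// (variant list trimmed to what the check needs):
enum JoinType {
    Inner,
    Left,
    Right,
    Full,
    LeftSingle,
    RightSingle,
}

impl JoinType {
    fn is_outer_join(&self) -> bool {
        matches!(
            self,
            JoinType::Left
                | JoinType::Right
                | JoinType::Full
                | JoinType::LeftSingle
                | JoinType::RightSingle
        )
    }
}

fn main() {
    assert!(JoinType::LeftSingle.is_outer_join());
    assert!(!JoinType::Inner.is_outer_join());
}
// ============================================================================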
+ pub single_to_inner: Option, } impl Default for Join { @@ -165,7 +174,8 @@ impl Default for Join { marker_index: Default::default(), from_correlated_subquery: Default::default(), need_hold_hash_table: false, - broadcast: false, + is_lateral: false, + single_to_inner: None, } } } @@ -526,7 +536,7 @@ impl Operator for Join { | JoinType::Full | JoinType::RightAnti | JoinType::RightSemi - | JoinType::RightMark + | JoinType::LeftMark ) { let left_stat_info = rel_expr.derive_cardinality_child(0)?; let right_stat_info = rel_expr.derive_cardinality_child(1)?; @@ -593,7 +603,7 @@ impl Operator for Join { | JoinType::Full | JoinType::RightAnti | JoinType::RightSemi - | JoinType::RightMark + | JoinType::LeftMark | JoinType::RightSingle ) { // (Any, Broadcast) diff --git a/src/query/sql/src/planner/plans/mod.rs b/src/query/sql/src/planner/plans/mod.rs index 04e5dff0da5f..2f0127173091 100644 --- a/src/query/sql/src/planner/plans/mod.rs +++ b/src/query/sql/src/planner/plans/mod.rs @@ -35,7 +35,6 @@ mod udf; mod copy_into_location; pub mod operator; -mod pattern; mod plan; mod presign; mod project_set; @@ -79,7 +78,6 @@ pub use merge_into::DELETE_NAME; pub use merge_into::INSERT_NAME; pub use merge_into::UPDATE_NAME; pub use operator::*; -pub use pattern::PatternPlan; pub use plan::*; pub use presign::*; pub use project_set::*; diff --git a/src/query/sql/src/planner/plans/operator.rs b/src/query/sql/src/planner/plans/operator.rs index 75328d6c594f..9a7cefae3e11 100644 --- a/src/query/sql/src/planner/plans/operator.rs +++ b/src/query/sql/src/planner/plans/operator.rs @@ -25,7 +25,6 @@ use super::eval_scalar::EvalScalar; use super::filter::Filter; use super::join::Join; use super::limit::Limit; -use super::pattern::PatternPlan; use super::scan::Scan; use super::sort::Sort; use super::union_all::UnionAll; @@ -49,11 +48,6 @@ pub trait Operator { /// Get arity of this operator fn arity(&self) -> usize; - /// Is this operator a pattern operator - fn is_pattern(&self) -> bool { - false - } - /// Derive relational property fn derive_relational_prop(&self, rel_expr: &RelExpr) -> Result>; @@ -128,7 +122,6 @@ pub enum RelOperator { MaterializedCte(MaterializedCte), ConstantTableScan(ConstantTableScan), Udf(Udf), - Pattern(PatternPlan), } impl Operator for RelOperator { @@ -141,7 +134,6 @@ impl Operator for RelOperator { RelOperator::Aggregate(rel_op) => rel_op.rel_op(), RelOperator::Sort(rel_op) => rel_op.rel_op(), RelOperator::Limit(rel_op) => rel_op.rel_op(), - RelOperator::Pattern(rel_op) => rel_op.rel_op(), RelOperator::Exchange(rel_op) => rel_op.rel_op(), RelOperator::UnionAll(rel_op) => rel_op.rel_op(), RelOperator::DummyTableScan(rel_op) => rel_op.rel_op(), @@ -174,7 +166,6 @@ impl Operator for RelOperator { RelOperator::MaterializedCte(rel_op) => rel_op.arity(), RelOperator::ConstantTableScan(rel_op) => rel_op.arity(), RelOperator::Udf(rel_op) => rel_op.arity(), - RelOperator::Pattern(rel_op) => rel_op.arity(), } } @@ -187,7 +178,6 @@ impl Operator for RelOperator { RelOperator::Aggregate(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::Sort(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::Limit(rel_op) => rel_op.derive_relational_prop(rel_expr), - RelOperator::Pattern(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::Exchange(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::UnionAll(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::DummyTableScan(rel_op) => rel_op.derive_relational_prop(rel_expr), @@ -210,7 +200,6 @@ impl 
Operator for RelOperator { RelOperator::Aggregate(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::Sort(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::Limit(rel_op) => rel_op.derive_physical_prop(rel_expr), - RelOperator::Pattern(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::Exchange(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::UnionAll(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::DummyTableScan(rel_op) => rel_op.derive_physical_prop(rel_expr), @@ -233,7 +222,6 @@ impl Operator for RelOperator { RelOperator::Aggregate(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::Sort(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::Limit(rel_op) => rel_op.derive_stats(rel_expr), - RelOperator::Pattern(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::Exchange(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::UnionAll(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::DummyTableScan(rel_op) => rel_op.derive_stats(rel_expr), @@ -276,9 +264,6 @@ impl Operator for RelOperator { RelOperator::Limit(rel_op) => { rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) } - RelOperator::Pattern(rel_op) => { - rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) - } RelOperator::Exchange(rel_op) => { rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) } @@ -370,9 +355,6 @@ impl Operator for RelOperator { RelOperator::Udf(rel_op) => { rel_op.compute_required_prop_children(ctx, rel_expr, required) } - RelOperator::Pattern(rel_op) => { - rel_op.compute_required_prop_children(ctx, rel_expr, required) - } } } } @@ -562,25 +544,6 @@ impl TryFrom for Limit { } } -impl From for RelOperator { - fn from(v: PatternPlan) -> Self { - Self::Pattern(v) - } -} - -impl TryFrom for PatternPlan { - type Error = ErrorCode; - fn try_from(value: RelOperator) -> Result { - if let RelOperator::Pattern(value) = value { - Ok(value) - } else { - Err(ErrorCode::Internal( - "Cannot downcast RelOperator to Pattern", - )) - } - } -} - impl From for RelOperator { fn from(v: Exchange) -> Self { Self::Exchange(v) diff --git a/src/query/sql/src/planner/plans/pattern.rs b/src/query/sql/src/planner/plans/pattern.rs deleted file mode 100644 index 2f545d321ed9..000000000000 --- a/src/query/sql/src/planner/plans/pattern.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -use std::sync::Arc; - -use databend_common_catalog::table_context::TableContext; -use databend_common_exception::ErrorCode; - -use crate::optimizer::PhysicalProperty; -use crate::optimizer::RelExpr; -use crate::optimizer::RelationalProperty; -use crate::optimizer::RequiredProperty; -use crate::optimizer::StatInfo; -use crate::plans::Operator; -use crate::plans::RelOp; - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct PatternPlan { - pub plan_type: RelOp, -} - -impl Operator for PatternPlan { - fn rel_op(&self) -> RelOp { - self.plan_type.clone() - } - - fn arity(&self) -> usize { - 0 - } - - fn is_pattern(&self) -> bool { - true - } - - fn derive_relational_prop( - &self, - _rel_expr: &RelExpr, - ) -> databend_common_exception::Result> { - Err(ErrorCode::Internal( - "Cannot derive relational property for pattern plan", - )) - } - - fn derive_physical_prop( - &self, - _rel_expr: &RelExpr, - ) -> databend_common_exception::Result { - Err(ErrorCode::Internal( - "Cannot derive physical property for pattern plan", - )) - } - - fn derive_stats( - &self, - _rel_expr: &RelExpr, - ) -> databend_common_exception::Result> { - Err(ErrorCode::Internal( - "Cannot derive cardinality for pattern plan", - )) - } - - fn compute_required_prop_child( - &self, - _ctx: Arc, - _rel_expr: &RelExpr, - _child_index: usize, - _required: &RequiredProperty, - ) -> databend_common_exception::Result { - Err(ErrorCode::Internal( - "Cannot compute required property for pattern plan", - )) - } - - fn compute_required_prop_children( - &self, - _ctx: Arc, - _rel_expr: &RelExpr, - _required: &RequiredProperty, - ) -> databend_common_exception::Result>> { - Err(ErrorCode::Internal( - "Cannot compute required property for pattern plan", - )) - } -} diff --git a/src/query/sql/src/planner/plans/scalar_expr.rs b/src/query/sql/src/planner/plans/scalar_expr.rs index 39d3e82fb5ba..1b581c61e684 100644 --- a/src/query/sql/src/planner/plans/scalar_expr.rs +++ b/src/query/sql/src/planner/plans/scalar_expr.rs @@ -176,6 +176,28 @@ impl ScalarExpr { visitor.visit(self)?; Ok(()) } + + pub fn has_one_column_ref(&self) -> bool { + struct BoundColumnRefVisitor { + has_column_ref: bool, + num_column_ref: usize, + } + + impl<'a> Visitor<'a> for BoundColumnRefVisitor { + fn visit_bound_column_ref(&mut self, _col: &'a BoundColumnRef) -> Result<()> { + self.has_column_ref = true; + self.num_column_ref += 1; + Ok(()) + } + } + + let mut visitor = BoundColumnRefVisitor { + has_column_ref: false, + num_column_ref: 0, + }; + visitor.visit(self).unwrap(); + visitor.has_column_ref && visitor.num_column_ref == 1 + } } impl From for ScalarExpr { @@ -566,6 +588,8 @@ pub struct SubqueryExpr { pub(crate) data_type: Box, #[educe(Hash(method = "hash_column_set"))] pub outer_columns: ColumnSet, + // If contain aggregation function in scalar subquery output + pub contain_agg: Option, } impl SubqueryExpr { diff --git a/src/query/sql/src/planner/plans/window.rs b/src/query/sql/src/planner/plans/window.rs index 57985e74ddb3..fc394de650b8 100644 --- a/src/query/sql/src/planner/plans/window.rs +++ b/src/query/sql/src/planner/plans/window.rs @@ -64,6 +64,8 @@ pub struct Window { pub order_by: Vec, // window frames pub frame: WindowFuncFrame, + // limit for potentially possible push-down + pub limit: Option, } impl Window { diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 2c6558beb74f..72085e023773 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ 
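// ============================================================================
// Illustrative aside (not part of the patch): has_one_column_ref above counts
// column references with a small visitor and succeeds only when exactly one is
// found. The same idea over a toy expression tree (types here are stand-ins,
// not the real ScalarExpr / Visitor):
enum Expr {
    ColumnRef(String),
    Constant(i64),
    Func(Vec<Expr>),
}

fn count_column_refs(expr: &Expr) -> usize {
    match expr {
        Expr::ColumnRef(_) => 1,
        Expr::Constant(_) => 0,
        Expr::Func(args) => args.iter().map(count_column_refs).sum(),
    }
}

fn has_one_column_ref(expr: &Expr) -> bool {
    count_column_refs(expr) == 1
}

fn main() {
    // a + 1 has exactly one column reference.
    let e = Expr::Func(vec![Expr::ColumnRef("a".to_string()), Expr::Constant(1)]);
    assert!(has_one_column_ref(&e));
}
// ============================================================================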
b/src/query/sql/src/planner/semantic/type_check.rs @@ -17,6 +17,7 @@ use std::collections::VecDeque; use std::sync::Arc; use std::vec; +use databend_common_ast::ast::contain_agg_func; use databend_common_ast::ast::BinaryOperator; use databend_common_ast::ast::ColumnID; use databend_common_ast::ast::Expr; @@ -26,6 +27,8 @@ use databend_common_ast::ast::Lambda; use databend_common_ast::ast::Literal; use databend_common_ast::ast::MapAccessor; use databend_common_ast::ast::Query; +use databend_common_ast::ast::SelectTarget; +use databend_common_ast::ast::SetExpr; use databend_common_ast::ast::SubqueryModifier; use databend_common_ast::ast::TrimWhere; use databend_common_ast::ast::TypeName; @@ -2349,6 +2352,17 @@ impl<'a> TypeChecker<'a> { ))); } + let mut contain_agg = None; + if let SetExpr::Select(select_stmt) = &subquery.body { + if typ == SubqueryType::Scalar { + let select = &select_stmt.select_list[0]; + if let SelectTarget::AliasedExpr { expr, .. } = select { + // Check if contain aggregation function + contain_agg = Some(contain_agg_func(expr)); + } + } + } + let mut data_type = output_context.columns[0].data_type.clone(); let rel_expr = RelExpr::with_s_expr(&s_expr); @@ -2374,6 +2388,7 @@ impl<'a> TypeChecker<'a> { data_type: data_type.clone(), typ, outer_columns: rel_prop.outer_columns.clone(), + contain_agg, }; let data_type = subquery_expr.data_type(); @@ -2620,8 +2635,7 @@ impl<'a> TypeChecker<'a> { if args.len() >= 2 { let box (arg, _) = self.resolve(args[1]).await.ok()?; if let Ok(arg) = ConstantExpr::try_from(arg) { - if let Scalar::String(val) = arg.value { - let sort_order = unsafe { std::str::from_utf8_unchecked(&val) }; + if let Scalar::String(sort_order) = arg.value { if sort_order.eq_ignore_ascii_case("asc") { asc = true; } else if sort_order.eq_ignore_ascii_case("desc") { @@ -2645,8 +2659,7 @@ impl<'a> TypeChecker<'a> { if args.len() == 3 { let box (arg, _) = self.resolve(args[2]).await.ok()?; if let Ok(arg) = ConstantExpr::try_from(arg) { - if let Scalar::String(val) = arg.value { - let nulls_order = unsafe { std::str::from_utf8_unchecked(&val) }; + if let Scalar::String(nulls_order) = arg.value { if nulls_order.eq_ignore_ascii_case("nulls first") { nulls_first = true; } else if nulls_order.eq_ignore_ascii_case("nulls last") { @@ -2685,8 +2698,7 @@ impl<'a> TypeChecker<'a> { } let box (arg, _) = self.resolve(args[1]).await.ok()?; if let Ok(arg) = ConstantExpr::try_from(arg) { - if let Scalar::String(arg) = arg.value { - let aggr_func_name = unsafe { std::str::from_utf8_unchecked(&arg) }; + if let Scalar::String(aggr_func_name) = arg.value { let func_name = format!("array_{}", aggr_func_name); let args_ref: Vec<&Expr> = vec![args[0]]; return Some( @@ -2754,7 +2766,7 @@ impl<'a> TypeChecker<'a> { } else { let trim_scalar = ConstantExpr { span, - value: databend_common_expression::Scalar::String(" ".as_bytes().to_vec()), + value: databend_common_expression::Scalar::String(" ".to_string()), } .into(); ("trim_both", trim_scalar, DataType::String) @@ -2782,7 +2794,7 @@ impl<'a> TypeChecker<'a> { scale: *scale, })), Literal::Float64(float) => Scalar::Number(NumberScalar::Float64((*float).into())), - Literal::String(string) => Scalar::String(string.as_bytes().to_vec()), + Literal::String(string) => Scalar::String(string.clone()), Literal::Boolean(boolean) => Scalar::Boolean(*boolean), Literal::Null => Scalar::Null, }; @@ -3092,7 +3104,7 @@ impl<'a> TypeChecker<'a> { { let key = ConstantExpr { span, - value: Scalar::String(field_name.clone().into_bytes()), + value: 
Scalar::String(field_name.clone()), } .into(); @@ -3411,6 +3423,7 @@ impl<'a> TypeChecker<'a> { data_type: data_type.clone(), typ: SubqueryType::Any, outer_columns: rel_prop.outer_columns.clone(), + contain_agg: None, }; let data_type = subquery_expr.data_type(); Ok(Box::new((subquery_expr.into(), data_type))) @@ -3448,7 +3461,7 @@ impl<'a> TypeChecker<'a> { let keypaths_str = format!("{}", keypaths); let path_scalar = ScalarExpr::ConstantExpr(ConstantExpr { span: None, - value: Scalar::String(keypaths_str.into_bytes()), + value: Scalar::String(keypaths_str), }); let args = vec![scalar, path_scalar]; diff --git a/src/query/sql/src/planner/semantic/virtual_column_rewriter.rs b/src/query/sql/src/planner/semantic/virtual_column_rewriter.rs index 41da8545bc0a..b80b61aba648 100644 --- a/src/query/sql/src/planner/semantic/virtual_column_rewriter.rs +++ b/src/query/sql/src/planner/semantic/virtual_column_rewriter.rs @@ -203,7 +203,7 @@ impl VirtualColumnRewriter { return Some(()); } let name = match constant.value.clone() { - Scalar::String(v) => match parse_key_paths(&v) { + Scalar::String(v) => match parse_key_paths(v.as_bytes()) { Ok(key_paths) => { let mut name = String::new(); name.push_str(&base_column.column_name); diff --git a/src/query/storages/common/cache/src/providers/table_data_cache.rs b/src/query/storages/common/cache/src/providers/table_data_cache.rs index 403efae7a7c5..8fb86874dc81 100644 --- a/src/query/storages/common/cache/src/providers/table_data_cache.rs +++ b/src/query/storages/common/cache/src/providers/table_data_cache.rs @@ -93,6 +93,7 @@ impl CacheAccessor for TableDataCache let k = k.as_ref(); if let Some(item) = self.external_cache.get(k) { metrics_inc_cache_hit_count(1, TABLE_DATA_CACHE_NAME); + // Profile::record_usize_profile(ProfileStatisticsName::ScanCacheBytes, process_values.bytes); Some(item) } else { metrics_inc_cache_miss_count(1, TABLE_DATA_CACHE_NAME); diff --git a/src/query/storages/common/index/tests/it/filters/bloom_filter.rs b/src/query/storages/common/index/tests/it/filters/bloom_filter.rs index c6f5de7de357..3eed74e3762d 100644 --- a/src/query/storages/common/index/tests/it/filters/bloom_filter.rs +++ b/src/query/storages/common/index/tests/it/filters/bloom_filter.rs @@ -82,7 +82,7 @@ fn test_bloom_filter() -> Result<()> { ), BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(b"a".to_vec())), + Value::Scalar(Scalar::String("a".to_string())), ), BlockEntry::new( map_ty.clone(), @@ -161,7 +161,7 @@ fn test_bloom_filter() -> Result<()> { "1", bloom_fields.clone(), schema.clone(), - Scalar::String(b"a".to_vec()), + Scalar::String("a".to_string()), DataType::String ) ); @@ -172,7 +172,7 @@ fn test_bloom_filter() -> Result<()> { "1", bloom_fields.clone(), schema.clone(), - Scalar::String(b"b".to_vec()), + Scalar::String("b".to_string()), DataType::String ) ); @@ -183,7 +183,7 @@ fn test_bloom_filter() -> Result<()> { "1", bloom_fields, schema.clone(), - Scalar::String(b"d".to_vec()), + Scalar::String("d".to_string()), DataType::String ) ); @@ -197,7 +197,7 @@ fn test_bloom_filter() -> Result<()> { map_ty.clone(), Scalar::Number(NumberScalar::UInt8(1)), DataType::Number(NumberDataType::UInt8), - Scalar::String(b"a".to_vec()), + Scalar::String("a".to_string()), DataType::String ) ); @@ -210,7 +210,7 @@ fn test_bloom_filter() -> Result<()> { map_ty.clone(), Scalar::Number(NumberScalar::UInt8(2)), DataType::Number(NumberDataType::UInt8), - Scalar::String(b"b".to_vec()), + Scalar::String("b".to_string()), DataType::String ) ); @@ -223,7 +223,7 @@ 
fn test_bloom_filter() -> Result<()> { map_ty, Scalar::Number(NumberScalar::UInt8(3)), DataType::Number(NumberDataType::UInt8), - Scalar::String(b"x".to_vec()), + Scalar::String("x".to_string()), DataType::String ) ); @@ -261,7 +261,7 @@ fn test_specify_bloom_filter() -> Result<()> { "1", fields, schema, - Scalar::String(b"d".to_vec()), + Scalar::String("d".to_string()), DataType::String ) ); @@ -301,7 +301,7 @@ fn test_string_bloom_filter() -> Result<()> { "1", fields, schema, - Scalar::String(b"d".to_vec()), + Scalar::String("d".to_string()), DataType::String ) ); diff --git a/src/query/storages/common/pruner/src/internal_column_pruner.rs b/src/query/storages/common/pruner/src/internal_column_pruner.rs index 21d283a498e5..3641431215d2 100644 --- a/src/query/storages/common/pruner/src/internal_column_pruner.rs +++ b/src/query/storages/common/pruner/src/internal_column_pruner.rs @@ -59,10 +59,9 @@ impl InternalColumnPruner { pub fn should_keep(&self, col_name: &str, value: &str) -> bool { if self.input_domains.contains_key(col_name) { let mut input_domains = self.input_domains.clone(); - let bytes = value.as_bytes().to_vec(); let domain = Domain::String(StringDomain { - min: bytes.clone(), - max: Some(bytes), + min: value.to_string(), + max: Some(value.to_string()), }); input_domains.insert(col_name.to_string(), domain); diff --git a/src/query/storages/common/table_meta/benches/bench.rs b/src/query/storages/common/table_meta/benches/bench.rs index 697b88d1c741..6d95b5f6b07d 100644 --- a/src/query/storages/common/table_meta/benches/bench.rs +++ b/src/query/storages/common/table_meta/benches/bench.rs @@ -157,8 +157,8 @@ fn build_test_segment_info( }); let col_stat = ColumnStatistics::new( - Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?.into_bytes()), - Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?.into_bytes()), + Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?), + Scalar::String(String::from_utf8(vec![b'a'; STATS_STRING_PREFIX_LEN])?), 0, 0, None, diff --git a/src/query/storages/delta/src/partition_columns/values_serde.rs b/src/query/storages/delta/src/partition_columns/values_serde.rs index 66f1c1a13d3f..f35b3c0f3beb 100644 --- a/src/query/storages/delta/src/partition_columns/values_serde.rs +++ b/src/query/storages/delta/src/partition_columns/values_serde.rs @@ -38,7 +38,7 @@ fn field_to_value(value: &ParquetFieldValue) -> Result { ParquetFieldValue::ULong(v) => Ok(Scalar::Number(NumberScalar::UInt64(*v))), ParquetFieldValue::Float(v) => Ok(Scalar::Number(NumberScalar::Float32(OrderedFloat(*v)))), ParquetFieldValue::Double(v) => Ok(Scalar::Number(NumberScalar::Float64(OrderedFloat(*v)))), - ParquetFieldValue::Str(v) => Ok(Scalar::String(v.as_bytes().to_vec())), + ParquetFieldValue::Str(v) => Ok(Scalar::String(v.clone())), _ => Err(ErrorCode::IllegalDataType(format!( "Unsupported parquet type {:?}", value @@ -54,7 +54,7 @@ pub fn str_to_scalar(value: &str, data_type: &DataType) -> Result { } match data_type { DataType::Nullable(t) => str_to_scalar(value, t), - DataType::String => Ok(Scalar::String(value.as_bytes().to_vec())), + DataType::String => Ok(Scalar::String(value.to_string())), DataType::Number(num_ty) => match num_ty { NumberDataType::UInt8 => { let num = value.parse::().unwrap(); diff --git a/src/query/storages/delta/src/table_source.rs b/src/query/storages/delta/src/table_source.rs index 362da585817a..887a8f3ea625 100644 --- a/src/query/storages/delta/src/table_source.rs +++ 
b/src/query/storages/delta/src/table_source.rs @@ -31,6 +31,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_storages_parquet::ParquetPart; use databend_common_storages_parquet::ParquetRSFullReader; use opendal::Reader; @@ -134,6 +136,10 @@ impl Processor for DeltaTableSource { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + data_block.memory_size(), + ); self.output.push_data(Ok(data_block)); Ok(Event::NeedConsume) } diff --git a/src/query/storages/fuse/src/operations/agg_index_sink.rs b/src/query/storages/fuse/src/operations/agg_index_sink.rs index 2c902e81e763..45a3a4b679aa 100644 --- a/src/query/storages/fuse/src/operations/agg_index_sink.rs +++ b/src/query/storages/fuse/src/operations/agg_index_sink.rs @@ -21,7 +21,6 @@ use async_trait::unboxed_simple; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_expression::types::StringType; -use databend_common_expression::types::ValueType; use databend_common_expression::BlockRowIndex; use databend_common_expression::DataBlock; use databend_common_expression::TableSchemaRef; @@ -78,11 +77,7 @@ impl AggIndexSink { let block_name_col = col.value.try_downcast::().unwrap(); let block_id = self.blocks.len(); for i in 0..block.num_rows() { - let location = unsafe { - String::from_utf8_unchecked(StringType::to_owned_scalar( - block_name_col.index(i).unwrap(), - )) - }; + let location = block_name_col.index(i).unwrap().to_string(); self.location_data .entry(location) diff --git a/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs index 54a4ceda2472..80101a78bcc4 100644 --- a/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/native_data_source_deserializer.rs @@ -60,6 +60,8 @@ use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_sql::IndexType; use xorf::BinaryFuse16; @@ -414,6 +416,7 @@ impl NativeDeserializeDataTransform { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile(ProfileStatisticsName::ScanBytes, data_block.memory_size()); self.output_data = Some(data_block); } diff --git a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs index db6438159bc3..dd340ef5dc77 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_data_source_deserializer.rs @@ -39,6 +39,8 @@ use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use 
databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_sql::IndexType; use xorf::BinaryFuse16; @@ -251,6 +253,10 @@ impl Processor for DeserializeDataTransform { bytes: block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + block.memory_size(), + ); self.output_data = Some(block); } @@ -299,6 +305,10 @@ impl Processor for DeserializeDataTransform { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + data_block.memory_size(), + ); let mut data_block = data_block.resort(&self.src_schema, &self.output_schema)?; diff --git a/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs b/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs index 9090ef2da725..15d5526fbad5 100644 --- a/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs +++ b/src/query/storages/fuse/src/operations/read/runtime_filter_prunner.rs @@ -49,7 +49,6 @@ pub fn runtime_filter_pruner( if filters.is_empty() { return Ok(false); } - let part = FusePartInfo::from_part(part)?; let pruned = filters.iter().any(|filter| { let column_refs = filter.column_refs(); diff --git a/src/query/storages/fuse/src/operations/replace_into/mutator/column_hash.rs b/src/query/storages/fuse/src/operations/replace_into/mutator/column_hash.rs index 7518ac607a34..8cbaec3d8c9a 100644 --- a/src/query/storages/fuse/src/operations/replace_into/mutator/column_hash.rs +++ b/src/query/storages/fuse/src/operations/replace_into/mutator/column_hash.rs @@ -70,7 +70,7 @@ pub fn row_hash_of_columns( NumberScalar::Float64(v) => sip.write_u64(v.to_bits()), }, ScalarRef::Timestamp(v) => sip.write_i64(v), - ScalarRef::String(v) => sip.write(v), + ScalarRef::String(v) => sip.write(v.as_bytes()), ScalarRef::Bitmap(v) => sip.write(v), ScalarRef::Decimal(v) => match v { DecimalScalar::Decimal128(i, DecimalSize { precision, scale }) => { diff --git a/src/query/storages/fuse/src/operations/replace_into/mutator/merge_into_mutator.rs b/src/query/storages/fuse/src/operations/replace_into/mutator/merge_into_mutator.rs index f24d4c643b5a..42f8f003f824 100644 --- a/src/query/storages/fuse/src/operations/replace_into/mutator/merge_into_mutator.rs +++ b/src/query/storages/fuse/src/operations/replace_into/mutator/merge_into_mutator.rs @@ -788,8 +788,8 @@ mod tests { ( 1, range( - Scalar::String("a".to_string().into_bytes()), - Scalar::String("z".to_string().into_bytes()), + Scalar::String("a".to_string()), + Scalar::String("z".to_string()), ), ), // range of xx_time [100, 200] @@ -827,8 +827,8 @@ mod tests { ), // for xx_type column, overlaps ( - Scalar::String("b".to_string().into_bytes()), - Scalar::String("y".to_string().into_bytes()), + Scalar::String("b".to_string()), + Scalar::String("y".to_string()), ), // for xx_time column, overlaps ( @@ -872,8 +872,8 @@ mod tests { ), // for xx_type column, overlaps ( - Scalar::String("b".to_string().into_bytes()), - Scalar::String("b".to_string().into_bytes()), + Scalar::String("b".to_string()), + Scalar::String("b".to_string()), ), // for xx_time column, overlaps ( diff --git a/src/query/storages/fuse/src/operations/replace_into/mutator/mutator_replace_into.rs 
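// ============================================================================
// Illustrative aside (not part of the patch): the sources and deserializers in
// this patch all gain the same two-step accounting -- bump the scan progress
// and record ScanBytes on the query profile. A self-contained stand-in for
// that pattern (the real Profile lives in databend_common_pipeline_core and
// is not this simple):
use std::sync::atomic::{AtomicUsize, Ordering};

enum ProfileStatisticsName {
    ScanBytes,
}

static SCAN_BYTES: AtomicUsize = AtomicUsize::new(0);

struct Profile;

impl Profile {
    fn record_usize_profile(name: ProfileStatisticsName, value: usize) {
        match name {
            ProfileStatisticsName::ScanBytes => {
                SCAN_BYTES.fetch_add(value, Ordering::Relaxed);
            }
        }
    }
}

fn main() {
    // What each processor does right after decoding a block:
    let block_memory_size = 4096usize; // stand-in for data_block.memory_size()
    Profile::record_usize_profile(ProfileStatisticsName::ScanBytes, block_memory_size);
    assert_eq!(SCAN_BYTES.load(Ordering::Relaxed), 4096);
}
// ============================================================================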
b/src/query/storages/fuse/src/operations/replace_into/mutator/mutator_replace_into.rs index 6c6ab26e69a7..b8fd2f718fa9 100644 --- a/src/query/storages/fuse/src/operations/replace_into/mutator/mutator_replace_into.rs +++ b/src/query/storages/fuse/src/operations/replace_into/mutator/mutator_replace_into.rs @@ -270,11 +270,10 @@ impl ReplaceIntoMutator { ScalarRef::Tuple(_) => "[TUPLE]".to_owned(), ScalarRef::Variant(_) => "[VARIANT]".to_owned(), // for string, return the first 5 chars - ScalarRef::String(s) => { - let val = String::from_utf8_lossy(s).to_string(); + ScalarRef::String(val) => { // take the first 5 chars - match val.as_str().char_indices().nth(5) { - None => val, + match val.char_indices().nth(5) { + None => val.to_string(), Some((idx, _)) => format!("{}...", &val[..idx]), } } diff --git a/src/query/storages/fuse/src/statistics/column_statistic.rs b/src/query/storages/fuse/src/statistics/column_statistic.rs index ebc00004470c..2ca1ea61998b 100644 --- a/src/query/storages/fuse/src/statistics/column_statistic.rs +++ b/src/query/storages/fuse/src/statistics/column_statistic.rs @@ -222,86 +222,74 @@ pub const STATS_STRING_PREFIX_LEN: usize = 16; impl Trim for Scalar { fn trim_min(self, trim_len: usize) -> Option { match self { - Scalar::String(bytes) => match String::from_utf8(bytes) { - Ok(mut v) => { - if v.len() <= trim_len { - Some(Scalar::String(v.into_bytes())) - } else { - // find the character boundary to prevent String::truncate from panic - let vs = v.as_str(); - let slice = match vs.char_indices().nth(trim_len) { - None => vs, - Some((idx, _)) => &vs[..idx], - }; + Scalar::String(mut s) => { + if s.len() <= trim_len { + Some(Scalar::String(s)) + } else { + // find the character boundary to prevent String::truncate from panic + let vs = s.as_str(); + let slice = match vs.char_indices().nth(trim_len) { + None => vs, + Some((idx, _)) => &vs[..idx], + }; - // do truncate - Some(Scalar::String({ - v.truncate(slice.len()); - v.into_bytes() - })) - } - } - Err(_) => { - // if failed to convert the bytes into (utf-8)string, just ignore it. 
- None + // do truncate + Some(Scalar::String({ + s.truncate(slice.len()); + s + })) } - }, + } v => Some(v), } } fn trim_max(self, trim_len: usize) -> Option { match self { - Scalar::String(bytes) => match String::from_utf8(bytes) { - Ok(v) => { - if v.len() <= trim_len { - // if number of bytes is lesser, just return - Some(Scalar::String(v.into_bytes())) - } else { - // no need to trim, less than STRING_PREFIX_LEN chars - let number_of_chars = v.as_str().chars().count(); - if number_of_chars <= trim_len { - return Some(Scalar::String(v.into_bytes())); - } + Scalar::String(v) => { + if v.len() <= trim_len { + // if number of bytes is lesser, just return + Some(Scalar::String(v)) + } else { + // no need to trim, less than STRING_PREFIX_LEN chars + let number_of_chars = v.as_str().chars().count(); + if number_of_chars <= trim_len { + return Some(Scalar::String(v)); + } - // slice the input (at the boundary of chars), takes at most STRING_PREFIX_LEN chars - let vs = v.as_str(); - let sliced = match vs.char_indices().nth(trim_len) { - None => vs, - Some((idx, _)) => &vs[..idx], - }; + // slice the input (at the boundary of chars), takes at most STRING_PREFIX_LEN chars + let vs = v.as_str(); + let sliced = match vs.char_indices().nth(trim_len) { + None => vs, + Some((idx, _)) => &vs[..idx], + }; - // find the position to replace the char with REPLACEMENT_CHAR - // in reversed order, break at the first one we met - let mut idx = None; - for (i, c) in sliced.char_indices().rev() { - if c < STATS_REPLACEMENT_CHAR { - idx = Some(i); - break; - } + // find the position to replace the char with REPLACEMENT_CHAR + // in reversed order, break at the first one we met + let mut idx = None; + for (i, c) in sliced.char_indices().rev() { + if c < STATS_REPLACEMENT_CHAR { + idx = Some(i); + break; } + } - // grab the replacement_point - let replacement_point = idx?; + // grab the replacement_point + let replacement_point = idx?; - // rebuild the string (since the len of result string is rather small) - let mut r = String::with_capacity(trim_len); - for (i, c) in sliced.char_indices() { - if i < replacement_point { - r.push(c) - } else { - r.push(STATS_REPLACEMENT_CHAR); - } + // rebuild the string (since the len of result string is rather small) + let mut r = String::with_capacity(trim_len); + for (i, c) in sliced.char_indices() { + if i < replacement_point { + r.push(c) + } else { + r.push(STATS_REPLACEMENT_CHAR); } - - Some(Scalar::String(r.into_bytes())) } + + Some(Scalar::String(r)) } - Err(_) => { - // if failed to convert the bytes into (utf-8)string, just ignore it. 
- None - } - }, + } v => Some(v), } } diff --git a/src/query/storages/fuse/src/table_functions/clustering_information/clustering_information.rs b/src/query/storages/fuse/src/table_functions/clustering_information/clustering_information.rs index 9fe14417f126..c88da1bd7802 100644 --- a/src/query/storages/fuse/src/table_functions/clustering_information/clustering_information.rs +++ b/src/query/storages/fuse/src/table_functions/clustering_information/clustering_information.rs @@ -218,7 +218,7 @@ impl<'a> ClusteringInformation<'a> { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String(cluster_key.as_bytes().to_vec())), + Value::Scalar(Scalar::String(cluster_key.clone())), ), BlockEntry::new( DataType::Number(NumberDataType::UInt64), diff --git a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block.rs b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block.rs index 7eac11930553..65653201fa7c 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_blocks/fuse_block.rs @@ -103,7 +103,7 @@ impl<'a> FuseBlock<'a> { let limit = self.limit.unwrap_or(usize::MAX); let len = std::cmp::min(snapshot.summary.block_count as usize, limit); - let snapshot_id = snapshot.snapshot_id.simple().to_string().into_bytes(); + let snapshot_id = snapshot.snapshot_id.simple().to_string(); let timestamp = snapshot.timestamp.unwrap_or_default().timestamp_micros(); let mut block_location = StringColumnBuilder::with_capacity(len, len); let mut block_size = Vec::with_capacity(len); @@ -131,7 +131,7 @@ impl<'a> FuseBlock<'a> { for block in segment.blocks.iter() { let block = block.as_ref(); - block_location.put_slice(block.location.0.as_bytes()); + block_location.put_str(&block.location.0); block_location.commit_row(); block_size.push(block.block_size); file_size.push(block.file_size); @@ -140,7 +140,7 @@ impl<'a> FuseBlock<'a> { block .bloom_filter_index_location .as_ref() - .map(|s| s.0.as_bytes().to_vec()), + .map(|s| s.0.clone()), ); bloom_filter_size.push(block.bloom_filter_index_size); @@ -159,10 +159,7 @@ impl<'a> FuseBlock<'a> { Ok(DataBlock::new( vec![ - BlockEntry::new( - DataType::String, - Value::Scalar(Scalar::String(snapshot_id.to_vec())), - ), + BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(snapshot_id))), BlockEntry::new( DataType::Timestamp, Value::Scalar(Scalar::Timestamp(timestamp)), diff --git a/src/query/storages/fuse/src/table_functions/fuse_columns/fuse_column.rs b/src/query/storages/fuse/src/table_functions/fuse_columns/fuse_column.rs index 6df0a169e8e0..b4082995abbd 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_columns/fuse_column.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_columns/fuse_column.rs @@ -103,7 +103,7 @@ impl<'a> FuseColumn<'a> { let limit = self.limit.unwrap_or(usize::MAX); let len = std::cmp::min(snapshot.summary.block_count as usize, limit); - let snapshot_id = snapshot.snapshot_id.simple().to_string().into_bytes(); + let snapshot_id = snapshot.snapshot_id.simple().to_string(); let timestamp = snapshot.timestamp.unwrap_or_default().timestamp_micros(); let mut block_location = StringColumnBuilder::with_capacity(len, len); let mut block_size = vec![]; @@ -141,16 +141,16 @@ impl<'a> FuseColumn<'a> { for (id, column) in block.col_metas.iter() { if let Some(f) = leaf_fields.iter().find(|f| f.column_id == *id) { - block_location.put_slice(block.location.0.as_bytes()); + block_location.put_str(&block.location.0); 
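// ============================================================================
// Illustrative aside (not part of the patch): with Scalar::String now holding
// a String, the statistics trimming above truncates on a character boundary by
// locating the byte offset of the (trim_len + 1)-th char. A self-contained
// sketch of that truncation step:
fn truncate_at_char_boundary(mut s: String, trim_len: usize) -> String {
    // char_indices().nth(trim_len) yields the byte offset where the
    // (trim_len + 1)-th character starts; None means the string is already
    // short enough.
    let cut = s.char_indices().nth(trim_len).map(|(idx, _)| idx);
    if let Some(idx) = cut {
        s.truncate(idx);
    }
    s
}

fn main() {
    assert_eq!(truncate_at_char_boundary("hello world".to_string(), 5), "hello");
    // Multi-byte characters are kept whole instead of being split mid-byte.
    assert_eq!(truncate_at_char_boundary("αβγδε".to_string(), 3), "αβγ");
}
// ============================================================================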
block_location.commit_row(); block_size.push(block.block_size); file_size.push(block.file_size); row_count.push(column.total_rows() as u64); - column_name.put_slice(f.name.as_bytes()); + column_name.put_str(&f.name); column_name.commit_row(); - column_type.put_slice(f.data_type.to_string().as_bytes()); + column_type.put_str(&f.data_type.to_string()); column_type.commit_row(); column_id.push(*id); @@ -177,10 +177,7 @@ impl<'a> FuseColumn<'a> { Ok(DataBlock::new( vec![ - BlockEntry::new( - DataType::String, - Value::Scalar(Scalar::String(snapshot_id.to_vec())), - ), + BlockEntry::new(DataType::String, Value::Scalar(Scalar::String(snapshot_id))), BlockEntry::new( DataType::Timestamp, Value::Scalar(Scalar::Timestamp(timestamp)), diff --git a/src/query/storages/fuse/src/table_functions/fuse_encodings/fuse_encoding.rs b/src/query/storages/fuse/src/table_functions/fuse_encodings/fuse_encoding.rs index 93bbfc0aca2e..fd88c487e9bd 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_encodings/fuse_encoding.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_encodings/fuse_encoding.rs @@ -177,15 +177,14 @@ impl<'a> FuseEncoding<'a> { validity_size.reserve(num_row); compressed_size.reserve(num_row); uncompressed_size.reserve(num_row); - let tmp_table_name = StringColumnBuilder::repeat(table.as_bytes(), num_row); - let tmp_column_name = - StringColumnBuilder::repeat(column_info.field.name.as_bytes(), num_row); - let tmp_column_type = StringColumnBuilder::repeat(type_str.as_bytes(), num_row); + let tmp_table_name = StringColumnBuilder::repeat(table, num_row); + let tmp_column_name = StringColumnBuilder::repeat(&column_info.field.name, num_row); + let tmp_column_type = StringColumnBuilder::repeat(type_str, num_row); for p in pages_info { validity_size.push(p.validity_size); compressed_size.push(p.compressed_size); uncompressed_size.push(p.uncompressed_size); - l1.put_slice(encoding_to_string(&p.body).as_bytes()); + l1.put_str(&encoding_to_string(&p.body)); l1.commit_row(); let l2_encoding = match &p.body { PageBody::Dict(dict) => Some(encoding_to_string(&dict.indices.body)), @@ -196,7 +195,7 @@ impl<'a> FuseEncoding<'a> { _ => None, }; if let Some(l2_encoding) = l2_encoding { - l2.push(l2_encoding.as_bytes()); + l2.push(&l2_encoding); } else { l2.push_null(); } diff --git a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment.rs b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment.rs index 75d1cff25898..8c109b65251d 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_segments/fuse_segment.rs @@ -101,7 +101,7 @@ impl<'a> FuseSegment<'a> { let mut row_count: Vec = Vec::with_capacity(len); let mut compressed: Vec = Vec::with_capacity(len); let mut uncompressed: Vec = Vec::with_capacity(len); - let mut file_location: Vec> = Vec::with_capacity(len); + let mut file_location: Vec = Vec::with_capacity(len); let segments_io = SegmentsIO::create( self.ctx.clone(), @@ -125,7 +125,7 @@ impl<'a> FuseSegment<'a> { row_count.push(segment.summary.row_count); compressed.push(segment.summary.compressed_byte_size); uncompressed.push(segment.summary.uncompressed_byte_size); - file_location.push(segment_locations[idx].0.clone().into_bytes()); + file_location.push(segment_locations[idx].0.clone()); row_num += 1; if row_num >= limit { diff --git a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot.rs 
b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot.rs index 406386f7245f..9c88c36249f9 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_snapshots/fuse_snapshot.rs @@ -93,9 +93,9 @@ impl<'a> FuseSnapshot<'a> { latest_snapshot_version: u64, ) -> Result { let len = snapshots.len(); - let mut snapshot_ids: Vec> = Vec::with_capacity(len); - let mut snapshot_locations: Vec> = Vec::with_capacity(len); - let mut prev_snapshot_ids: Vec>> = Vec::with_capacity(len); + let mut snapshot_ids: Vec = Vec::with_capacity(len); + let mut snapshot_locations: Vec = Vec::with_capacity(len); + let mut prev_snapshot_ids: Vec> = Vec::with_capacity(len); let mut format_versions: Vec = Vec::with_capacity(len); let mut segment_count: Vec = Vec::with_capacity(len); let mut block_count: Vec = Vec::with_capacity(len); @@ -106,15 +106,14 @@ impl<'a> FuseSnapshot<'a> { let mut timestamps: Vec> = Vec::with_capacity(len); let mut current_snapshot_version = latest_snapshot_version; for s in snapshots { - snapshot_ids.push(s.snapshot_id.simple().to_string().into_bytes()); + snapshot_ids.push(s.snapshot_id.simple().to_string()); snapshot_locations.push( location_generator - .snapshot_location_from_uuid(&s.snapshot_id, current_snapshot_version)? - .into_bytes(), + .snapshot_location_from_uuid(&s.snapshot_id, current_snapshot_version)?, ); - let (id, ver) = s.prev_snapshot_id.map_or((None, 0), |(id, v)| { - (Some(id.simple().to_string().into_bytes()), v) - }); + let (id, ver) = s + .prev_snapshot_id + .map_or((None, 0), |(id, v)| (Some(id.simple().to_string()), v)); prev_snapshot_ids.push(id); format_versions.push(s.format_version); segment_count.push(s.segment_count); diff --git a/src/query/storages/fuse/src/table_functions/fuse_statistics/fuse_statistic.rs b/src/query/storages/fuse/src/table_functions/fuse_statistics/fuse_statistic.rs index 960e106acd75..bc2d33073824 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_statistics/fuse_statistic.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_statistics/fuse_statistic.rs @@ -58,13 +58,13 @@ impl<'a> FuseStatistic<'a> { _summy: &Statistics, table_statistics: &Option>, ) -> Result { - let mut col_ndvs: Vec> = Vec::with_capacity(1); + let mut col_ndvs: Vec = Vec::with_capacity(1); if let Some(table_statistics) = table_statistics { let mut ndvs: String = "".to_string(); for (i, n) in table_statistics.column_distinct_values.iter() { ndvs.push_str(&format!("({},{});", *i, *n)); } - col_ndvs.push(ndvs.into_bytes()); + col_ndvs.push(ndvs); }; Ok(DataBlock::new_from_columns(vec![StringType::from_data( diff --git a/src/query/storages/fuse/src/table_functions/table_args.rs b/src/query/storages/fuse/src/table_functions/table_args.rs index 47d083636e0b..e15fd0cde2be 100644 --- a/src/query/storages/fuse/src/table_functions/table_args.rs +++ b/src/query/storages/fuse/src/table_functions/table_args.rs @@ -23,14 +23,13 @@ use crate::table_functions::TableArgs; pub fn string_value(value: &Scalar) -> Result { match value { - Scalar::String(val) => String::from_utf8(val.clone()) - .map_err(|e| ErrorCode::BadArguments(format!("invalid string. 
{}", e))), + Scalar::String(val) => Ok(val.clone()), _ => Err(ErrorCode::BadArguments("invalid string.")), } } pub fn string_literal(val: &str) -> Scalar { - Scalar::String(val.as_bytes().to_vec()) + Scalar::String(val.to_string()) } pub fn cmp_with_null(v1: &Scalar, v2: &Scalar) -> Ordering { diff --git a/src/query/storages/hive/hive/src/hive_block_filter.rs b/src/query/storages/hive/hive/src/hive_block_filter.rs index 8df4764e8dbb..9414bf48b695 100644 --- a/src/query/storages/hive/hive/src/hive_block_filter.rs +++ b/src/query/storages/hive/hive/src/hive_block_filter.rs @@ -103,7 +103,7 @@ impl HiveBlockFilter { null_count = row_group.num_rows(); Scalar::Null } else { - Scalar::String(p_value.as_bytes().to_vec()) + Scalar::String(p_value) }; let col_stats = ColumnStatistics::new(v.clone(), v, null_count as u64, 0, None); @@ -282,8 +282,12 @@ impl HiveBlockFilter { None } else { let null_count = s.null_count.unwrap(); - let max = StringType::upcast_scalar(s.max_value.clone().unwrap()); - let min = StringType::upcast_scalar(s.min_value.clone().unwrap()); + let max = StringType::upcast_scalar( + String::from_utf8(s.max_value.clone().unwrap()).ok()?, + ); + let min = StringType::upcast_scalar( + String::from_utf8(s.min_value.clone().unwrap()).ok()?, + ); Some((max, min, null_count)) } } diff --git a/src/query/storages/hive/hive/src/hive_table_source.rs b/src/query/storages/hive/hive/src/hive_table_source.rs index 51ebb86d388f..e27488d22bb1 100644 --- a/src/query/storages/hive/hive/src/hive_table_source.rs +++ b/src/query/storages/hive/hive/src/hive_table_source.rs @@ -37,6 +37,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use log::debug; use opendal::Operator; @@ -179,6 +181,7 @@ impl HiveTableSource { rows: prewhere_datablocks.iter().map(|x| x.num_rows()).sum(), bytes: prewhere_datablocks.iter().map(|x| x.memory_size()).sum(), }; + Profile::record_usize_profile(ProfileStatisticsName::ScanBytes, progress_values.bytes); self.scan_progress.incr(&progress_values); if let Some(filter) = self.prewhere_filter.as_ref() { diff --git a/src/query/storages/hive/hive/src/utils.rs b/src/query/storages/hive/hive/src/utils.rs index c2b96cf1146e..6312dd015919 100644 --- a/src/query/storages/hive/hive/src/utils.rs +++ b/src/query/storages/hive/hive/src/utils.rs @@ -31,7 +31,7 @@ pub(crate) fn str_field_to_scalar(value: &str, data_type: &DataType) -> Result Ok(Scalar::String(value.as_bytes().to_vec())), + DataType::String => Ok(Scalar::String(value.to_string())), DataType::Number(num_ty) => match num_ty { NumberDataType::UInt8 => { let num = value.parse::().unwrap(); diff --git a/src/query/storages/iceberg/src/stats.rs b/src/query/storages/iceberg/src/stats.rs index 4a53e2ba88c7..8c9bb2785d1d 100644 --- a/src/query/storages/iceberg/src/stats.rs +++ b/src/query/storages/iceberg/src/stats.rs @@ -107,7 +107,7 @@ fn parse_binary_value(ty: &TableDataType, data: &[u8]) -> Option { let v = i64::from_le_bytes(data.try_into().ok()?); Some(Scalar::Timestamp(v)) } - TableDataType::String => Some(Scalar::String(data.to_vec())), + TableDataType::String => Some(Scalar::String(String::from_utf8(data.to_vec()).ok()?)), // TODO: support Decimal. _ => None, // Not supported. 
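// ============================================================================
// Illustrative aside (not part of the patch): with Scalar::String carrying a
// String instead of Vec<u8>, helpers like string_value/string_literal above no
// longer need a UTF-8 validation step. A self-contained sketch with a
// pared-down Scalar (stand-in for the real databend_common_expression type):
enum Scalar {
    String(String),
    Null,
}

fn string_value(value: &Scalar) -> Result<String, String> {
    match value {
        // Previously this branch had to run String::from_utf8 and could fail.
        Scalar::String(val) => Ok(val.clone()),
        _ => Err("invalid string.".to_string()),
    }
}

fn string_literal(val: &str) -> Scalar {
    Scalar::String(val.to_string())
}

fn main() {
    assert_eq!(string_value(&string_literal("fuse")).unwrap(), "fuse");
    assert!(string_value(&Scalar::Null).is_err());
}
// ============================================================================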
} diff --git a/src/query/storages/iceberg/src/table_source.rs b/src/query/storages/iceberg/src/table_source.rs index a980b7368dd5..7377d1cbe9b5 100644 --- a/src/query/storages/iceberg/src/table_source.rs +++ b/src/query/storages/iceberg/src/table_source.rs @@ -27,6 +27,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_storages_parquet::ParquetPart; use databend_common_storages_parquet::ParquetRSFullReader; use opendal::Reader; @@ -102,6 +104,10 @@ impl Processor for IcebergTableSource { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + data_block.memory_size(), + ); self.output.push_data(Ok(data_block)); Ok(Event::NeedConsume) } diff --git a/src/query/storages/parquet/src/parquet2/parquet_table/create.rs b/src/query/storages/parquet/src/parquet2/parquet_table/create.rs index 7302ce402a33..ad2c69a1d211 100644 --- a/src/query/storages/parquet/src/parquet2/parquet_table/create.rs +++ b/src/query/storages/parquet/src/parquet2/parquet_table/create.rs @@ -58,7 +58,10 @@ impl Parquet2Table { // If the query is `COPY`, we don't need to collect column statistics. // It's because the only transform could be contained in `COPY` command is projection. - let need_stats_provider = !matches!(ctx.get_query_kind(), QueryKind::CopyIntoTable); + let need_stats_provider = !matches!( + ctx.get_query_kind(), + QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation + ); let mut table_info = create_parquet_table_info(arrow_schema.clone(), &stage_info); let column_statistics_provider = if need_stats_provider { let file_metas = get_parquet2_file_meta( diff --git a/src/query/storages/parquet/src/parquet2/processors/deserialize_transform.rs b/src/query/storages/parquet/src/parquet2/processors/deserialize_transform.rs index e80f1d70a243..7ab94a0d1fa7 100644 --- a/src/query/storages/parquet/src/parquet2/processors/deserialize_transform.rs +++ b/src/query/storages/parquet/src/parquet2/processors/deserialize_transform.rs @@ -42,6 +42,8 @@ use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_storage::CopyStatus; use databend_common_storage::FileStatus; use opendal::services::Memory; @@ -153,6 +155,7 @@ impl Parquet2DeserializeTransform { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile(ProfileStatisticsName::ScanBytes, data_block.memory_size()); self.output_data.push(data_block); Ok(()) } diff --git a/src/query/storages/parquet/src/parquet_rs/copy_into_table/source.rs b/src/query/storages/parquet/src/parquet_rs/copy_into_table/source.rs index 0ff6b85dbde2..d1c2206acc98 100644 --- a/src/query/storages/parquet/src/parquet_rs/copy_into_table/source.rs +++ b/src/query/storages/parquet/src/parquet_rs/copy_into_table/source.rs @@ -30,6 +30,8 @@ use databend_common_pipeline_core::processors::Event; use 
databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use opendal::Operator; use crate::parquet_rs::copy_into_table::reader::RowGroupReaderForCopy; @@ -128,6 +130,10 @@ impl Processor for ParquetCopySource { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + data_block.memory_size(), + ); self.output.push_data(Ok(data_block)); Ok(Event::NeedConsume) } diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index 6f92f91da583..be130f82a16c 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -135,7 +135,10 @@ impl ParquetRSTable { // If the query is `COPY`, we don't need to collect column statistics. // It's because the only transform could be contained in `COPY` command is projection. - let need_stats_provider = !matches!(ctx.get_query_kind(), QueryKind::CopyIntoTable); + let need_stats_provider = !matches!( + ctx.get_query_kind(), + QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation + ); let settings = ctx.get_settings(); let max_threads = settings.get_max_threads()? as usize; let max_memory_usage = settings.get_max_memory_usage()?; diff --git a/src/query/storages/parquet/src/parquet_rs/source.rs b/src/query/storages/parquet/src/parquet_rs/source.rs index 2fb717009483..4acacadde27c 100644 --- a/src/query/storages/parquet/src/parquet_rs/source.rs +++ b/src/query/storages/parquet/src/parquet_rs/source.rs @@ -28,6 +28,8 @@ use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_core::processors::Profile; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use databend_common_storage::CopyStatus; use databend_common_storage::FileStatus; @@ -136,6 +138,10 @@ impl Processor for ParquetSource { bytes: data_block.memory_size(), }; self.scan_progress.incr(&progress_values); + Profile::record_usize_profile( + ProfileStatisticsName::ScanBytes, + data_block.memory_size(), + ); self.output.push_data(Ok(data_block)); Ok(Event::NeedConsume) } diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/column.rs b/src/query/storages/parquet/src/parquet_rs/statistics/column.rs index 416c12022c4e..96bdde5ecfc9 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/column.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/column.rs @@ -27,7 +27,7 @@ use super::utils::decode_decimal128_from_bytes; use super::utils::decode_decimal256_from_bytes; /// according to https://github.com/apache/parquet-format/blob/master/LogicalTypes.md -pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> ColumnStatistics { +pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option { let (max, min) = if s.has_min_max_set() { match s { Statistics::Boolean(s) => (Scalar::Boolean(*s.max()), Scalar::Boolean(*s.min())), @@ -110,8 +110,8 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> ColumnS 
Statistics::Float(s) => (Scalar::from(*s.max()), Scalar::from(*s.min())), Statistics::Double(s) => (Scalar::from(*s.max()), Scalar::from(*s.min())), Statistics::ByteArray(s) => ( - Scalar::String(s.max().as_bytes().to_vec()), - Scalar::String(s.min().as_bytes().to_vec()), + Scalar::String(String::from_utf8(s.max().as_bytes().to_vec()).ok()?), + Scalar::String(String::from_utf8(s.min().as_bytes().to_vec()).ok()?), ), Statistics::FixedLenByteArray(s) => { let (max, min) = (s.max(), s.min()); @@ -131,11 +131,11 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> ColumnS } else { (Scalar::Null, Scalar::Null) }; - ColumnStatistics::new( + Some(ColumnStatistics::new( min, max, s.null_count(), 0, // this field is not used. s.distinct_count(), - ) + )) } diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/page.rs b/src/query/storages/parquet/src/parquet_rs/statistics/page.rs index cb449ad944a4..0ae34f62bab0 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/page.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/page.rs @@ -246,8 +246,8 @@ fn convert_page_index_byte_array( ) -> Option { match (&index.min, &index.max, index.null_count) { (Some(min), Some(max), Some(null_count)) => Some(ColumnStatistics::new( - Scalar::String(min.as_bytes().to_vec()), - Scalar::String(max.as_bytes().to_vec()), + Scalar::String(String::from_utf8(min.as_bytes().to_vec()).ok()?), + Scalar::String(String::from_utf8(max.as_bytes().to_vec()).ok()?), null_count as u64, 0, None, diff --git a/src/query/storages/parquet/src/parquet_rs/statistics/row_group.rs b/src/query/storages/parquet/src/parquet_rs/statistics/row_group.rs index e9ab006dd48e..f1bac90e2a35 100644 --- a/src/query/storages/parquet/src/parquet_rs/statistics/row_group.rs +++ b/src/query/storages/parquet/src/parquet_rs/statistics/row_group.rs @@ -53,7 +53,7 @@ pub fn collect_row_group_stats( let column_stats = column.statistics().unwrap(); stats_of_columns.insert( *col_idx as u32, - convert_column_statistics(column_stats, &field.data_type().remove_nullable()), + convert_column_statistics(column_stats, &field.data_type().remove_nullable())?, ); } } else { @@ -63,7 +63,7 @@ pub fn collect_row_group_stats( let column_stats = column.statistics().unwrap(); stats_of_columns.insert( col_idx as u32, - convert_column_statistics(column_stats, &field.data_type().remove_nullable()), + convert_column_statistics(column_stats, &field.data_type().remove_nullable())?, ); } } diff --git a/src/query/storages/result_cache/src/table_function/table.rs b/src/query/storages/result_cache/src/table_function/table.rs index 9f3a542d55e0..06ece0903236 100644 --- a/src/query/storages/result_cache/src/table_function/table.rs +++ b/src/query/storages/result_cache/src/table_function/table.rs @@ -115,7 +115,7 @@ impl Table for ResultScan { } fn table_args(&self) -> Option { - let args = vec![Scalar::String(self.query_id.as_bytes().to_vec())]; + let args = vec![Scalar::String(self.query_id.clone())]; Some(TableArgs::new_positioned(args)) } diff --git a/src/query/storages/stage/src/append/output.rs b/src/query/storages/stage/src/append/output.rs index b7ba6ea4775b..c5b41fd3324f 100644 --- a/src/query/storages/stage/src/append/output.rs +++ b/src/query/storages/stage/src/append/output.rs @@ -154,7 +154,7 @@ fn file_infos_to_block(files: &[OutputFileInfo]) -> DataBlock { let mut rows = Vec::with_capacity(files.len()); let mut sizes = Vec::with_capacity(files.len()); for file in files { - 
paths.push(file.file_name.clone().as_bytes().to_vec()); + paths.push(file.file_name.as_str()); rows.push(file.summary.row_counts as u64); sizes.push(file.summary.output_bytes as u64); } diff --git a/src/query/storages/system/Cargo.toml b/src/query/storages/system/Cargo.toml index 27afdc5eadd9..9c33b9c157ab 100644 --- a/src/query/storages/system/Cargo.toml +++ b/src/query/storages/system/Cargo.toml @@ -26,7 +26,6 @@ databend-common-meta-types = { path = "../../../meta/types" } databend-common-metrics = { path = "../../../common/metrics" } databend-common-pipeline-core = { path = "../../pipeline/core" } databend-common-pipeline-sources = { path = "../../pipeline/sources" } -databend-common-profile = { path = "../../profile" } databend-common-sql = { path = "../../sql" } databend-common-storage = { path = "../../../common/storage" } databend-common-storages-fuse = { path = "../fuse" } diff --git a/src/query/storages/system/src/background_jobs_table.rs b/src/query/storages/system/src/background_jobs_table.rs index 1d708850dc7e..e43f403abedb 100644 --- a/src/query/storages/system/src/background_jobs_table.rs +++ b/src/query/storages/system/src/background_jobs_table.rs @@ -76,10 +76,10 @@ impl AsyncSystemTable for BackgroundJobTable { let mut creator = Vec::with_capacity(jobs.len()); let mut create_time = Vec::with_capacity(jobs.len()); for (_, name, job) in jobs { - names.push(name.as_bytes().to_vec()); + names.push(name); let job_type = job.job_params.as_ref().map(|x| x.job_type.clone()); if let Some(job_type) = job_type { - job_types.push(Some(job_type.to_string().as_bytes().to_vec())); + job_types.push(Some(job_type.to_string())); match job_type { BackgroundJobType::INTERVAL => { scheduled_job_interval_secs.push(Some( @@ -95,19 +95,13 @@ impl AsyncSystemTable for BackgroundJobTable { BackgroundJobType::CRON => { scheduled_job_interval_secs.push(None); scheduled_job_cron_expression.push(Some( - job.job_params - .as_ref() - .unwrap() - .scheduled_job_cron - .clone() - .as_bytes() - .to_vec(), + job.job_params.as_ref().unwrap().scheduled_job_cron.clone(), )); scheduled_job_cron_timezone.push( job.job_params .unwrap() .scheduled_job_timezone - .map(|tz| tz.to_string().as_bytes().to_vec()), + .map(|tz| tz.to_string()), ); } BackgroundJobType::ONESHOT => { @@ -122,17 +116,9 @@ impl AsyncSystemTable for BackgroundJobTable { scheduled_job_cron_expression.push(None); scheduled_job_cron_timezone.push(None); } - task_types.push(job.task_type.to_string().as_bytes().to_vec()); - job_states.push( - job.job_status - .as_ref() - .map(|x| x.job_state.to_string().as_bytes().to_vec()), - ); - last_task_ids.push( - job.job_status - .as_ref() - .and_then(|x| x.last_task_id.clone().map(|x| x.as_bytes().to_vec())), - ); + task_types.push(job.task_type.to_string()); + job_states.push(job.job_status.as_ref().map(|x| x.job_state.to_string())); + last_task_ids.push(job.job_status.as_ref().and_then(|x| x.last_task_id.clone())); last_task_run_at.push( job.job_status .as_ref() @@ -143,9 +129,9 @@ impl AsyncSystemTable for BackgroundJobTable { .as_ref() .and_then(|x| x.next_task_scheduled_time.map(|x| x.timestamp_micros())), ); - message.push(job.message.as_bytes().to_vec()); + message.push(job.message); last_updated.push(job.last_updated.map(|t| t.timestamp_micros())); - creator.push(job.creator.map(|x| x.to_string().as_bytes().to_vec())); + creator.push(job.creator.map(|x| x.to_string())); create_time.push(job.created_at.timestamp_micros()); } diff --git a/src/query/storages/system/src/background_tasks_table.rs 
b/src/query/storages/system/src/background_tasks_table.rs index 0b006941931a..b0e80364ce3c 100644 --- a/src/query/storages/system/src/background_tasks_table.rs +++ b/src/query/storages/system/src/background_tasks_table.rs @@ -75,10 +75,10 @@ impl AsyncSystemTable for BackgroundTaskTable { let mut create_timestamps = Vec::with_capacity(tasks.len()); let mut update_timestamps = Vec::with_capacity(tasks.len()); for (_, name, task) in tasks { - names.push(name.as_bytes().to_vec()); - types.push(task.task_type.to_string().as_bytes().to_vec()); - stats.push(task.task_state.to_string().as_bytes().to_vec()); - messages.push(task.message.as_bytes().to_vec()); + names.push(name); + types.push(task.task_type.to_string()); + stats.push(task.task_state.to_string()); + messages.push(task.message); compaction_stats.push( task.compaction_task_stats .as_ref() @@ -98,11 +98,8 @@ impl AsyncSystemTable for BackgroundTaskTable { table_ids.push(0); task_run_secs.push(None); } - creators.push(task.creator.map(|s| s.to_string().as_bytes().to_vec())); - trigger.push( - task.manual_trigger - .map(|s| s.trigger.to_string().as_bytes().to_vec()), - ); + creators.push(task.creator.map(|s| s.to_string())); + trigger.push(task.manual_trigger.map(|s| s.trigger.to_string())); create_timestamps.push(task.created_at.timestamp_micros()); update_timestamps.push(task.last_updated.unwrap_or_default().timestamp_micros()); } diff --git a/src/query/storages/system/src/backtrace_table.rs b/src/query/storages/system/src/backtrace_table.rs index 6ec64a5b82d9..311d003ff1f1 100644 --- a/src/query/storages/system/src/backtrace_table.rs +++ b/src/query/storages/system/src/backtrace_table.rs @@ -51,16 +51,19 @@ impl SyncSystemTable for BacktraceTable { } fn get_full_data(&self, ctx: Arc) -> Result { - let local_node = ctx.get_cluster().local_id.clone().into_bytes(); + let local_node = ctx.get_cluster().local_id.clone(); let (tasks, polling_tasks) = get_all_tasks(false); let tasks_size = tasks.len() + polling_tasks.len(); - let mut nodes: Vec> = Vec::with_capacity(tasks_size); - let mut queries_id: Vec> = Vec::with_capacity(tasks_size); - let mut queries_status: Vec> = Vec::with_capacity(tasks_size); - let mut stacks: Vec> = Vec::with_capacity(tasks_size); + let mut nodes: Vec = Vec::with_capacity(tasks_size); + let mut queries_id: Vec = Vec::with_capacity(tasks_size); + let mut queries_status: Vec = Vec::with_capacity(tasks_size); + let mut stacks: Vec = Vec::with_capacity(tasks_size); - for (status, mut tasks) in [("PENDING", tasks), ("RUNNING", polling_tasks)] { + for (status, mut tasks) in [ + ("PENDING".to_string(), tasks), + ("RUNNING".to_string(), polling_tasks), + ] { tasks.sort_by(|l, r| Ord::cmp(&l.stack_frames.len(), &r.stack_frames.len())); for item in tasks.into_iter().rev() { @@ -102,9 +105,9 @@ impl SyncSystemTable for BacktraceTable { } nodes.push(local_node.clone()); - stacks.push(stack_frames.into_bytes()); - queries_id.push(query_id.into_bytes()); - queries_status.push(status.as_bytes().to_vec()); + stacks.push(stack_frames); + queries_id.push(query_id); + queries_status.push(status.clone()); } } diff --git a/src/query/storages/system/src/build_options_table.rs b/src/query/storages/system/src/build_options_table.rs index 0de2cb47fda3..6d1953a3245f 100644 --- a/src/query/storages/system/src/build_options_table.rs +++ b/src/query/storages/system/src/build_options_table.rs @@ -43,26 +43,26 @@ impl SyncSystemTable for BuildOptionsTable { } fn get_full_data(&self, _: Arc) -> Result { - let mut cargo_features: Vec>; + let mut 
cargo_features: Vec; if let Some(features) = option_env!("VERGEN_CARGO_FEATURES") { cargo_features = features .split_terminator(',') - .map(|x| x.trim().as_bytes().to_vec()) + .map(|x| x.trim().to_string()) .collect(); } else { - cargo_features = vec!["not available".as_bytes().to_vec()]; + cargo_features = vec!["not available".to_string()]; } - let mut target_features: Vec> = env!("DATABEND_CARGO_CFG_TARGET_FEATURE") + let mut target_features: Vec = env!("DATABEND_CARGO_CFG_TARGET_FEATURE") .split_terminator(',') - .map(|x| x.trim().as_bytes().to_vec()) + .map(|x| x.trim().to_string()) .collect(); let length = max(cargo_features.len(), target_features.len()); - cargo_features.resize(length, "".as_bytes().to_vec()); - target_features.resize(length, "".as_bytes().to_vec()); + cargo_features.resize(length, "".to_string()); + target_features.resize(length, "".to_string()); Ok(DataBlock::new_from_columns(vec![ StringType::from_data(cargo_features), diff --git a/src/query/storages/system/src/caches_table.rs b/src/query/storages/system/src/caches_table.rs index aebb0ea006cb..848da0dfe0bb 100644 --- a/src/query/storages/system/src/caches_table.rs +++ b/src/query/storages/system/src/caches_table.rs @@ -68,68 +68,67 @@ impl SyncSystemTable for CachesTable { let table_column_array_cache = cache_manager.get_table_data_array_cache(); if let Some(table_snapshot_cache) = table_snapshot_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("table_snapshot_cache"); + nodes.push(local_node.clone()); + names.push("table_snapshot_cache".to_string()); num_items.push(table_snapshot_cache.len() as u64); size.push(table_snapshot_cache.size()); } if let Some(table_snapshot_statistic_cache) = table_snapshot_statistic_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("table_snapshot_statistic_cache"); + nodes.push(local_node.clone()); + names.push("table_snapshot_statistic_cache".to_string()); num_items.push(table_snapshot_statistic_cache.len() as u64); size.push(table_snapshot_statistic_cache.size()); } if let Some(segment_info_cache) = segment_info_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("segment_info_cache"); + nodes.push(local_node.clone()); + names.push("segment_info_cache".to_string()); num_items.push(segment_info_cache.len() as u64); size.push(segment_info_cache.size()); } if let Some(bloom_index_filter_cache) = bloom_index_filter_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("bloom_index_filter_cache"); + nodes.push(local_node.clone()); + names.push("bloom_index_filter_cache".to_string()); num_items.push(bloom_index_filter_cache.len() as u64); size.push(bloom_index_filter_cache.size()); } if let Some(bloom_index_meta_cache) = bloom_index_meta_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("bloom_index_meta_cache"); + nodes.push(local_node.clone()); + names.push("bloom_index_meta_cache".to_string()); num_items.push(bloom_index_meta_cache.len() as u64); size.push(bloom_index_meta_cache.size()); } if let Some(prune_partitions_cache) = prune_partitions_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("prune_partitions_cache"); + nodes.push(local_node.clone()); + names.push("prune_partitions_cache".to_string()); num_items.push(prune_partitions_cache.len() as u64); size.push(prune_partitions_cache.size()); } if let Some(file_meta_data_cache) = file_meta_data_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("file_meta_data_cache"); + nodes.push(local_node.clone()); + 
names.push("file_meta_data_cache".to_string()); num_items.push(file_meta_data_cache.len() as u64); size.push(file_meta_data_cache.size()); } if let Some(table_data_cache) = table_data_cache { - nodes.push(local_node.clone().into_bytes()); - names.push("table_data_cache"); + nodes.push(local_node.clone()); + names.push("table_data_cache".to_string()); num_items.push(table_data_cache.len() as u64); size.push(table_data_cache.size()); } if let Some(table_column_array_cache) = table_column_array_cache { - nodes.push(local_node.into_bytes()); - names.push("table_column_array_cache"); + nodes.push(local_node.clone()); + names.push("table_column_array_cache".to_string()); num_items.push(table_column_array_cache.len() as u64); size.push(table_column_array_cache.size()); } - let names: Vec<_> = names.iter().map(|x| x.as_bytes().to_vec()).collect(); Ok(DataBlock::new_from_columns(vec![ StringType::from_data(nodes), StringType::from_data(names), diff --git a/src/query/storages/system/src/catalogs_table.rs b/src/query/storages/system/src/catalogs_table.rs index dbd94dfe4e58..f279ca115b00 100644 --- a/src/query/storages/system/src/catalogs_table.rs +++ b/src/query/storages/system/src/catalogs_table.rs @@ -56,7 +56,7 @@ impl AsyncSystemTable for CatalogsTable { .list_catalogs(&ctx.get_tenant()) .await? .into_iter() - .map(|v| v.name().into_bytes()) + .map(|v| v.name()) .collect::>(); Ok(DataBlock::new_from_columns(vec![StringType::from_data( diff --git a/src/query/storages/system/src/clustering_history_table.rs b/src/query/storages/system/src/clustering_history_table.rs index 9831126eb8fd..d3223a334de1 100644 --- a/src/query/storages/system/src/clustering_history_table.rs +++ b/src/query/storages/system/src/clustering_history_table.rs @@ -65,11 +65,11 @@ impl SystemLogElement for ClusteringHistoryLogElement { columns .next() .unwrap() - .push(Scalar::String(self.database.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.database.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.table.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.table.clone()).as_ref()); columns .next() .unwrap() diff --git a/src/query/storages/system/src/clusters_table.rs b/src/query/storages/system/src/clusters_table.rs index f050035131c2..11573f347ec3 100644 --- a/src/query/storages/system/src/clusters_table.rs +++ b/src/query/storages/system/src/clusters_table.rs @@ -58,10 +58,10 @@ impl SyncSystemTable for ClustersTable { for cluster_node in &cluster_nodes { let (ip, port) = cluster_node.ip_port()?; - names.push(Scalar::String(cluster_node.id.as_bytes().to_vec()).as_ref()); - addresses.push(Scalar::String(ip.as_bytes().to_vec()).as_ref()); + names.push(Scalar::String(cluster_node.id.clone()).as_ref()); + addresses.push(Scalar::String(ip).as_ref()); addresses_port.push(Scalar::Number(NumberScalar::UInt16(port)).as_ref()); - versions.push(Scalar::String(cluster_node.binary_version.as_bytes().to_vec()).as_ref()); + versions.push(Scalar::String(cluster_node.binary_version.clone()).as_ref()); } Ok(DataBlock::new_from_columns(vec![ diff --git a/src/query/storages/system/src/columns_table.rs b/src/query/storages/system/src/columns_table.rs index b3e64031e955..bfb2ca0454ea 100644 --- a/src/query/storages/system/src/columns_table.rs +++ b/src/query/storages/system/src/columns_table.rs @@ -58,22 +58,22 @@ impl AsyncSystemTable for ColumnsTable { push_downs: Option, ) -> Result { let rows = self.dump_table_columns(ctx, push_downs).await?; - let mut names: Vec> = Vec::with_capacity(rows.len()); - 
let mut tables: Vec> = Vec::with_capacity(rows.len()); - let mut databases: Vec> = Vec::with_capacity(rows.len()); - let mut types: Vec> = Vec::with_capacity(rows.len()); - let mut data_types: Vec> = Vec::with_capacity(rows.len()); - let mut default_kinds: Vec> = Vec::with_capacity(rows.len()); - let mut default_exprs: Vec> = Vec::with_capacity(rows.len()); - let mut is_nullables: Vec> = Vec::with_capacity(rows.len()); - let mut comments: Vec> = Vec::with_capacity(rows.len()); + let mut names: Vec = Vec::with_capacity(rows.len()); + let mut tables: Vec = Vec::with_capacity(rows.len()); + let mut databases: Vec = Vec::with_capacity(rows.len()); + let mut types: Vec = Vec::with_capacity(rows.len()); + let mut data_types: Vec = Vec::with_capacity(rows.len()); + let mut default_kinds: Vec = Vec::with_capacity(rows.len()); + let mut default_exprs: Vec = Vec::with_capacity(rows.len()); + let mut is_nullables: Vec = Vec::with_capacity(rows.len()); + let mut comments: Vec = Vec::with_capacity(rows.len()); for (database_name, table_name, field) in rows.into_iter() { - names.push(field.name().clone().into_bytes()); - tables.push(table_name.into_bytes()); - databases.push(database_name.into_bytes()); - types.push(field.data_type().wrapped_display().into_bytes()); + names.push(field.name().clone()); + tables.push(table_name); + databases.push(database_name); + types.push(field.data_type().wrapped_display()); let data_type = field.data_type().remove_recursive_nullable().sql_name(); - data_types.push(data_type.into_bytes()); + data_types.push(data_type); let mut default_kind = "".to_string(); let mut default_expr = "".to_string(); @@ -81,15 +81,15 @@ impl AsyncSystemTable for ColumnsTable { default_kind = "DEFAULT".to_string(); default_expr = expr.to_string(); } - default_kinds.push(default_kind.into_bytes()); - default_exprs.push(default_expr.into_bytes()); + default_kinds.push(default_kind); + default_exprs.push(default_expr); if field.is_nullable() { - is_nullables.push("YES".to_string().into_bytes()); + is_nullables.push("YES".to_string()); } else { - is_nullables.push("NO".to_string().into_bytes()); + is_nullables.push("NO".to_string()); } - comments.push("".to_string().into_bytes()); + comments.push("".to_string()); } Ok(DataBlock::new_from_columns(vec![ @@ -166,7 +166,7 @@ pub(crate) async fn dump_tables( let tenant = ctx.get_tenant(); let catalog = ctx.get_catalog(CATALOG_DEFAULT).await?; - let mut tables = Vec::new(); + let mut tables: Vec = Vec::new(); let mut databases: Vec = Vec::new(); if let Some(push_downs) = push_downs { @@ -174,19 +174,15 @@ pub(crate) async fn dump_tables( let expr = filter.as_expr(&BUILTIN_FUNCTIONS); find_eq_filter(&expr, &mut |col_name, scalar| { if col_name == "database" { - if let Scalar::String(s) = scalar { - if let Ok(database) = String::from_utf8(s.clone()) { - if !databases.contains(&database) { - databases.push(database); - } + if let Scalar::String(database) = scalar { + if !databases.contains(database) { + databases.push(database.clone()); } } } else if col_name == "table" { - if let Scalar::String(s) = scalar { - if let Ok(table) = String::from_utf8(s.clone()) { - if !tables.contains(&table) { - tables.push(table); - } + if let Scalar::String(table) = scalar { + if !tables.contains(table) { + tables.push(table.clone()); } } } diff --git a/src/query/storages/system/src/configs_table.rs b/src/query/storages/system/src/configs_table.rs index ee64b5b7609b..fb2f5b3e5133 100644 --- a/src/query/storages/system/src/configs_table.rs +++ 
b/src/query/storages/system/src/configs_table.rs @@ -134,11 +134,6 @@ impl SyncSystemTable for ConfigsTable { storage_config_value, ); - let names: Vec> = names.iter().map(|x| x.as_bytes().to_vec()).collect(); - let values: Vec> = values.iter().map(|x| x.as_bytes().to_vec()).collect(); - let groups: Vec> = groups.iter().map(|x| x.as_bytes().to_vec()).collect(); - let descs: Vec> = descs.iter().map(|x| x.as_bytes().to_vec()).collect(); - Ok(DataBlock::new_from_columns(vec![ StringType::from_data(groups), StringType::from_data(names), diff --git a/src/query/storages/system/src/contributors_table.rs b/src/query/storages/system/src/contributors_table.rs index 5d1216e16689..b70b4989ead7 100644 --- a/src/query/storages/system/src/contributors_table.rs +++ b/src/query/storages/system/src/contributors_table.rs @@ -42,9 +42,9 @@ impl SyncSystemTable for ContributorsTable { } fn get_full_data(&self, _: Arc) -> Result { - let contributors: Vec> = env!("DATABEND_COMMIT_AUTHORS") + let contributors: Vec = env!("DATABEND_COMMIT_AUTHORS") .split_terminator(',') - .map(|x| x.trim().as_bytes().to_vec()) + .map(|x| x.trim().to_string()) .collect(); Ok(DataBlock::new_from_columns(vec![StringType::from_data( diff --git a/src/query/storages/system/src/credits_table.rs b/src/query/storages/system/src/credits_table.rs index 0b14b38847bb..95daf3c58fc8 100644 --- a/src/query/storages/system/src/credits_table.rs +++ b/src/query/storages/system/src/credits_table.rs @@ -42,17 +42,17 @@ impl SyncSystemTable for CreditsTable { } fn get_full_data(&self, _: Arc) -> Result { - let names: Vec> = env!("DATABEND_CREDITS_NAMES") + let names: Vec = env!("DATABEND_CREDITS_NAMES") .split_terminator(',') - .map(|x| x.trim().as_bytes().to_vec()) + .map(|x| x.trim().to_string()) .collect(); - let versions: Vec> = env!("DATABEND_CREDITS_VERSIONS") + let versions: Vec = env!("DATABEND_CREDITS_VERSIONS") .split_terminator(',') - .map(|x| x.trim().as_bytes().to_vec()) + .map(|x| x.trim().to_string()) .collect(); - let licenses: Vec> = env!("DATABEND_CREDITS_LICENSES") + let licenses: Vec = env!("DATABEND_CREDITS_LICENSES") .split_terminator(',') - .map(|x| x.trim().as_bytes().to_vec()) + .map(|x| x.trim().to_string()) .collect(); Ok(DataBlock::new_from_columns(vec![ diff --git a/src/query/storages/system/src/databases_table.rs b/src/query/storages/system/src/databases_table.rs index 2cfcd494a794..ab39d810c7b4 100644 --- a/src/query/storages/system/src/databases_table.rs +++ b/src/query/storages/system/src/databases_table.rs @@ -68,7 +68,7 @@ impl AsyncSystemTable for DatabasesTable { let mut catalog_names = vec![]; let mut db_names = vec![]; let mut db_id = vec![]; - let mut owners: Vec>> = vec![]; + let mut owners: Vec> = vec![]; let visibility_checker = ctx.get_visibility_checker().await?; @@ -86,8 +86,8 @@ impl AsyncSystemTable for DatabasesTable { .collect::>(); for db in final_dbs { - catalog_names.push(ctl_name.clone().into_bytes()); - let db_name = db.name().to_string().into_bytes(); + catalog_names.push(ctl_name.clone()); + let db_name = db.name().to_string(); db_names.push(db_name); let id = db.get_db_info().ident.db_id; db_id.push(id); @@ -99,7 +99,7 @@ impl AsyncSystemTable for DatabasesTable { }) .await .ok() - .and_then(|ownership| ownership.map(|o| o.role.as_bytes().to_vec())), + .and_then(|ownership| ownership.map(|o| o.role.clone())), ); } } diff --git a/src/query/storages/system/src/engines_table.rs b/src/query/storages/system/src/engines_table.rs index 91aea753bf8a..520430aeeeb0 100644 --- 
a/src/query/storages/system/src/engines_table.rs +++ b/src/query/storages/system/src/engines_table.rs @@ -55,8 +55,8 @@ impl AsyncSystemTable for EnginesTable { let mut engine_name = Vec::with_capacity(table_engine_descriptors.len()); let mut engine_comment = Vec::with_capacity(table_engine_descriptors.len()); for descriptor in &table_engine_descriptors { - engine_name.push(descriptor.engine_name.as_bytes().to_vec()); - engine_comment.push(descriptor.comment.as_bytes().to_vec()); + engine_name.push(descriptor.engine_name.clone()); + engine_comment.push(descriptor.comment.clone()); } Ok(DataBlock::new_from_columns(vec![ diff --git a/src/query/storages/system/src/indexes_table.rs b/src/query/storages/system/src/indexes_table.rs index e5a3604499c0..75ddd041f867 100644 --- a/src/query/storages/system/src/indexes_table.rs +++ b/src/query/storages/system/src/indexes_table.rs @@ -68,10 +68,10 @@ impl AsyncSystemTable for IndexesTable { let mut updated_on = Vec::with_capacity(indexes.len()); for (_, name, index) in indexes { - names.push(name.as_bytes().to_vec()); - types.push(index.index_type.to_string().as_bytes().to_vec()); - originals.push(index.original_query.as_bytes().to_vec()); - defs.push(index.query.as_bytes().to_vec()); + names.push(name.clone()); + types.push(index.index_type.to_string()); + originals.push(index.original_query.clone()); + defs.push(index.query.clone()); created_on.push(index.created_on.timestamp_micros()); updated_on.push(index.updated_on.map(|u| u.timestamp_micros())); } diff --git a/src/query/storages/system/src/lib.rs b/src/query/storages/system/src/lib.rs index f7edbdbbdf07..60f242b232a7 100644 --- a/src/query/storages/system/src/lib.rs +++ b/src/query/storages/system/src/lib.rs @@ -15,6 +15,7 @@ #![allow(clippy::uninlined_format_args)] #![feature(type_alias_impl_trait)] #![feature(impl_trait_in_assoc_type)] +#![feature(variant_count)] extern crate core; @@ -45,8 +46,6 @@ mod processes_table; mod processor_profile_table; mod query_cache_table; mod query_log_table; -mod query_profile_table; -mod query_summary_table; mod roles_table; mod settings_table; mod stages_table; @@ -96,8 +95,6 @@ pub use query_log_table::LogType; pub use query_log_table::QueryLogElement; pub use query_log_table::QueryLogQueue; pub use query_log_table::QueryLogTable; -pub use query_profile_table::QueryProfileTable; -pub use query_summary_table::QuerySummaryTable; pub use roles_table::RolesTable; pub use settings_table::SettingsTable; pub use stages_table::StagesTable; diff --git a/src/query/storages/system/src/locks_table.rs b/src/query/storages/system/src/locks_table.rs index cd9f14e30b2c..577a95d028b1 100644 --- a/src/query/storages/system/src/locks_table.rs +++ b/src/query/storages/system/src/locks_table.rs @@ -101,21 +101,21 @@ impl AsyncSystemTable for LocksTable { for info in lock_infos { lock_table_id.push(info.key.get_table_id()); lock_revision.push(info.revision); - lock_type.push(info.meta.lock_type.to_string().as_bytes().to_vec()); + lock_type.push(info.meta.lock_type.to_string().clone()); if info.meta.acquired_on.is_some() { - lock_status.push("HOLDING".as_bytes().to_vec()); + lock_status.push("HOLDING"); } else { - lock_status.push("WAITING".as_bytes().to_vec()); + lock_status.push("WAITING"); } - lock_user.push(info.meta.user.as_bytes().to_vec()); - lock_node.push(info.meta.node.as_bytes().to_vec()); - lock_query_id.push(info.meta.query_id.as_bytes().to_vec()); + lock_user.push(info.meta.user.clone()); + lock_node.push(info.meta.node.clone()); + 
lock_query_id.push(info.meta.query_id.clone()); lock_created_on.push(info.meta.created_on.timestamp_micros()); lock_acquired_on.push(info.meta.acquired_on.map(|v| v.timestamp_micros())); if info.meta.extra_info.is_empty() { - lock_extra_info.push(vec![]); + lock_extra_info.push("".to_string()); } else { - lock_extra_info.push(format!("{:?}", info.meta.extra_info).as_bytes().to_vec()); + lock_extra_info.push(format!("{:?}", info.meta.extra_info)); } } } diff --git a/src/query/storages/system/src/malloc_stats_totals_table.rs b/src/query/storages/system/src/malloc_stats_totals_table.rs index a0992029e2ab..9fc9b2b8c13f 100644 --- a/src/query/storages/system/src/malloc_stats_totals_table.rs +++ b/src/query/storages/system/src/malloc_stats_totals_table.rs @@ -41,7 +41,7 @@ macro_rules! set_value { ($stat:ident, $names:expr, $values:expr) => { let mib = $stat::mib()?; let value = mib.read()?; - $names.put_slice($stat::name().as_bytes()); + $names.put_str(&String::from_utf8_lossy($stat::name().as_bytes())); $names.commit_row(); $values.push(value as u64); }; diff --git a/src/query/storages/system/src/metrics_table.rs b/src/query/storages/system/src/metrics_table.rs index 2ade31eeea47..a9a6901804d5 100644 --- a/src/query/storages/system/src/metrics_table.rs +++ b/src/query/storages/system/src/metrics_table.rs @@ -59,17 +59,17 @@ impl SyncSystemTable for MetricsTable { }; samples.extend(self.custom_metric_samples()?); - let mut nodes: Vec> = Vec::with_capacity(samples.len()); - let mut metrics: Vec> = Vec::with_capacity(samples.len()); - let mut labels: Vec> = Vec::with_capacity(samples.len()); - let mut kinds: Vec> = Vec::with_capacity(samples.len()); - let mut values: Vec> = Vec::with_capacity(samples.len()); + let mut nodes: Vec = Vec::with_capacity(samples.len()); + let mut metrics: Vec = Vec::with_capacity(samples.len()); + let mut labels: Vec = Vec::with_capacity(samples.len()); + let mut kinds: Vec = Vec::with_capacity(samples.len()); + let mut values: Vec = Vec::with_capacity(samples.len()); for sample in samples.into_iter() { - nodes.push(local_id.clone().into_bytes()); - metrics.push(sample.name.clone().into_bytes()); - kinds.push(sample.value.kind().into_bytes()); - labels.push(self.display_sample_labels(&sample.labels)?.into_bytes()); - values.push(self.display_sample_value(&sample.value)?.into_bytes()); + nodes.push(local_id.clone()); + metrics.push(sample.name.clone()); + kinds.push(sample.value.kind()); + labels.push(self.display_sample_labels(&sample.labels)?); + values.push(self.display_sample_value(&sample.value)?); } Ok(DataBlock::new_from_columns(vec![ diff --git a/src/query/storages/system/src/password_policies_table.rs b/src/query/storages/system/src/password_policies_table.rs index 7b92e2a2fd50..2f2455ad7cc0 100644 --- a/src/query/storages/system/src/password_policies_table.rs +++ b/src/query/storages/system/src/password_policies_table.rs @@ -62,8 +62,8 @@ impl AsyncSystemTable for PasswordPoliciesTable { let mut created_on_columns = Vec::with_capacity(password_policies.len()); let mut updated_on_columns = Vec::with_capacity(password_policies.len()); for password_policy in password_policies { - names.push(password_policy.name.as_bytes().to_vec()); - comments.push(password_policy.comment.as_bytes().to_vec()); + names.push(password_policy.name.clone()); + comments.push(password_policy.comment.clone()); let values = vec![ format!("MIN_LENGTH={}", password_policy.min_length), @@ -85,7 +85,7 @@ impl AsyncSystemTable for PasswordPoliciesTable { format!("HISTORY={}", 
password_policy.history), ]; let option = values.join(", "); - options.push(option.as_bytes().to_vec()); + options.push(option); created_on_columns.push(password_policy.create_on.timestamp_micros()); updated_on_columns.push(password_policy.update_on.map(|u| u.timestamp_micros())); diff --git a/src/query/storages/system/src/processes_table.rs b/src/query/storages/system/src/processes_table.rs index b608ed91cf59..25e5bcb97bf4 100644 --- a/src/query/storages/system/src/processes_table.rs +++ b/src/query/storages/system/src/processes_table.rs @@ -55,7 +55,7 @@ impl SyncSystemTable for ProcessesTable { let local_node = ctx.get_cluster().local_id.clone(); - let mut nodes: Vec> = Vec::with_capacity(processes_info.len()); + let mut nodes = Vec::with_capacity(processes_info.len()); let mut processes_id = Vec::with_capacity(processes_info.len()); let mut processes_type = Vec::with_capacity(processes_info.len()); let mut processes_host = Vec::with_capacity(processes_info.len()); @@ -81,21 +81,17 @@ impl SyncSystemTable for ProcessesTable { .unwrap_or(Duration::from_secs(0)) .as_secs(); - nodes.push(local_node.clone().into_bytes()); - processes_id.push(process_info.id.clone().into_bytes()); - processes_type.push(process_info.typ.clone().into_bytes()); - processes_state.push(process_info.state.to_string().into_bytes()); - processes_database.push(process_info.database.clone().into_bytes()); + nodes.push(local_node.clone()); + processes_id.push(process_info.id.clone()); + processes_type.push(process_info.typ.clone()); + processes_state.push(process_info.state.to_string()); + processes_database.push(process_info.database.clone()); processes_host.push(ProcessesTable::process_host(&process_info.client_address)); - processes_user.push( - ProcessesTable::process_option_value(process_info.user.clone()) - .name - .into_bytes(), - ); - processes_extra_info.push( - ProcessesTable::process_option_value(process_info.session_extra_info.clone()) - .into_bytes(), - ); + processes_user + .push(ProcessesTable::process_option_value(process_info.user.clone()).name); + processes_extra_info.push(ProcessesTable::process_option_value( + process_info.session_extra_info.clone(), + )); processes_memory_usage.push(process_info.memory_usage); processes_scan_progress_read_rows.push(scan_progress.rows as u64); processes_scan_progress_read_bytes.push(scan_progress.bytes as u64); @@ -111,13 +107,7 @@ impl SyncSystemTable for ProcessesTable { } // Status info. - processes_status.push( - process_info - .status_info - .clone() - .unwrap_or("".to_owned()) - .into_bytes(), - ); + processes_status.push(process_info.status_info.clone().unwrap_or("".to_owned())); } Ok(DataBlock::new_from_columns(vec![ @@ -196,8 +186,8 @@ impl ProcessesTable { SyncOneBlockSystemTable::create(ProcessesTable { table_info }) } - fn process_host(client_address: &Option) -> Option> { - client_address.as_ref().map(|s| s.to_string().into_bytes()) + fn process_host(client_address: &Option) -> Option { + client_address.as_ref().map(|s| s.to_string()) } fn process_option_value(opt: Option) -> T diff --git a/src/query/storages/system/src/processor_profile_table.rs b/src/query/storages/system/src/processor_profile_table.rs index b551bcae7a88..7c2a2d24cfdf 100644 --- a/src/query/storages/system/src/processor_profile_table.rs +++ b/src/query/storages/system/src/processor_profile_table.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
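Note: the system-table hunks above are all the same mechanical rewrite: the per-row buffers are declared as Vec<String> (or Vec<Option<String>>), values are pushed as owned Strings, and the trailing .as_bytes().to_vec() conversions plus the post-hoc remapping into Vec<Vec<u8>> disappear before the columns are handed to StringType::from_data. A hedged sketch of the shape of that rewrite; from_data below is only a stand-in for Databend's StringType::from_data column builder, and EngineDescriptor is an illustrative struct.

    // Stand-in for StringType::from_data: the real builder turns Vec<String> into a string column.
    fn from_data(rows: Vec<String>) -> Vec<String> {
        rows
    }

    struct EngineDescriptor {
        engine_name: String,
        comment: String,
    }

    fn build_columns(descriptors: &[EngineDescriptor]) -> (Vec<String>, Vec<String>) {
        let mut engine_name = Vec::with_capacity(descriptors.len());
        let mut engine_comment = Vec::with_capacity(descriptors.len());
        for d in descriptors {
            // Before this change each row was pushed as d.engine_name.as_bytes().to_vec();
            // now the owned String goes straight into the column buffer.
            engine_name.push(d.engine_name.clone());
            engine_comment.push(d.comment.clone());
        }
        (from_data(engine_name), from_data(engine_comment))
    }

    fn main() {
        let descs = vec![EngineDescriptor {
            engine_name: "FUSE".to_string(),
            comment: "default storage engine".to_string(),
        }];
        let (names, comments) = build_columns(&descs);
        println!("{:?} {:?}", names, comments);
    }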
+use std::collections::HashMap; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -22,6 +23,7 @@ use databend_common_expression::types::NumberDataType; use databend_common_expression::types::StringType; use databend_common_expression::types::UInt32Type; use databend_common_expression::types::UInt64Type; +use databend_common_expression::types::VariantType; use databend_common_expression::DataBlock; use databend_common_expression::FromData; use databend_common_expression::TableDataType; @@ -30,6 +32,7 @@ use databend_common_expression::TableSchemaRefExt; use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; +use databend_common_pipeline_core::processors::ProfileStatisticsName; use crate::SyncOneBlockSystemTable; use crate::SyncSystemTable; @@ -53,32 +56,34 @@ impl SyncSystemTable for ProcessorProfileTable { let local_id = ctx.get_cluster().local_id.clone(); let total_size = queries_profiles.values().map(Vec::len).sum(); - let mut node: Vec> = Vec::with_capacity(total_size); - let mut queries_id: Vec> = Vec::with_capacity(total_size); + let mut node: Vec = Vec::with_capacity(total_size); + let mut queries_id: Vec = Vec::with_capacity(total_size); let mut pid: Vec = Vec::with_capacity(total_size); - let mut p_name: Vec> = Vec::with_capacity(total_size); + let mut p_name: Vec = Vec::with_capacity(total_size); let mut plan_id: Vec> = Vec::with_capacity(total_size); let mut parent_id: Vec> = Vec::with_capacity(total_size); - let mut plan_name: Vec>> = Vec::with_capacity(total_size); - let mut cpu_time: Vec = Vec::with_capacity(total_size); - let mut wait_time: Vec = Vec::with_capacity(total_size); - let mut exchange_rows: Vec = Vec::with_capacity(total_size); - let mut exchange_bytes: Vec = Vec::with_capacity(total_size); + let mut plan_name: Vec> = Vec::with_capacity(total_size); + let mut statistics = Vec::with_capacity(total_size); for (query_id, query_profiles) in queries_profiles { for query_profile in query_profiles { - node.push(local_id.clone().into_bytes()); - queries_id.push(query_id.clone().into_bytes()); + node.push(local_id.clone()); + queries_id.push(query_id.clone()); pid.push(query_profile.pid as u64); - p_name.push(query_profile.p_name.clone().into_bytes()); + p_name.push(query_profile.p_name.clone()); plan_id.push(query_profile.plan_id); parent_id.push(query_profile.plan_parent_id); - plan_name.push(query_profile.plan_name.clone().map(String::into_bytes)); + plan_name.push(query_profile.plan_name.clone()); - cpu_time.push(query_profile.cpu_time.load(Ordering::Relaxed)); - wait_time.push(query_profile.wait_time.load(Ordering::Relaxed)); - exchange_rows.push(query_profile.exchange_rows.load(Ordering::Relaxed) as u64); - exchange_bytes.push(query_profile.exchange_bytes.load(Ordering::Relaxed) as u64); + let mut statistics_map = HashMap::with_capacity(query_profile.statistics.len()); + for (idx, item_value) in query_profile.statistics.iter().enumerate() { + statistics_map.insert( + ProfileStatisticsName::from(idx).to_string(), + item_value.load(Ordering::SeqCst), + ); + } + + statistics.push(serde_json::to_vec(&statistics_map).unwrap()); } } @@ -90,10 +95,7 @@ impl SyncSystemTable for ProcessorProfileTable { UInt32Type::from_opt_data(plan_id), UInt32Type::from_opt_data(parent_id), StringType::from_opt_data(plan_name), - UInt64Type::from_data(cpu_time), - UInt64Type::from_data(wait_time), - UInt64Type::from_data(exchange_rows), - UInt64Type::from_data(exchange_bytes), + 
VariantType::from_data(statistics), ])) } } @@ -117,16 +119,7 @@ impl ProcessorProfileTable { "plan_name", TableDataType::Nullable(Box::new(TableDataType::String)), ), - TableField::new("cpu_time", TableDataType::Number(NumberDataType::UInt64)), - TableField::new("wait_time", TableDataType::Number(NumberDataType::UInt64)), - TableField::new( - "exchange_rows", - TableDataType::Number(NumberDataType::UInt64), - ), - TableField::new( - "exchange_bytes", - TableDataType::Number(NumberDataType::UInt64), - ), + TableField::new("statistics", TableDataType::Variant), ]); let table_info = TableInfo { diff --git a/src/query/storages/system/src/query_log_table.rs b/src/query/storages/system/src/query_log_table.rs index f4d18a50ee6d..b2ca6604b136 100644 --- a/src/query/storages/system/src/query_log_table.rs +++ b/src/query/storages/system/src/query_log_table.rs @@ -295,45 +295,45 @@ impl SystemLogElement for QueryLogElement { columns .next() .unwrap() - .push(Scalar::String(self.handler_type.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.handler_type.clone()).as_ref()); // User. columns .next() .unwrap() - .push(Scalar::String(self.tenant_id.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.tenant_id.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.cluster_id.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.cluster_id.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.node_id.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.node_id.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.sql_user.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.sql_user.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.sql_user_quota.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.sql_user_quota.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.sql_user_privileges.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.sql_user_privileges.clone()).as_ref()); // Query. columns .next() .unwrap() - .push(Scalar::String(self.query_id.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.query_id.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.query_kind.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.query_kind.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.query_text.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.query_text.clone()).as_ref()); columns .next() .unwrap() @@ -354,23 +354,23 @@ impl SystemLogElement for QueryLogElement { columns .next() .unwrap() - .push(Scalar::String(self.current_database.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.current_database.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.databases.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.databases.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.tables.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.tables.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.columns.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.columns.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.projections.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.projections.clone()).as_ref()); // Stats. 
columns .next() @@ -472,15 +472,15 @@ impl SystemLogElement for QueryLogElement { columns .next() .unwrap() - .push(Scalar::String(self.client_info.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.client_info.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.client_address.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.client_address.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.user_agent.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.user_agent.clone()).as_ref()); // Exception. columns .next() @@ -489,26 +489,26 @@ impl SystemLogElement for QueryLogElement { columns .next() .unwrap() - .push(Scalar::String(self.exception_text.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.exception_text.clone()).as_ref()); columns .next() .unwrap() - .push(Scalar::String(self.stack_trace.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.stack_trace.clone()).as_ref()); // Server. columns .next() .unwrap() - .push(Scalar::String(self.server_version.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.server_version.clone()).as_ref()); // Session settings columns .next() .unwrap() - .push(Scalar::String(self.session_settings.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.session_settings.clone()).as_ref()); // Extra. columns .next() .unwrap() - .push(Scalar::String(self.extra.as_bytes().to_vec()).as_ref()); + .push(Scalar::String(self.extra.clone()).as_ref()); columns .next() .unwrap() diff --git a/src/query/storages/system/src/query_profile_table.rs b/src/query/storages/system/src/query_profile_table.rs deleted file mode 100644 index 59904014c339..000000000000 --- a/src/query/storages/system/src/query_profile_table.rs +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use databend_common_catalog::table::Table; -use databend_common_catalog::table_context::TableContext; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::UInt32Type; -use databend_common_expression::types::VariantType; -use databend_common_expression::DataBlock; -use databend_common_expression::FromData; -use databend_common_expression::TableDataType; -use databend_common_expression::TableField; -use databend_common_expression::TableSchemaRefExt; -use databend_common_meta_app::schema::TableIdent; -use databend_common_meta_app::schema::TableInfo; -use databend_common_meta_app::schema::TableMeta; -use databend_common_profile::OperatorExecutionInfo; -use databend_common_profile::QueryProfileManager; - -use crate::SyncOneBlockSystemTable; -use crate::SyncSystemTable; - -fn encode_operator_execution_info(info: &OperatorExecutionInfo) -> jsonb::Value { - // Process time represent with number of milliseconds. 
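Note: the deleted query_profile table below exposed a per-operator execution_info JSON; its processor-level counterpart is the new statistics Variant column added to system.processor_profile above, where every counter of the processor's statistics array is serialized into one JSON object keyed by the ProfileStatisticsName display name. A simplified, dependency-free sketch of that aggregation step; the two-variant enum and the hand-built JSON string stand in for the real ProfileStatisticsName and the serde_json/Variant encoding used in the diff.

    use std::collections::HashMap;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Stand-in for ProfileStatisticsName: the real enum has many more variants and the
    // table relies on the variant count feature enabled in lib.rs for its length.
    #[derive(Debug)]
    enum ProfileStatisticsName {
        CpuTime,
        ScanBytes,
    }

    impl ProfileStatisticsName {
        fn from_index(idx: usize) -> Self {
            match idx {
                0 => ProfileStatisticsName::CpuTime,
                _ => ProfileStatisticsName::ScanBytes,
            }
        }
    }

    fn statistics_to_json(statistics: &[AtomicUsize]) -> String {
        let mut map = HashMap::with_capacity(statistics.len());
        for (idx, value) in statistics.iter().enumerate() {
            map.insert(
                format!("{:?}", ProfileStatisticsName::from_index(idx)),
                value.load(Ordering::SeqCst),
            );
        }
        // The real table feeds serde_json::to_vec(&map) into VariantType::from_data;
        // a plain, sorted JSON string keeps this sketch dependency-free.
        let mut entries: Vec<String> = map
            .into_iter()
            .map(|(k, v)| format!("\"{}\":{}", k, v))
            .collect();
        entries.sort();
        format!("{{{}}}", entries.join(","))
    }

    fn main() {
        let stats = [AtomicUsize::new(1200), AtomicUsize::new(4096)];
        println!("{}", statistics_to_json(&stats));
    }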
- let process_time = info.process_time.as_nanos() as f64 / 1e6; - (&serde_json::json!({ - "process_time": process_time, - "input_rows": info.input_rows, - "input_bytes": info.input_bytes, - "output_rows": info.output_rows, - "output_bytes": info.output_bytes, - })) - .into() -} - -pub struct QueryProfileTable { - table_info: TableInfo, -} - -impl QueryProfileTable { - pub fn create(table_id: u64) -> Arc { - let schema = TableSchemaRefExt::create(vec![ - TableField::new("query_id", TableDataType::String), - TableField::new("operator_id", TableDataType::Number(NumberDataType::UInt32)), - TableField::new("execution_info", TableDataType::Variant), - ]); - - let table_info = TableInfo { - desc: "'system'.'query_profile'".to_string(), - ident: TableIdent::new(table_id, 0), - name: "query_profile".to_string(), - meta: TableMeta { - schema, - engine: "QueryProfileTable".to_string(), - ..Default::default() - }, - ..Default::default() - }; - - SyncOneBlockSystemTable::create(Self { table_info }) - } -} - -impl SyncSystemTable for QueryProfileTable { - const NAME: &'static str = "system.query_profile"; - - fn get_table_info(&self) -> &TableInfo { - &self.table_info - } - - fn get_full_data( - &self, - _ctx: Arc, - ) -> databend_common_exception::Result { - let profile_mgr = QueryProfileManager::instance(); - let query_profs = profile_mgr.list_all(); - - let mut query_ids: Vec> = Vec::with_capacity(query_profs.len()); - let mut operator_ids: Vec = Vec::with_capacity(query_profs.len()); - let mut execution_infos: Vec> = Vec::with_capacity(query_profs.len()); - - for prof in query_profs.iter() { - for plan_prof in prof.operator_profiles.iter() { - query_ids.push(prof.query_id.clone().into_bytes()); - operator_ids.push(plan_prof.id); - - let execution_info = encode_operator_execution_info(&plan_prof.execution_info); - execution_infos.push(execution_info.to_vec()); - } - } - - let block = DataBlock::new_from_columns(vec![ - // query_id - StringType::from_data(query_ids), - // operator_id - UInt32Type::from_data(operator_ids), - // execution_info - VariantType::from_data(execution_infos), - ]); - - Ok(block) - } -} diff --git a/src/query/storages/system/src/query_summary_table.rs b/src/query/storages/system/src/query_summary_table.rs deleted file mode 100644 index 734b44df8ec0..000000000000 --- a/src/query/storages/system/src/query_summary_table.rs +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
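Note: the query_profile and query_summary tables removed here were backed by the now-deleted databend-common-profile crate; the data they carried is instead expected to come from statistics that each table source records inline, right where it already updates scan progress (see the Profile::record_usize_profile(ProfileStatisticsName::ScanBytes, ..) calls added to the hive, iceberg and parquet sources earlier in this diff). A stand-in sketch of that recording pattern; the static counter below only mimics the real Profile, which attributes the value to the currently running processor rather than a global.

    use std::sync::atomic::{AtomicUsize, Ordering};

    // Stand-ins for Profile / ProfileStatisticsName: a single global counter instead of
    // the per-processor statistics array used by the real implementation.
    static SCAN_BYTES: AtomicUsize = AtomicUsize::new(0);

    enum ProfileStatisticsName {
        ScanBytes,
    }

    fn record_usize_profile(name: ProfileStatisticsName, value: usize) {
        match name {
            ProfileStatisticsName::ScanBytes => {
                SCAN_BYTES.fetch_add(value, Ordering::Relaxed);
            }
        }
    }

    struct DataBlock {
        bytes: usize,
    }

    impl DataBlock {
        fn memory_size(&self) -> usize {
            self.bytes
        }
    }

    fn on_block_generated(block: &DataBlock) {
        // Mirrors the diff: scan progress is still incremented as before, and the same
        // byte count is additionally recorded as a ScanBytes statistic.
        record_usize_profile(ProfileStatisticsName::ScanBytes, block.memory_size());
    }

    fn main() {
        on_block_generated(&DataBlock { bytes: 4096 });
        println!("scan_bytes = {}", SCAN_BYTES.load(Ordering::Relaxed));
    }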
- -use std::sync::Arc; - -use databend_common_catalog::table::Table; -use databend_common_catalog::table_context::TableContext; -use databend_common_expression::types::ArgType; -use databend_common_expression::types::ArrayType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::UInt32Type; -use databend_common_expression::types::ValueType; -use databend_common_expression::types::VariantType; -use databend_common_expression::DataBlock; -use databend_common_expression::FromData; -use databend_common_expression::TableDataType; -use databend_common_expression::TableField; -use databend_common_expression::TableSchemaRefExt; -use databend_common_meta_app::schema::TableIdent; -use databend_common_meta_app::schema::TableInfo; -use databend_common_meta_app::schema::TableMeta; -use databend_common_profile::OperatorAttribute; -use databend_common_profile::QueryProfileManager; - -use crate::SyncOneBlockSystemTable; -use crate::SyncSystemTable; - -// Encode an `OperatorAttribute` into jsonb::Value. -fn encode_operator_attribute(attr: &OperatorAttribute) -> jsonb::Value { - match attr { - OperatorAttribute::Join(join_attr) => (&serde_json::json! ({ - "join_type": join_attr.join_type, - "equi_conditions": join_attr.equi_conditions, - "non_equi_conditions": join_attr.non_equi_conditions, - })) - .into(), - OperatorAttribute::Aggregate(agg_attr) => (&serde_json::json!({ - "group_keys": agg_attr.group_keys, - "functions": agg_attr.functions, - })) - .into(), - OperatorAttribute::AggregateExpand(expand_attr) => (&serde_json::json!({ - "group_keys": expand_attr.group_keys, - "aggr_exprs": expand_attr.aggr_exprs, - })) - .into(), - OperatorAttribute::Filter(filter_attr) => { - (&serde_json::json!({ "predicate": filter_attr.predicate })).into() - } - OperatorAttribute::EvalScalar(scalar_attr) => { - (&serde_json::json!({ "scalars": scalar_attr.scalars })).into() - } - OperatorAttribute::ProjectSet(project_attr) => { - (&serde_json::json!({ "functions": project_attr.functions })).into() - } - OperatorAttribute::Limit(limit_attr) => (&serde_json::json!({ - "limit": limit_attr.limit, - "offset": limit_attr.offset, - })) - .into(), - OperatorAttribute::TableScan(scan_attr) => { - (&serde_json::json!({ "qualified_name": scan_attr.qualified_name })).into() - } - OperatorAttribute::CteScan(cte_scan_attr) => { - (&serde_json::json!({ "cte_idx": cte_scan_attr.cte_idx })).into() - } - OperatorAttribute::Sort(sort_attr) => { - (&serde_json::json!({ "sort_keys": sort_attr.sort_keys })).into() - } - OperatorAttribute::Window(window_attr) => { - (&serde_json::json!({ "functions": window_attr.functions })).into() - } - OperatorAttribute::Exchange(exchange_attr) => { - (&serde_json::json!({ "exchange_mode": exchange_attr.exchange_mode })).into() - } - OperatorAttribute::Udf(udf_attr) => { - (&serde_json::json!({ "scalars": udf_attr.scalars })).into() - } - OperatorAttribute::Empty => jsonb::Value::Null, - } -} - -pub struct QuerySummaryTable { - table_info: TableInfo, -} - -impl QuerySummaryTable { - pub fn create(table_id: u64) -> Arc { - let schema = TableSchemaRefExt::create(vec![ - TableField::new("query_id", TableDataType::String), - TableField::new("operator_id", TableDataType::Number(NumberDataType::UInt32)), - TableField::new("operator_type", TableDataType::String), - TableField::new( - "operator_children", - TableDataType::Array(Box::new(TableDataType::Number(NumberDataType::UInt32))), - ), - 
TableField::new("operator_attribute", TableDataType::Variant), - ]); - - let table_info = TableInfo { - desc: "'system'.'query_summary'".to_string(), - ident: TableIdent::new(table_id, 0), - name: "query_summary".to_string(), - meta: TableMeta { - schema, - engine: "QuerySummaryTable".to_string(), - ..Default::default() - }, - ..Default::default() - }; - - SyncOneBlockSystemTable::create(Self { table_info }) - } -} - -impl SyncSystemTable for QuerySummaryTable { - const NAME: &'static str = "system.query_summary"; - - fn get_table_info(&self) -> &TableInfo { - &self.table_info - } - - fn get_full_data( - &self, - _ctx: Arc, - ) -> databend_common_exception::Result { - let profile_mgr = QueryProfileManager::instance(); - let query_profs = profile_mgr.list_all(); - - let mut query_ids: Vec> = Vec::with_capacity(query_profs.len()); - let mut operator_ids: Vec = Vec::with_capacity(query_profs.len()); - let mut operator_types: Vec> = Vec::with_capacity(query_profs.len()); - let mut operator_childrens: Vec> = Vec::with_capacity(query_profs.len()); - let mut operator_attributes: Vec> = Vec::with_capacity(query_profs.len()); - - for prof in query_profs.iter() { - for plan_prof in prof.operator_profiles.iter() { - query_ids.push(prof.query_id.clone().into_bytes()); - operator_ids.push(plan_prof.id); - operator_types.push(plan_prof.operator_type.to_string().into_bytes()); - operator_childrens.push(plan_prof.children.clone()); - - let attribute_value = encode_operator_attribute(&plan_prof.attribute); - operator_attributes.push(attribute_value.to_vec()); - } - } - - let block = DataBlock::new_from_columns(vec![ - // query_id - StringType::from_data(query_ids), - // operator_id - UInt32Type::from_data(operator_ids), - // operator_type - StringType::from_data(operator_types), - // operator_children - ArrayType::upcast_column(ArrayType::::column_from_iter( - operator_childrens - .into_iter() - .map(|children| UInt32Type::column_from_iter(children.into_iter(), &[])), - &[], - )), - // operator_attribute - VariantType::from_data(operator_attributes), - ]); - - Ok(block) - } -} diff --git a/src/query/storages/system/src/settings_table.rs b/src/query/storages/system/src/settings_table.rs index ed5c3d145856..44672258d282 100644 --- a/src/query/storages/system/src/settings_table.rs +++ b/src/query/storages/system/src/settings_table.rs @@ -83,14 +83,6 @@ impl SyncSystemTable for SettingsTable { types.push(typename.to_string()); } - let names: Vec> = names.iter().map(|x| x.as_bytes().to_vec()).collect(); - let values: Vec> = values.iter().map(|x| x.as_bytes().to_vec()).collect(); - let defaults: Vec> = defaults.iter().map(|x| x.as_bytes().to_vec()).collect(); - let ranges: Vec> = ranges.iter().map(|x| x.as_bytes().to_vec()).collect(); - let levels: Vec> = levels.iter().map(|x| x.as_bytes().to_vec()).collect(); - let descs: Vec> = descs.iter().map(|x| x.as_bytes().to_vec()).collect(); - let types: Vec> = types.iter().map(|x| x.as_bytes().to_vec()).collect(); - Ok(DataBlock::new_from_columns(vec![ StringType::from_data(names), StringType::from_data(values), diff --git a/src/query/storages/system/src/stages_table.rs b/src/query/storages/system/src/stages_table.rs index 77d5eb893a53..51264f0b555c 100644 --- a/src/query/storages/system/src/stages_table.rs +++ b/src/query/storages/system/src/stages_table.rs @@ -74,30 +74,30 @@ impl AsyncSystemTable for StagesTable { }; let user_api = UserApiProvider::instance(); - let mut owners: Vec>> = vec![]; - let mut name: Vec> = Vec::with_capacity(stages.len()); - let mut 
stage_type: Vec<Vec<u8>> = Vec::with_capacity(stages.len());
-        let mut stage_params: Vec<Vec<u8>> = Vec::with_capacity(stages.len());
-        let mut copy_options: Vec<Vec<u8>> = Vec::with_capacity(stages.len());
-        let mut file_format_options: Vec<Vec<u8>> = Vec::with_capacity(stages.len());
-        let mut comment: Vec<Vec<u8>> = Vec::with_capacity(stages.len());
+        let mut owners: Vec<Option<String>> = vec![];
+        let mut name: Vec<String> = Vec::with_capacity(stages.len());
+        let mut stage_type: Vec<String> = Vec::with_capacity(stages.len());
+        let mut stage_params: Vec<String> = Vec::with_capacity(stages.len());
+        let mut copy_options: Vec<String> = Vec::with_capacity(stages.len());
+        let mut file_format_options: Vec<String> = Vec::with_capacity(stages.len());
+        let mut comment: Vec<String> = Vec::with_capacity(stages.len());
         let mut number_of_files: Vec<Option<u64>> = Vec::with_capacity(stages.len());
-        let mut creator: Vec<Option<Vec<u8>>> = Vec::with_capacity(stages.len());
+        let mut creator: Vec<Option<String>> = Vec::with_capacity(stages.len());
         let mut created_on = Vec::with_capacity(stages.len());
         for stage in stages.into_iter() {
             let stage_name = stage.stage_name;
-            name.push(stage_name.clone().into_bytes());
+            name.push(stage_name.clone());
             owners.push(
                 user_api
                     .get_ownership(&tenant, &OwnershipObject::Stage { name: stage_name })
                     .await
                     .ok()
-                    .and_then(|ownership| ownership.map(|o| o.role.as_bytes().to_vec())),
+                    .and_then(|ownership| ownership.map(|o| o.role.clone())),
             );
-            stage_type.push(stage.stage_type.clone().to_string().into_bytes());
-            stage_params.push(format!("{:?}", stage.stage_params).into_bytes());
-            copy_options.push(format!("{:?}", stage.copy_options).into_bytes());
-            file_format_options.push(format!("{:?}", stage.file_format_params).into_bytes());
+            stage_type.push(stage.stage_type.clone().to_string());
+            stage_params.push(format!("{:?}", stage.stage_params));
+            copy_options.push(format!("{:?}", stage.copy_options));
+            file_format_options.push(format!("{:?}", stage.file_format_params));
             // TODO(xuanwo): we will remove this line.
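Illustrative sketch (not part of the patch): the system-table hunks in this change swap raw byte columns (Vec<Vec<u8>>) for native String columns, so values go straight into StringType::from_data without the old .as_bytes().to_vec() / .into_bytes() conversions. A minimal example of the new pattern, reusing only calls that appear in the surrounding code; the function name and values are made up:

use databend_common_expression::types::StringType;
use databend_common_expression::DataBlock;
use databend_common_expression::FromData;

// Hypothetical example; illustrates the new String-based column building only.
fn tiny_block() -> DataBlock {
    // Values are collected directly as Vec<String> now.
    let names: Vec<String> = vec!["stage_a".to_string(), "stage_b".to_string()];
    // Before this change the same column needed Vec<Vec<u8>>, e.g.
    // names.iter().map(|x| x.as_bytes().to_vec()).collect::<Vec<Vec<u8>>>().
    DataBlock::new_from_columns(vec![StringType::from_data(names)])
}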
match stage.stage_type { StageType::LegacyInternal | StageType::Internal | StageType::User => { @@ -107,9 +107,9 @@ impl AsyncSystemTable for StagesTable { number_of_files.push(None); } }; - creator.push(stage.creator.map(|c| c.to_string().into_bytes().to_vec())); + creator.push(stage.creator.map(|c| c.to_string())); created_on.push(stage.created_on.timestamp_micros()); - comment.push(stage.comment.clone().into_bytes()); + comment.push(stage.comment.clone()); } Ok(DataBlock::new_from_columns(vec![ diff --git a/src/query/storages/system/src/streams_table.rs b/src/query/storages/system/src/streams_table.rs index 3cc7312f17d1..753176d58fee 100644 --- a/src/query/storages/system/src/streams_table.rs +++ b/src/query/storages/system/src/streams_table.rs @@ -83,7 +83,7 @@ impl AsyncSystemTable for StreamsTable { let mut table_name = vec![]; let mut invalid_reason = vec![]; let mut mode = vec![]; - let mut names = vec![]; + let mut names: Vec = vec![]; let mut stream_id = vec![]; let mut created_on = vec![]; let mut updated_on = vec![]; @@ -95,16 +95,14 @@ impl AsyncSystemTable for StreamsTable { for (ctl_name, ctl) in ctls.into_iter() { let mut dbs = Vec::new(); if let Some(push_downs) = &push_downs { - let mut db_name = Vec::new(); + let mut db_name: Vec = Vec::new(); if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) { let expr = filter.as_expr(&BUILTIN_FUNCTIONS); find_eq_filter(&expr, &mut |col_name, scalar| { if col_name == "database" { - if let Scalar::String(s) = scalar { - if let Ok(database) = String::from_utf8(s.clone()) { - if !db_name.contains(&database) { - db_name.push(database); - } + if let Scalar::String(database) = scalar { + if !db_name.contains(database) { + db_name.push(database.clone()); } } } @@ -163,11 +161,11 @@ impl AsyncSystemTable for StreamsTable { t_id, ) && table.engine() == "STREAM" { - catalogs.push(ctl_name.as_bytes().to_vec()); - databases.push(name.as_bytes().to_vec()); + catalogs.push(ctl_name); + databases.push(name); let stream_info = table.get_table_info(); - names.push(table.name().as_bytes().to_vec()); + names.push(table.name().to_string()); stream_id.push(stream_info.ident.table_id); created_on.push(stream_info.meta.created_on.timestamp_micros()); updated_on.push(stream_info.meta.updated_on.timestamp_micros()); @@ -180,27 +178,20 @@ impl AsyncSystemTable for StreamsTable { }) .await .ok() - .and_then(|ownership| { - ownership.map(|o| o.role.as_bytes().to_vec()) - }), + .and_then(|ownership| ownership.map(|o| o.role.clone())), ); - comment.push(stream_info.meta.comment.as_bytes().to_vec()); + comment.push(stream_info.meta.comment.clone()); let stream_table = StreamTable::try_from_table(table.as_ref())?; - table_name.push( - format!( - "{}.{}", - stream_table.source_table_database(), - stream_table.source_table_name() - ) - .as_bytes() - .to_vec(), - ); - mode.push(stream_table.mode().to_string().as_bytes().to_vec()); + table_name.push(format!( + "{}.{}", + stream_table.source_table_database(), + stream_table.source_table_name() + )); + mode.push(stream_table.mode().to_string()); table_version.push(stream_table.offset()); table_id.push(stream_table.source_table_id()); - snapshot_location - .push(stream_table.snapshot_loc().map(|v| v.as_bytes().to_vec())); + snapshot_location.push(stream_table.snapshot_loc()); let mut reason = "".to_string(); match stream_table.source_table(ctx.clone()).await { @@ -220,7 +211,7 @@ impl AsyncSystemTable for StreamsTable { reason = e.display_text(); } } - invalid_reason.push(reason.as_bytes().to_vec()); + 
invalid_reason.push(reason); } } } diff --git a/src/query/storages/system/src/tables_table.rs b/src/query/storages/system/src/tables_table.rs index e2d49572d92f..f83b39c792cc 100644 --- a/src/query/storages/system/src/tables_table.rs +++ b/src/query/storages/system/src/tables_table.rs @@ -183,22 +183,20 @@ where TablesTable: HistoryAware let mut databases = vec![]; let mut database_tables = vec![]; - let mut owner: Vec>> = Vec::new(); + let mut owner: Vec> = Vec::new(); let user_api = UserApiProvider::instance(); for (ctl_name, ctl) in ctls.into_iter() { let mut dbs = Vec::new(); if let Some(push_downs) = &push_downs { - let mut db_name = Vec::new(); + let mut db_name: Vec = Vec::new(); if let Some(filter) = push_downs.filters.as_ref().map(|f| &f.filter) { let expr = filter.as_expr(&BUILTIN_FUNCTIONS); find_eq_filter(&expr, &mut |col_name, scalar| { if col_name == "database" { - if let Scalar::String(s) = scalar { - if let Ok(database) = String::from_utf8(s.clone()) { - if !db_name.contains(&database) { - db_name.push(database); - } + if let Scalar::String(database) = scalar { + if !db_name.contains(database) { + db_name.push(database.clone()); } } } @@ -276,8 +274,8 @@ where TablesTable: HistoryAware table_id, ) && table.engine() != "STREAM" { - catalogs.push(ctl_name.as_bytes().to_vec()); - databases.push(name.as_bytes().to_vec()); + catalogs.push(ctl_name); + databases.push(name); database_tables.push(table); owner.push( user_api @@ -288,9 +286,7 @@ where TablesTable: HistoryAware }) .await .ok() - .and_then(|ownership| { - ownership.map(|o| o.role.as_bytes().to_vec()) - }), + .and_then(|ownership| ownership.map(|o| o.role.to_string())), ); } } @@ -298,7 +294,6 @@ where TablesTable: HistoryAware } let mut number_of_blocks: Vec> = Vec::new(); - let mut number_of_segments: Vec> = Vec::new(); let mut num_rows: Vec> = Vec::new(); let mut data_size: Vec> = Vec::new(); @@ -328,19 +323,19 @@ where TablesTable: HistoryAware index_size.push(stats.as_ref().and_then(|v| v.index_size)); } - let names: Vec> = database_tables + let names: Vec = database_tables .iter() - .map(|v| v.name().as_bytes().to_vec()) + .map(|v| v.name().to_string()) .collect(); let table_id: Vec = database_tables .iter() .map(|v| v.get_table_info().ident.table_id) .collect(); - let engines: Vec> = database_tables + let engines: Vec = database_tables .iter() - .map(|v| v.engine().as_bytes().to_vec()) + .map(|v| v.engine().to_string()) .collect(); - let engines_full: Vec> = engines.clone(); + let engines_full: Vec = engines.clone(); let created_on: Vec = database_tables .iter() .map(|v| v.get_table_info().meta.created_on.timestamp_micros()) @@ -369,14 +364,13 @@ where TablesTable: HistoryAware .unwrap_or_else(|| "".to_owned()) }) .collect(); - let cluster_bys: Vec> = cluster_bys.iter().map(|s| s.as_bytes().to_vec()).collect(); - let is_transient: Vec> = database_tables + let is_transient: Vec = database_tables .iter() .map(|v| { if v.options().contains_key("TRANSIENT") { - "TRANSIENT".as_bytes().to_vec() + "TRANSIENT".to_string() } else { - vec![] + "".to_string() } }) .collect(); diff --git a/src/query/storages/system/src/task_history_table.rs b/src/query/storages/system/src/task_history_table.rs index b3dc2df7524b..6757208efba0 100644 --- a/src/query/storages/system/src/task_history_table.rs +++ b/src/query/storages/system/src/task_history_table.rs @@ -31,6 +31,7 @@ use databend_common_expression::types::Int64Type; use databend_common_expression::types::StringType; use databend_common_expression::types::TimestampType; use 
databend_common_expression::types::UInt64Type; +use databend_common_expression::types::VariantType; use databend_common_expression::DataBlock; use databend_common_expression::FromData; use databend_common_meta_app::schema::TableIdent; @@ -42,46 +43,46 @@ use crate::table::AsyncOneBlockSystemTable; use crate::table::AsyncSystemTable; pub fn parse_task_runs_to_datablock(task_runs: Vec) -> Result { - let mut name: Vec> = Vec::with_capacity(task_runs.len()); + let mut name: Vec = Vec::with_capacity(task_runs.len()); let mut id: Vec = Vec::with_capacity(task_runs.len()); - let mut owner: Vec> = Vec::with_capacity(task_runs.len()); - let mut definition: Vec> = Vec::with_capacity(task_runs.len()); - let mut condition_text: Vec> = Vec::with_capacity(task_runs.len()); - let mut comment: Vec>> = Vec::with_capacity(task_runs.len()); - let mut schedule: Vec>> = Vec::with_capacity(task_runs.len()); - let mut warehouse: Vec>> = Vec::with_capacity(task_runs.len()); - let mut state: Vec> = Vec::with_capacity(task_runs.len()); - let mut exception_text: Vec>> = Vec::with_capacity(task_runs.len()); + let mut owner: Vec = Vec::with_capacity(task_runs.len()); + let mut definition: Vec = Vec::with_capacity(task_runs.len()); + let mut condition_text: Vec = Vec::with_capacity(task_runs.len()); + let mut comment: Vec> = Vec::with_capacity(task_runs.len()); + let mut schedule: Vec> = Vec::with_capacity(task_runs.len()); + let mut warehouse: Vec> = Vec::with_capacity(task_runs.len()); + let mut state: Vec = Vec::with_capacity(task_runs.len()); + let mut exception_text: Vec> = Vec::with_capacity(task_runs.len()); let mut exception_code: Vec = Vec::with_capacity(task_runs.len()); - let mut run_id: Vec> = Vec::with_capacity(task_runs.len()); - let mut query_id: Vec> = Vec::with_capacity(task_runs.len()); + let mut run_id: Vec = Vec::with_capacity(task_runs.len()); + let mut query_id: Vec = Vec::with_capacity(task_runs.len()); let mut attempt_number: Vec = Vec::with_capacity(task_runs.len()); let mut scheduled_time: Vec = Vec::with_capacity(task_runs.len()); let mut completed_time: Vec> = Vec::with_capacity(task_runs.len()); - let mut root_task_id: Vec> = Vec::with_capacity(task_runs.len()); + let mut root_task_id: Vec = Vec::with_capacity(task_runs.len()); + let mut session_params: Vec>> = Vec::with_capacity(task_runs.len()); for task_run in task_runs { let tr: databend_common_cloud_control::task_utils::TaskRun = task_run.try_into()?; - name.push(tr.task_name.into_bytes()); + name.push(tr.task_name); id.push(tr.task_id); - owner.push(tr.owner.into_bytes()); - comment.push(tr.comment.map(|s| s.into_bytes())); - schedule.push(tr.schedule_options.map(|s| s.into_bytes())); - warehouse.push( - tr.warehouse_options - .and_then(|s| s.warehouse.map(|v| v.into_bytes())), - ); - state.push(tr.state.to_string().into_bytes()); + owner.push(tr.owner); + comment.push(tr.comment); + schedule.push(tr.schedule_options); + warehouse.push(tr.warehouse_options.and_then(|s| s.warehouse)); + state.push(tr.state.to_string()); exception_code.push(tr.error_code); - exception_text.push(tr.error_message.map(|s| s.into_bytes())); - definition.push(tr.query_text.into_bytes()); - condition_text.push(tr.condition_text.into_bytes()); - run_id.push(tr.run_id.into_bytes()); - query_id.push(tr.query_id.into_bytes()); + exception_text.push(tr.error_message); + definition.push(tr.query_text); + condition_text.push(tr.condition_text); + run_id.push(tr.run_id); + query_id.push(tr.query_id); attempt_number.push(tr.attempt_number); 
completed_time.push(tr.completed_at.map(|t| t.timestamp_micros())); scheduled_time.push(tr.scheduled_at.timestamp_micros()); - root_task_id.push(tr.root_task_id.into_bytes()); + root_task_id.push(tr.root_task_id); + let serialized_params = serde_json::to_vec(&tr.session_params).unwrap(); + session_params.push(Some(serialized_params)); } Ok(DataBlock::new_from_columns(vec![ StringType::from_data(name), @@ -101,6 +102,7 @@ pub fn parse_task_runs_to_datablock(task_runs: Vec) -> Result) -> Result { let mut created_on: Vec = Vec::with_capacity(tasks.len()); - let mut name: Vec> = Vec::with_capacity(tasks.len()); + let mut name: Vec = Vec::with_capacity(tasks.len()); let mut id: Vec = Vec::with_capacity(tasks.len()); - let mut owner: Vec> = Vec::with_capacity(tasks.len()); - let mut comment: Vec>> = Vec::with_capacity(tasks.len()); - let mut warehouse: Vec>> = Vec::with_capacity(tasks.len()); - let mut schedule: Vec>> = Vec::with_capacity(tasks.len()); - let mut status: Vec> = Vec::with_capacity(tasks.len()); - let mut definition: Vec> = Vec::with_capacity(tasks.len()); - let mut condition_text: Vec> = Vec::with_capacity(tasks.len()); - let mut after: Vec> = Vec::with_capacity(tasks.len()); + let mut owner: Vec = Vec::with_capacity(tasks.len()); + let mut comment: Vec> = Vec::with_capacity(tasks.len()); + let mut warehouse: Vec> = Vec::with_capacity(tasks.len()); + let mut schedule: Vec> = Vec::with_capacity(tasks.len()); + let mut status: Vec = Vec::with_capacity(tasks.len()); + let mut definition: Vec = Vec::with_capacity(tasks.len()); + let mut condition_text: Vec = Vec::with_capacity(tasks.len()); + let mut after: Vec = Vec::with_capacity(tasks.len()); let mut suspend_after_num_failures: Vec> = Vec::with_capacity(tasks.len()); let mut last_committed_on: Vec = Vec::with_capacity(tasks.len()); let mut next_schedule_time: Vec> = Vec::with_capacity(tasks.len()); let mut last_suspended_on: Vec> = Vec::with_capacity(tasks.len()); - + let mut session_params: Vec>> = Vec::with_capacity(tasks.len()); for task in tasks { let tsk: databend_common_cloud_control::task_utils::Task = task.try_into()?; created_on.push(tsk.created_at.timestamp_micros()); - name.push(tsk.task_name.into_bytes()); + name.push(tsk.task_name); id.push(tsk.task_id); - owner.push(tsk.owner.into_bytes()); - comment.push(tsk.comment.map(|s| s.into_bytes())); - warehouse.push( - tsk.warehouse_options - .and_then(|s| s.warehouse.map(|v| v.into_bytes())), - ); - schedule.push(tsk.schedule_options.map(|s| s.into_bytes())); - status.push(tsk.status.to_string().into_bytes()); - definition.push(tsk.query_text.into_bytes()); - condition_text.push(tsk.condition_text.into_bytes()); + owner.push(tsk.owner); + comment.push(tsk.comment); + warehouse.push(tsk.warehouse_options.and_then(|s| s.warehouse)); + schedule.push(tsk.schedule_options); + status.push(tsk.status.to_string()); + definition.push(tsk.query_text); + condition_text.push(tsk.condition_text); // join by comma - after.push( - tsk.after - .into_iter() - .collect::>() - .join(",") - .into_bytes(), - ); + after.push(tsk.after.into_iter().collect::>().join(",")); suspend_after_num_failures.push(tsk.suspend_task_after_num_failures.map(|v| v as u64)); next_schedule_time.push(tsk.next_scheduled_at.map(|t| t.timestamp_micros())); last_committed_on.push(tsk.updated_at.timestamp_micros()); last_suspended_on.push(tsk.last_suspended_at.map(|t| t.timestamp_micros())); + let serialized_params = serde_json::to_vec(&tsk.session_params).unwrap(); + session_params.push(Some(serialized_params)); } 
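Illustrative sketch (not part of the patch): the new session_params column stores each task's session-parameter map as a Variant value. The map is JSON-encoded to bytes, wrapped in Some(..), and the collected Vec<Option<Vec<u8>>> is handed to VariantType::from_opt_data exactly as in the hunk above; the patch unwraps the serialization result, while this sketch uses .ok() only to stay total. encode_session_params is a made-up helper name:

use std::collections::BTreeMap;

// JSON-encode one task's session parameters for the Variant column,
// e.g. {"timezone": "UTC"} becomes the bytes of {"timezone":"UTC"}.
fn encode_session_params(params: &BTreeMap<String, String>) -> Option<Vec<u8>> {
    serde_json::to_vec(params).ok()
}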
Ok(DataBlock::new_from_columns(vec![ @@ -101,6 +95,7 @@ pub fn parse_tasks_to_datablock(tasks: Vec) -> Result { TimestampType::from_opt_data(next_schedule_time), TimestampType::from_data(last_committed_on), TimestampType::from_opt_data(last_suspended_on), + VariantType::from_opt_data(session_params), ])) } diff --git a/src/query/storages/system/src/temp_files_table.rs b/src/query/storages/system/src/temp_files_table.rs index acb2afcd833a..b2cdaa144905 100644 --- a/src/query/storages/system/src/temp_files_table.rs +++ b/src/query/storages/system/src/temp_files_table.rs @@ -64,7 +64,7 @@ impl AsyncSystemTable for TempFilesTable { let tenant = ctx.get_tenant(); let operator = DataOperator::instance().operator(); - let mut temp_files_name = vec![]; + let mut temp_files_name: Vec = vec![]; let mut temp_files_content_length = vec![]; let mut temp_files_last_modified = vec![]; @@ -81,7 +81,7 @@ impl AsyncSystemTable for TempFilesTable { let metadata = entry.metadata(); if metadata.is_file() { - temp_files_name.push(entry.name().as_bytes().to_vec()); + temp_files_name.push(entry.name().to_string()); temp_files_last_modified .push(metadata.last_modified().map(|x| x.timestamp_micros())); @@ -95,7 +95,7 @@ impl AsyncSystemTable for TempFilesTable { vec![ BlockEntry::new( DataType::String, - Value::Scalar(Scalar::String("Spill".as_bytes().to_owned())), + Value::Scalar(Scalar::String("Spill".to_string())), ), BlockEntry::new( DataType::String, diff --git a/src/query/storages/system/src/tracing_table.rs b/src/query/storages/system/src/tracing_table.rs index 13d06de9fd7a..c6fc69ce35b5 100644 --- a/src/query/storages/system/src/tracing_table.rs +++ b/src/query/storages/system/src/tracing_table.rs @@ -181,7 +181,7 @@ impl SyncSource for TracingSource { entry_column = ColumnBuilder::with_capacity(&DataType::String, max_rows); } - entry_column.push(Scalar::String(line.unwrap().as_bytes().to_vec()).as_ref()); + entry_column.push(Scalar::String(line.unwrap()).as_ref()); } if entry_column.len() > 0 { diff --git a/src/query/storages/system/src/users_table.rs b/src/query/storages/system/src/users_table.rs index 90578915620f..c43112cf5df6 100644 --- a/src/query/storages/system/src/users_table.rs +++ b/src/query/storages/system/src/users_table.rs @@ -54,35 +54,25 @@ impl AsyncSystemTable for UsersTable { let tenant = ctx.get_tenant(); let users = UserApiProvider::instance().get_users(&tenant).await?; - let mut names: Vec> = users.iter().map(|x| x.name.as_bytes().to_vec()).collect(); - let mut hostnames: Vec> = users + let mut names: Vec = users.iter().map(|x| x.name.clone()).collect(); + let mut hostnames: Vec = users.iter().map(|x| x.hostname.clone()).collect(); + let mut auth_types: Vec = users .iter() - .map(|x| x.hostname.as_bytes().to_vec()) + .map(|x| x.auth_info.get_type().to_str().to_string()) .collect(); - let mut auth_types: Vec> = users + let mut default_roles: Vec = users .iter() - .map(|x| x.auth_info.get_type().to_str().as_bytes().to_vec()) + .map(|x| x.option.default_role().cloned().unwrap_or_default()) .collect(); - let mut default_roles: Vec> = users - .iter() - .map(|x| { - x.option - .default_role() - .cloned() - .unwrap_or_default() - .as_bytes() - .to_vec() - }) - .collect(); - let mut is_configureds: Vec> = vec!["NO".as_bytes().to_vec(); users.len()]; + let mut is_configureds: Vec = vec!["NO".to_string(); users.len()]; let configured_users = UserApiProvider::instance().get_configured_users(); for (name, auth_info) in configured_users { - names.push(name.as_bytes().to_vec()); - 
hostnames.push("%".as_bytes().to_vec()); - auth_types.push(auth_info.get_type().to_str().as_bytes().to_vec()); - default_roles.push(BUILTIN_ROLE_ACCOUNT_ADMIN.as_bytes().to_vec()); - is_configureds.push("YES".as_bytes().to_vec()); + names.push(name.clone()); + hostnames.push("%".to_string()); + auth_types.push(auth_info.get_type().to_str().to_string()); + default_roles.push(BUILTIN_ROLE_ACCOUNT_ADMIN.to_string()); + is_configureds.push("YES".to_string()); } // please note that do NOT display the auth_string field in the result, because there're risks of diff --git a/src/query/storages/system/src/virtual_columns_table.rs b/src/query/storages/system/src/virtual_columns_table.rs index 670226f8722a..8e8e061bdd15 100644 --- a/src/query/storages/system/src/virtual_columns_table.rs +++ b/src/query/storages/system/src/virtual_columns_table.rs @@ -65,7 +65,7 @@ impl AsyncSystemTable for VirtualColumnsTable { .await?; let mut database_names = Vec::with_capacity(virtual_column_metas.len()); - let mut table_names = Vec::with_capacity(virtual_column_metas.len()); + let mut table_names: Vec = Vec::with_capacity(virtual_column_metas.len()); let mut virtual_columns = Vec::with_capacity(virtual_column_metas.len()); let mut created_on_columns = Vec::with_capacity(virtual_column_metas.len()); let mut updated_on_columns = Vec::with_capacity(virtual_column_metas.len()); @@ -81,15 +81,9 @@ impl AsyncSystemTable for VirtualColumnsTable { for table in tables { let table_id = table.get_id(); if let Some(virtual_column_meta) = virtual_column_meta_map.remove(&table_id) { - database_names.push(database.as_bytes().to_vec()); - table_names.push(table.name().as_bytes().to_vec()); - virtual_columns.push( - virtual_column_meta - .virtual_columns - .join(", ") - .as_bytes() - .to_vec(), - ); + database_names.push(database.clone()); + table_names.push(table.name().to_string()); + virtual_columns.push(virtual_column_meta.virtual_columns.join(", ")); created_on_columns.push(virtual_column_meta.created_on.timestamp_micros()); updated_on_columns .push(virtual_column_meta.updated_on.map(|u| u.timestamp_micros())); diff --git a/tests/cloud_control_server/simple_server.py b/tests/cloud_control_server/simple_server.py index 2fd225eff44f..1f38ee11b283 100644 --- a/tests/cloud_control_server/simple_server.py +++ b/tests/cloud_control_server/simple_server.py @@ -55,7 +55,8 @@ def create_task_request_to_task(id, create_task_request): task.after.extend(create_task_request.after) task.created_at = datetime.now(timezone.utc).isoformat() task.updated_at = datetime.now(timezone.utc).isoformat() - + # add session parameters + task.session_parameters.update(create_task_request.session_parameters) return task @@ -91,7 +92,7 @@ def create_task_run_from_task(task): task_run.query_id = "qwert" task_run.scheduled_time = datetime.now(timezone.utc).isoformat() task_run.completed_time = datetime.now(timezone.utc).isoformat() - + task_run.session_parameters.update(task.session_parameters) return task_run @@ -210,6 +211,9 @@ def AlterTask(self, request, context): request.suspend_task_after_num_failures ) has_options = True + if request.set_session_parameters: + task.session_parameters.update(request.session_parameters) + has_options = True if has_options is False: return task_pb2.AlterTaskResponse( error=task_pb2.TaskError( @@ -265,14 +269,17 @@ def GetTaskDependents(self, request, context): root = TASK_DB[root.after[0]] l.insert(0, root) return task_pb2.GetTaskDependentsResponse(task=l) + def EnableTaskDependents(self, request, context): 
print("EnableTaskDependents", request) task_name = request.task_name if task_name not in TASK_DB: - return task_pb2.EnableTaskDependentsResponse() + return task_pb2.EnableTaskDependentsResponse() task = TASK_DB[task_name] task.status = task_pb2.Task.Started return task_pb2.EnableTaskDependentsResponse() + + def serve(): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) task_pb2_grpc.add_TaskServiceServicer_to_server(TaskService(), server) diff --git a/tests/cloud_control_server/task_pb2.py b/tests/cloud_control_server/task_pb2.py index 3b3ad6c56c62..ba9d8ac91aed 100644 --- a/tests/cloud_control_server/task_pb2.py +++ b/tests/cloud_control_server/task_pb2.py @@ -6,75 +6,92 @@ from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import builder as _builder + # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\ntask.proto\x12\ttaskproto\"\xe9\x01\n\x0fScheduleOptions\x12\x15\n\x08interval\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x11\n\x04\x63ron\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x16\n\ttime_zone\x18\x03 \x01(\tH\x02\x88\x01\x01\x12>\n\rschedule_type\x18\x04 \x01(\x0e\x32\'.taskproto.ScheduleOptions.ScheduleType\"0\n\x0cScheduleType\x12\x11\n\rinterval_type\x10\x00\x12\r\n\tcron_type\x10\x01\x42\x0b\n\t_intervalB\x07\n\x05_cronB\x0c\n\n_time_zone\"t\n\x10WarehouseOptions\x12\x16\n\twarehouse\x18\x01 \x01(\tH\x00\x88\x01\x01\x12!\n\x14using_warehouse_size\x18\x02 \x01(\tH\x01\x88\x01\x01\x42\x0c\n\n_warehouseB\x17\n\x15_using_warehouse_size\"\x93\x03\n\x11\x43reateTaskRequest\x12\x11\n\ttask_name\x18\x01 \x01(\t\x12\x11\n\ttenant_id\x18\x02 \x01(\t\x12\x12\n\nquery_text\x18\x03 \x01(\t\x12\r\n\x05owner\x18\x04 \x01(\t\x12\x14\n\x07\x63omment\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x36\n\x11warehouse_options\x18\x08 \x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12,\n\x1fsuspend_task_after_num_failures\x18\t \x01(\x05H\x01\x88\x01\x01\x12\x14\n\x0cif_not_exist\x18\n \x01(\x08\x12\r\n\x05\x61\x66ter\x18\x0b \x03(\t\x12\x1b\n\x0ewhen_condition\x18\x0c \x01(\tH\x02\x88\x01\x01\x42\n\n\x08_commentB\"\n _suspend_task_after_num_failuresB\x11\n\x0f_when_condition\"8\n\tTaskError\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\x0f\n\x07message\x18\x02 \x01(\t\x12\x0c\n\x04\x63ode\x18\x03 \x01(\x05\"Y\n\x12\x43reateTaskResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x12\x0f\n\x07task_id\x18\x02 \x01(\x04\x42\x08\n\x06_error\"I\n\x0f\x44ropTaskRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x10\n\x08if_exist\x18\x03 \x01(\x08\"F\n\x10\x44ropTaskResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error\":\n\x12\x45xecuteTaskRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\"I\n\x13\x45xecuteTaskResponse\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error\"M\n\x13\x44\x65scribeTaskRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x10\n\x08if_exist\x18\x03 \x01(\x08\"\xd0\x04\n\x04Task\x12\x0f\n\x07task_id\x18\x01 \x01(\x04\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x12\n\nquery_text\x18\x04 \x01(\t\x12\x14\n\x07\x63omment\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\r\n\x05owner\x18\x06 
\x01(\t\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x36\n\x11warehouse_options\x18\x08 \x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12\x1e\n\x11next_scheduled_at\x18\t \x01(\tH\x01\x88\x01\x01\x12,\n\x1fsuspend_task_after_num_failures\x18\n \x01(\x05H\x02\x88\x01\x01\x12&\n\x06status\x18\x0c \x01(\x0e\x32\x16.taskproto.Task.Status\x12\x12\n\ncreated_at\x18\x0e \x01(\t\x12\x12\n\nupdated_at\x18\x0f \x01(\t\x12\x1e\n\x11last_suspended_at\x18\x10 \x01(\tH\x03\x88\x01\x01\x12\r\n\x05\x61\x66ter\x18\x11 \x03(\t\x12\x1b\n\x0ewhen_condition\x18\x12 \x01(\tH\x04\x88\x01\x01\"$\n\x06Status\x12\r\n\tSuspended\x10\x00\x12\x0b\n\x07Started\x10\x01\x42\n\n\x08_commentB\x14\n\x12_next_scheduled_atB\"\n _suspend_task_after_num_failuresB\x14\n\x12_last_suspended_atB\x11\n\x0f_when_condition\"i\n\x14\x44\x65scribeTaskResponse\x12\x1d\n\x04task\x18\x01 \x01(\x0b\x32\x0f.taskproto.Task\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error\"p\n\x10ShowTasksRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\tname_like\x18\x02 \x01(\t\x12\x14\n\x0cresult_limit\x18\x04 \x01(\x05\x12\x0e\n\x06owners\x18\x05 \x03(\t\x12\x10\n\x08task_ids\x18\x06 \x03(\t\"g\n\x11ShowTasksResponse\x12\x1e\n\x05tasks\x18\x01 \x03(\x0b\x32\x0f.taskproto.Task\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error\"\xf0\x04\n\x10\x41lterTaskRequest\x12\x11\n\ttask_name\x18\x01 \x01(\t\x12\x11\n\ttenant_id\x18\x02 \x01(\t\x12\x17\n\nquery_text\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x14\n\x07\x63omment\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x42\n\x0f\x61lter_task_type\x18\x05 \x01(\x0e\x32).taskproto.AlterTaskRequest.AlterTaskType\x12\r\n\x05owner\x18\x06 \x01(\t\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x10\n\x08if_exist\x18\x08 \x01(\x08\x12\x36\n\x11warehouse_options\x18\t \x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12,\n\x1fsuspend_task_after_num_failures\x18\n \x01(\x05H\x02\x88\x01\x01\x12\x1b\n\x0ewhen_condition\x18\x0b \x01(\tH\x03\x88\x01\x01\x12\x11\n\tadd_after\x18\x0c \x03(\t\x12\x14\n\x0cremove_after\x18\r \x03(\t\"n\n\rAlterTaskType\x12\x0b\n\x07Suspend\x10\x00\x12\n\n\x06Resume\x10\x01\x12\x07\n\x03Set\x10\x02\x12\x0c\n\x08ModifyAs\x10\x03\x12\x0e\n\nModifyWhen\x10\x04\x12\x0c\n\x08\x41\x64\x64\x41\x66ter\x10\x05\x12\x0f\n\x0bRemoveAfter\x10\x06\x42\r\n\x0b_query_textB\n\n\x08_commentB\"\n _suspend_task_after_num_failuresB\x11\n\x0f_when_condition\"f\n\x11\x41lterTaskResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x12\x1d\n\x04task\x18\x02 \x01(\x0b\x32\x0f.taskproto.TaskB\x08\n\x06_error\"\xc1\x01\n\x13ShowTaskRunsRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x1c\n\x14scheduled_time_start\x18\x02 \x01(\t\x12\x1a\n\x12scheduled_time_end\x18\x03 \x01(\t\x12\x14\n\x0cresult_limit\x18\x04 \x01(\x05\x12\x12\n\nerror_only\x18\x05 \x01(\x08\x12\x0e\n\x06owners\x18\x06 \x03(\t\x12\x10\n\x08task_ids\x18\x07 \x03(\t\x12\x11\n\ttask_name\x18\x08 \x01(\t\"\xcc\x04\n\x07TaskRun\x12\x0f\n\x07task_id\x18\x01 \x01(\x04\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x12\n\nquery_text\x18\x04 \x01(\t\x12\x14\n\x07\x63omment\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\r\n\x05owner\x18\x06 \x01(\t\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x0e\n\x06run_id\x18\x08 \x01(\t\x12\x16\n\x0e\x61ttempt_number\x18\t \x01(\x05\x12\x36\n\x11warehouse_options\x18\n 
\x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12\'\n\x05state\x18\x0b \x01(\x0e\x32\x18.taskproto.TaskRun.State\x12\x12\n\nerror_code\x18\x0c \x01(\x03\x12\x1a\n\rerror_message\x18\r \x01(\tH\x01\x88\x01\x01\x12\x16\n\x0escheduled_time\x18\x0e \x01(\t\x12\x1b\n\x0e\x63ompleted_time\x18\x10 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x08query_id\x18\x11 \x01(\t\x12\x16\n\x0e\x63ondition_text\x18\x12 \x01(\t\x12\x14\n\x0croot_task_id\x18\x13 \x01(\t\"O\n\x05State\x12\r\n\tSCHEDULED\x10\x00\x12\r\n\tEXECUTING\x10\x01\x12\r\n\tSUCCEEDED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\r\n\tCANCELLED\x10\x04\x42\n\n\x08_commentB\x10\n\x0e_error_messageB\x11\n\x0f_completed_time\"q\n\x14ShowTaskRunsResponse\x12%\n\ttask_runs\x18\x01 \x03(\x0b\x32\x12.taskproto.TaskRun\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error\"S\n\x18GetTaskDependentsRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x11\n\trecursive\x18\x03 \x01(\x08\"n\n\x19GetTaskDependentsResponse\x12\x1d\n\x04task\x18\x01 \x03(\x0b\x32\x0f.taskproto.Task\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error\"C\n\x1b\x45nableTaskDependentsRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\"R\n\x1c\x45nableTaskDependentsResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error2\xe6\x05\n\x0bTaskService\x12I\n\nCreateTask\x12\x1c.taskproto.CreateTaskRequest\x1a\x1d.taskproto.CreateTaskResponse\x12O\n\x0c\x44\x65scribeTask\x12\x1e.taskproto.DescribeTaskRequest\x1a\x1f.taskproto.DescribeTaskResponse\x12L\n\x0b\x45xecuteTask\x12\x1d.taskproto.ExecuteTaskRequest\x1a\x1e.taskproto.ExecuteTaskResponse\x12\x43\n\x08\x44ropTask\x12\x1a.taskproto.DropTaskRequest\x1a\x1b.taskproto.DropTaskResponse\x12\x46\n\tAlterTask\x12\x1b.taskproto.AlterTaskRequest\x1a\x1c.taskproto.AlterTaskResponse\x12\x46\n\tShowTasks\x12\x1b.taskproto.ShowTasksRequest\x1a\x1c.taskproto.ShowTasksResponse\x12O\n\x0cShowTaskRuns\x12\x1e.taskproto.ShowTaskRunsRequest\x1a\x1f.taskproto.ShowTaskRunsResponse\x12^\n\x11GetTaskDependents\x12#.taskproto.GetTaskDependentsRequest\x1a$.taskproto.GetTaskDependentsResponse\x12g\n\x14\x45nableTaskDependents\x12&.taskproto.EnableTaskDependentsRequest\x1a\'.taskproto.EnableTaskDependentsResponseB!Z\x1f\x64\x61tabend.com/cloudcontrol/protob\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\ntask.proto\x12\ttaskproto"\xe9\x01\n\x0fScheduleOptions\x12\x15\n\x08interval\x18\x01 \x01(\x05H\x00\x88\x01\x01\x12\x11\n\x04\x63ron\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x16\n\ttime_zone\x18\x03 \x01(\tH\x02\x88\x01\x01\x12>\n\rschedule_type\x18\x04 \x01(\x0e\x32\'.taskproto.ScheduleOptions.ScheduleType"0\n\x0cScheduleType\x12\x11\n\rinterval_type\x10\x00\x12\r\n\tcron_type\x10\x01\x42\x0b\n\t_intervalB\x07\n\x05_cronB\x0c\n\n_time_zone"t\n\x10WarehouseOptions\x12\x16\n\twarehouse\x18\x01 \x01(\tH\x00\x88\x01\x01\x12!\n\x14using_warehouse_size\x18\x02 \x01(\tH\x01\x88\x01\x01\x42\x0c\n\n_warehouseB\x17\n\x15_using_warehouse_size"\x9e\x04\n\x11\x43reateTaskRequest\x12\x11\n\ttask_name\x18\x01 \x01(\t\x12\x11\n\ttenant_id\x18\x02 \x01(\t\x12\x12\n\nquery_text\x18\x03 \x01(\t\x12\r\n\x05owner\x18\x04 \x01(\t\x12\x14\n\x07\x63omment\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x36\n\x11warehouse_options\x18\x08 
\x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12,\n\x1fsuspend_task_after_num_failures\x18\t \x01(\x05H\x01\x88\x01\x01\x12\x14\n\x0cif_not_exist\x18\n \x01(\x08\x12\r\n\x05\x61\x66ter\x18\x0b \x03(\t\x12\x1b\n\x0ewhen_condition\x18\x0c \x01(\tH\x02\x88\x01\x01\x12O\n\x12session_parameters\x18\r \x03(\x0b\x32\x33.taskproto.CreateTaskRequest.SessionParametersEntry\x1a\x38\n\x16SessionParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\n\n\x08_commentB"\n _suspend_task_after_num_failuresB\x11\n\x0f_when_condition"8\n\tTaskError\x12\x0c\n\x04kind\x18\x01 \x01(\t\x12\x0f\n\x07message\x18\x02 \x01(\t\x12\x0c\n\x04\x63ode\x18\x03 \x01(\x05"Y\n\x12\x43reateTaskResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x12\x0f\n\x07task_id\x18\x02 \x01(\x04\x42\x08\n\x06_error"I\n\x0f\x44ropTaskRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x10\n\x08if_exist\x18\x03 \x01(\x08"F\n\x10\x44ropTaskResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error":\n\x12\x45xecuteTaskRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t"I\n\x13\x45xecuteTaskResponse\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error"M\n\x13\x44\x65scribeTaskRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x10\n\x08if_exist\x18\x03 \x01(\x08"\xce\x05\n\x04Task\x12\x0f\n\x07task_id\x18\x01 \x01(\x04\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x12\n\nquery_text\x18\x04 \x01(\t\x12\x14\n\x07\x63omment\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\r\n\x05owner\x18\x06 \x01(\t\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x36\n\x11warehouse_options\x18\x08 \x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12\x1e\n\x11next_scheduled_at\x18\t \x01(\tH\x01\x88\x01\x01\x12,\n\x1fsuspend_task_after_num_failures\x18\n \x01(\x05H\x02\x88\x01\x01\x12&\n\x06status\x18\x0c \x01(\x0e\x32\x16.taskproto.Task.Status\x12\x12\n\ncreated_at\x18\x0e \x01(\t\x12\x12\n\nupdated_at\x18\x0f \x01(\t\x12\x1e\n\x11last_suspended_at\x18\x10 \x01(\tH\x03\x88\x01\x01\x12\r\n\x05\x61\x66ter\x18\x11 \x03(\t\x12\x1b\n\x0ewhen_condition\x18\x12 \x01(\tH\x04\x88\x01\x01\x12\x42\n\x12session_parameters\x18\x13 \x03(\x0b\x32&.taskproto.Task.SessionParametersEntry\x1a\x38\n\x16SessionParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"$\n\x06Status\x12\r\n\tSuspended\x10\x00\x12\x0b\n\x07Started\x10\x01\x42\n\n\x08_commentB\x14\n\x12_next_scheduled_atB"\n _suspend_task_after_num_failuresB\x14\n\x12_last_suspended_atB\x11\n\x0f_when_condition"i\n\x14\x44\x65scribeTaskResponse\x12\x1d\n\x04task\x18\x01 \x01(\x0b\x32\x0f.taskproto.Task\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error"p\n\x10ShowTasksRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\tname_like\x18\x02 \x01(\t\x12\x14\n\x0cresult_limit\x18\x04 \x01(\x05\x12\x0e\n\x06owners\x18\x05 \x03(\t\x12\x10\n\x08task_ids\x18\x06 \x03(\t"g\n\x11ShowTasksResponse\x12\x1e\n\x05tasks\x18\x01 \x03(\x0b\x32\x0f.taskproto.Task\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error"\x9a\x06\n\x10\x41lterTaskRequest\x12\x11\n\ttask_name\x18\x01 \x01(\t\x12\x11\n\ttenant_id\x18\x02 \x01(\t\x12\x17\n\nquery_text\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x14\n\x07\x63omment\x18\x04 
\x01(\tH\x01\x88\x01\x01\x12\x42\n\x0f\x61lter_task_type\x18\x05 \x01(\x0e\x32).taskproto.AlterTaskRequest.AlterTaskType\x12\r\n\x05owner\x18\x06 \x01(\t\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x10\n\x08if_exist\x18\x08 \x01(\x08\x12\x36\n\x11warehouse_options\x18\t \x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12,\n\x1fsuspend_task_after_num_failures\x18\n \x01(\x05H\x02\x88\x01\x01\x12\x1b\n\x0ewhen_condition\x18\x0b \x01(\tH\x03\x88\x01\x01\x12\x11\n\tadd_after\x18\x0c \x03(\t\x12\x14\n\x0cremove_after\x18\r \x03(\t\x12\x1e\n\x16set_session_parameters\x18\x0e \x01(\x08\x12N\n\x12session_parameters\x18\x0f \x03(\x0b\x32\x32.taskproto.AlterTaskRequest.SessionParametersEntry\x1a\x38\n\x16SessionParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"n\n\rAlterTaskType\x12\x0b\n\x07Suspend\x10\x00\x12\n\n\x06Resume\x10\x01\x12\x07\n\x03Set\x10\x02\x12\x0c\n\x08ModifyAs\x10\x03\x12\x0e\n\nModifyWhen\x10\x04\x12\x0c\n\x08\x41\x64\x64\x41\x66ter\x10\x05\x12\x0f\n\x0bRemoveAfter\x10\x06\x42\r\n\x0b_query_textB\n\n\x08_commentB"\n _suspend_task_after_num_failuresB\x11\n\x0f_when_condition"f\n\x11\x41lterTaskResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x12\x1d\n\x04task\x18\x02 \x01(\x0b\x32\x0f.taskproto.TaskB\x08\n\x06_error"\xc1\x01\n\x13ShowTaskRunsRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x1c\n\x14scheduled_time_start\x18\x02 \x01(\t\x12\x1a\n\x12scheduled_time_end\x18\x03 \x01(\t\x12\x14\n\x0cresult_limit\x18\x04 \x01(\x05\x12\x12\n\nerror_only\x18\x05 \x01(\x08\x12\x0e\n\x06owners\x18\x06 \x03(\t\x12\x10\n\x08task_ids\x18\x07 \x03(\t\x12\x11\n\ttask_name\x18\x08 \x01(\t"\xcd\x05\n\x07TaskRun\x12\x0f\n\x07task_id\x18\x01 \x01(\x04\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x12\n\nquery_text\x18\x04 \x01(\t\x12\x14\n\x07\x63omment\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\r\n\x05owner\x18\x06 \x01(\t\x12\x34\n\x10schedule_options\x18\x07 \x01(\x0b\x32\x1a.taskproto.ScheduleOptions\x12\x0e\n\x06run_id\x18\x08 \x01(\t\x12\x16\n\x0e\x61ttempt_number\x18\t \x01(\x05\x12\x36\n\x11warehouse_options\x18\n \x01(\x0b\x32\x1b.taskproto.WarehouseOptions\x12\'\n\x05state\x18\x0b \x01(\x0e\x32\x18.taskproto.TaskRun.State\x12\x12\n\nerror_code\x18\x0c \x01(\x03\x12\x1a\n\rerror_message\x18\r \x01(\tH\x01\x88\x01\x01\x12\x16\n\x0escheduled_time\x18\x0e \x01(\t\x12\x1b\n\x0e\x63ompleted_time\x18\x10 \x01(\tH\x02\x88\x01\x01\x12\x10\n\x08query_id\x18\x11 \x01(\t\x12\x16\n\x0e\x63ondition_text\x18\x12 \x01(\t\x12\x14\n\x0croot_task_id\x18\x13 \x01(\t\x12\x45\n\x12session_parameters\x18\x14 \x03(\x0b\x32).taskproto.TaskRun.SessionParametersEntry\x1a\x38\n\x16SessionParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"O\n\x05State\x12\r\n\tSCHEDULED\x10\x00\x12\r\n\tEXECUTING\x10\x01\x12\r\n\tSUCCEEDED\x10\x02\x12\n\n\x06\x46\x41ILED\x10\x03\x12\r\n\tCANCELLED\x10\x04\x42\n\n\x08_commentB\x10\n\x0e_error_messageB\x11\n\x0f_completed_time"q\n\x14ShowTaskRunsResponse\x12%\n\ttask_runs\x18\x01 \x03(\x0b\x32\x12.taskproto.TaskRun\x12(\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error"S\n\x18GetTaskDependentsRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t\x12\x11\n\trecursive\x18\x03 \x01(\x08"n\n\x19GetTaskDependentsResponse\x12\x1d\n\x04task\x18\x01 \x03(\x0b\x32\x0f.taskproto.Task\x12(\n\x05\x65rror\x18\x02 
\x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error"C\n\x1b\x45nableTaskDependentsRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x11\n\ttask_name\x18\x02 \x01(\t"R\n\x1c\x45nableTaskDependentsResponse\x12(\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x14.taskproto.TaskErrorH\x00\x88\x01\x01\x42\x08\n\x06_error2\xe6\x05\n\x0bTaskService\x12I\n\nCreateTask\x12\x1c.taskproto.CreateTaskRequest\x1a\x1d.taskproto.CreateTaskResponse\x12O\n\x0c\x44\x65scribeTask\x12\x1e.taskproto.DescribeTaskRequest\x1a\x1f.taskproto.DescribeTaskResponse\x12L\n\x0b\x45xecuteTask\x12\x1d.taskproto.ExecuteTaskRequest\x1a\x1e.taskproto.ExecuteTaskResponse\x12\x43\n\x08\x44ropTask\x12\x1a.taskproto.DropTaskRequest\x1a\x1b.taskproto.DropTaskResponse\x12\x46\n\tAlterTask\x12\x1b.taskproto.AlterTaskRequest\x1a\x1c.taskproto.AlterTaskResponse\x12\x46\n\tShowTasks\x12\x1b.taskproto.ShowTasksRequest\x1a\x1c.taskproto.ShowTasksResponse\x12O\n\x0cShowTaskRuns\x12\x1e.taskproto.ShowTaskRunsRequest\x1a\x1f.taskproto.ShowTaskRunsResponse\x12^\n\x11GetTaskDependents\x12#.taskproto.GetTaskDependentsRequest\x1a$.taskproto.GetTaskDependentsResponse\x12g\n\x14\x45nableTaskDependents\x12&.taskproto.EnableTaskDependentsRequest\x1a\'.taskproto.EnableTaskDependentsResponseB!Z\x1f\x64\x61tabend.com/cloudcontrol/protob\x06proto3' +) _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'task_pb2', _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "task_pb2", _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z\037databend.com/cloudcontrol/proto' - _globals['_SCHEDULEOPTIONS']._serialized_start=26 - _globals['_SCHEDULEOPTIONS']._serialized_end=259 - _globals['_SCHEDULEOPTIONS_SCHEDULETYPE']._serialized_start=175 - _globals['_SCHEDULEOPTIONS_SCHEDULETYPE']._serialized_end=223 - _globals['_WAREHOUSEOPTIONS']._serialized_start=261 - _globals['_WAREHOUSEOPTIONS']._serialized_end=377 - _globals['_CREATETASKREQUEST']._serialized_start=380 - _globals['_CREATETASKREQUEST']._serialized_end=783 - _globals['_TASKERROR']._serialized_start=785 - _globals['_TASKERROR']._serialized_end=841 - _globals['_CREATETASKRESPONSE']._serialized_start=843 - _globals['_CREATETASKRESPONSE']._serialized_end=932 - _globals['_DROPTASKREQUEST']._serialized_start=934 - _globals['_DROPTASKREQUEST']._serialized_end=1007 - _globals['_DROPTASKRESPONSE']._serialized_start=1009 - _globals['_DROPTASKRESPONSE']._serialized_end=1079 - _globals['_EXECUTETASKREQUEST']._serialized_start=1081 - _globals['_EXECUTETASKREQUEST']._serialized_end=1139 - _globals['_EXECUTETASKRESPONSE']._serialized_start=1141 - _globals['_EXECUTETASKRESPONSE']._serialized_end=1214 - _globals['_DESCRIBETASKREQUEST']._serialized_start=1216 - _globals['_DESCRIBETASKREQUEST']._serialized_end=1293 - _globals['_TASK']._serialized_start=1296 - _globals['_TASK']._serialized_end=1888 - _globals['_TASK_STATUS']._serialized_start=1741 - _globals['_TASK_STATUS']._serialized_end=1777 - _globals['_DESCRIBETASKRESPONSE']._serialized_start=1890 - _globals['_DESCRIBETASKRESPONSE']._serialized_end=1995 - _globals['_SHOWTASKSREQUEST']._serialized_start=1997 - _globals['_SHOWTASKSREQUEST']._serialized_end=2109 - _globals['_SHOWTASKSRESPONSE']._serialized_start=2111 - _globals['_SHOWTASKSRESPONSE']._serialized_end=2214 - _globals['_ALTERTASKREQUEST']._serialized_start=2217 - _globals['_ALTERTASKREQUEST']._serialized_end=2841 - 
_globals['_ALTERTASKREQUEST_ALTERTASKTYPE']._serialized_start=2649 - _globals['_ALTERTASKREQUEST_ALTERTASKTYPE']._serialized_end=2759 - _globals['_ALTERTASKRESPONSE']._serialized_start=2843 - _globals['_ALTERTASKRESPONSE']._serialized_end=2945 - _globals['_SHOWTASKRUNSREQUEST']._serialized_start=2948 - _globals['_SHOWTASKRUNSREQUEST']._serialized_end=3141 - _globals['_TASKRUN']._serialized_start=3144 - _globals['_TASKRUN']._serialized_end=3732 - _globals['_TASKRUN_STATE']._serialized_start=3604 - _globals['_TASKRUN_STATE']._serialized_end=3683 - _globals['_SHOWTASKRUNSRESPONSE']._serialized_start=3734 - _globals['_SHOWTASKRUNSRESPONSE']._serialized_end=3847 - _globals['_GETTASKDEPENDENTSREQUEST']._serialized_start=3849 - _globals['_GETTASKDEPENDENTSREQUEST']._serialized_end=3932 - _globals['_GETTASKDEPENDENTSRESPONSE']._serialized_start=3934 - _globals['_GETTASKDEPENDENTSRESPONSE']._serialized_end=4044 - _globals['_ENABLETASKDEPENDENTSREQUEST']._serialized_start=4046 - _globals['_ENABLETASKDEPENDENTSREQUEST']._serialized_end=4113 - _globals['_ENABLETASKDEPENDENTSRESPONSE']._serialized_start=4115 - _globals['_ENABLETASKDEPENDENTSRESPONSE']._serialized_end=4197 - _globals['_TASKSERVICE']._serialized_start=4200 - _globals['_TASKSERVICE']._serialized_end=4942 + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b"Z\037databend.com/cloudcontrol/proto" + _CREATETASKREQUEST_SESSIONPARAMETERSENTRY._options = None + _CREATETASKREQUEST_SESSIONPARAMETERSENTRY._serialized_options = b"8\001" + _TASK_SESSIONPARAMETERSENTRY._options = None + _TASK_SESSIONPARAMETERSENTRY._serialized_options = b"8\001" + _ALTERTASKREQUEST_SESSIONPARAMETERSENTRY._options = None + _ALTERTASKREQUEST_SESSIONPARAMETERSENTRY._serialized_options = b"8\001" + _TASKRUN_SESSIONPARAMETERSENTRY._options = None + _TASKRUN_SESSIONPARAMETERSENTRY._serialized_options = b"8\001" + _globals["_SCHEDULEOPTIONS"]._serialized_start = 26 + _globals["_SCHEDULEOPTIONS"]._serialized_end = 259 + _globals["_SCHEDULEOPTIONS_SCHEDULETYPE"]._serialized_start = 175 + _globals["_SCHEDULEOPTIONS_SCHEDULETYPE"]._serialized_end = 223 + _globals["_WAREHOUSEOPTIONS"]._serialized_start = 261 + _globals["_WAREHOUSEOPTIONS"]._serialized_end = 377 + _globals["_CREATETASKREQUEST"]._serialized_start = 380 + _globals["_CREATETASKREQUEST"]._serialized_end = 922 + _globals["_CREATETASKREQUEST_SESSIONPARAMETERSENTRY"]._serialized_start = 799 + _globals["_CREATETASKREQUEST_SESSIONPARAMETERSENTRY"]._serialized_end = 855 + _globals["_TASKERROR"]._serialized_start = 924 + _globals["_TASKERROR"]._serialized_end = 980 + _globals["_CREATETASKRESPONSE"]._serialized_start = 982 + _globals["_CREATETASKRESPONSE"]._serialized_end = 1071 + _globals["_DROPTASKREQUEST"]._serialized_start = 1073 + _globals["_DROPTASKREQUEST"]._serialized_end = 1146 + _globals["_DROPTASKRESPONSE"]._serialized_start = 1148 + _globals["_DROPTASKRESPONSE"]._serialized_end = 1218 + _globals["_EXECUTETASKREQUEST"]._serialized_start = 1220 + _globals["_EXECUTETASKREQUEST"]._serialized_end = 1278 + _globals["_EXECUTETASKRESPONSE"]._serialized_start = 1280 + _globals["_EXECUTETASKRESPONSE"]._serialized_end = 1353 + _globals["_DESCRIBETASKREQUEST"]._serialized_start = 1355 + _globals["_DESCRIBETASKREQUEST"]._serialized_end = 1432 + _globals["_TASK"]._serialized_start = 1435 + _globals["_TASK"]._serialized_end = 2153 + _globals["_TASK_SESSIONPARAMETERSENTRY"]._serialized_start = 799 + _globals["_TASK_SESSIONPARAMETERSENTRY"]._serialized_end = 855 + _globals["_TASK_STATUS"]._serialized_start = 2006 + 
_globals["_TASK_STATUS"]._serialized_end = 2042 + _globals["_DESCRIBETASKRESPONSE"]._serialized_start = 2155 + _globals["_DESCRIBETASKRESPONSE"]._serialized_end = 2260 + _globals["_SHOWTASKSREQUEST"]._serialized_start = 2262 + _globals["_SHOWTASKSREQUEST"]._serialized_end = 2374 + _globals["_SHOWTASKSRESPONSE"]._serialized_start = 2376 + _globals["_SHOWTASKSRESPONSE"]._serialized_end = 2479 + _globals["_ALTERTASKREQUEST"]._serialized_start = 2482 + _globals["_ALTERTASKREQUEST"]._serialized_end = 3276 + _globals["_ALTERTASKREQUEST_SESSIONPARAMETERSENTRY"]._serialized_start = 799 + _globals["_ALTERTASKREQUEST_SESSIONPARAMETERSENTRY"]._serialized_end = 855 + _globals["_ALTERTASKREQUEST_ALTERTASKTYPE"]._serialized_start = 3084 + _globals["_ALTERTASKREQUEST_ALTERTASKTYPE"]._serialized_end = 3194 + _globals["_ALTERTASKRESPONSE"]._serialized_start = 3278 + _globals["_ALTERTASKRESPONSE"]._serialized_end = 3380 + _globals["_SHOWTASKRUNSREQUEST"]._serialized_start = 3383 + _globals["_SHOWTASKRUNSREQUEST"]._serialized_end = 3576 + _globals["_TASKRUN"]._serialized_start = 3579 + _globals["_TASKRUN"]._serialized_end = 4296 + _globals["_TASKRUN_SESSIONPARAMETERSENTRY"]._serialized_start = 799 + _globals["_TASKRUN_SESSIONPARAMETERSENTRY"]._serialized_end = 855 + _globals["_TASKRUN_STATE"]._serialized_start = 4168 + _globals["_TASKRUN_STATE"]._serialized_end = 4247 + _globals["_SHOWTASKRUNSRESPONSE"]._serialized_start = 4298 + _globals["_SHOWTASKRUNSRESPONSE"]._serialized_end = 4411 + _globals["_GETTASKDEPENDENTSREQUEST"]._serialized_start = 4413 + _globals["_GETTASKDEPENDENTSREQUEST"]._serialized_end = 4496 + _globals["_GETTASKDEPENDENTSRESPONSE"]._serialized_start = 4498 + _globals["_GETTASKDEPENDENTSRESPONSE"]._serialized_end = 4608 + _globals["_ENABLETASKDEPENDENTSREQUEST"]._serialized_start = 4610 + _globals["_ENABLETASKDEPENDENTSREQUEST"]._serialized_end = 4677 + _globals["_ENABLETASKDEPENDENTSRESPONSE"]._serialized_start = 4679 + _globals["_ENABLETASKDEPENDENTSRESPONSE"]._serialized_end = 4761 + _globals["_TASKSERVICE"]._serialized_start = 4764 + _globals["_TASKSERVICE"]._serialized_end = 5506 # @@protoc_insertion_point(module_scope) diff --git a/tests/cloud_control_server/task_pb2_grpc.py b/tests/cloud_control_server/task_pb2_grpc.py index 52d0f5c54be2..274c826bb069 100644 --- a/tests/cloud_control_server/task_pb2_grpc.py +++ b/tests/cloud_control_server/task_pb2_grpc.py @@ -15,50 +15,50 @@ def __init__(self, channel): channel: A grpc.Channel. 
""" self.CreateTask = channel.unary_unary( - '/taskproto.TaskService/CreateTask', - request_serializer=task__pb2.CreateTaskRequest.SerializeToString, - response_deserializer=task__pb2.CreateTaskResponse.FromString, - ) + "/taskproto.TaskService/CreateTask", + request_serializer=task__pb2.CreateTaskRequest.SerializeToString, + response_deserializer=task__pb2.CreateTaskResponse.FromString, + ) self.DescribeTask = channel.unary_unary( - '/taskproto.TaskService/DescribeTask', - request_serializer=task__pb2.DescribeTaskRequest.SerializeToString, - response_deserializer=task__pb2.DescribeTaskResponse.FromString, - ) + "/taskproto.TaskService/DescribeTask", + request_serializer=task__pb2.DescribeTaskRequest.SerializeToString, + response_deserializer=task__pb2.DescribeTaskResponse.FromString, + ) self.ExecuteTask = channel.unary_unary( - '/taskproto.TaskService/ExecuteTask', - request_serializer=task__pb2.ExecuteTaskRequest.SerializeToString, - response_deserializer=task__pb2.ExecuteTaskResponse.FromString, - ) + "/taskproto.TaskService/ExecuteTask", + request_serializer=task__pb2.ExecuteTaskRequest.SerializeToString, + response_deserializer=task__pb2.ExecuteTaskResponse.FromString, + ) self.DropTask = channel.unary_unary( - '/taskproto.TaskService/DropTask', - request_serializer=task__pb2.DropTaskRequest.SerializeToString, - response_deserializer=task__pb2.DropTaskResponse.FromString, - ) + "/taskproto.TaskService/DropTask", + request_serializer=task__pb2.DropTaskRequest.SerializeToString, + response_deserializer=task__pb2.DropTaskResponse.FromString, + ) self.AlterTask = channel.unary_unary( - '/taskproto.TaskService/AlterTask', - request_serializer=task__pb2.AlterTaskRequest.SerializeToString, - response_deserializer=task__pb2.AlterTaskResponse.FromString, - ) + "/taskproto.TaskService/AlterTask", + request_serializer=task__pb2.AlterTaskRequest.SerializeToString, + response_deserializer=task__pb2.AlterTaskResponse.FromString, + ) self.ShowTasks = channel.unary_unary( - '/taskproto.TaskService/ShowTasks', - request_serializer=task__pb2.ShowTasksRequest.SerializeToString, - response_deserializer=task__pb2.ShowTasksResponse.FromString, - ) + "/taskproto.TaskService/ShowTasks", + request_serializer=task__pb2.ShowTasksRequest.SerializeToString, + response_deserializer=task__pb2.ShowTasksResponse.FromString, + ) self.ShowTaskRuns = channel.unary_unary( - '/taskproto.TaskService/ShowTaskRuns', - request_serializer=task__pb2.ShowTaskRunsRequest.SerializeToString, - response_deserializer=task__pb2.ShowTaskRunsResponse.FromString, - ) + "/taskproto.TaskService/ShowTaskRuns", + request_serializer=task__pb2.ShowTaskRunsRequest.SerializeToString, + response_deserializer=task__pb2.ShowTaskRunsResponse.FromString, + ) self.GetTaskDependents = channel.unary_unary( - '/taskproto.TaskService/GetTaskDependents', - request_serializer=task__pb2.GetTaskDependentsRequest.SerializeToString, - response_deserializer=task__pb2.GetTaskDependentsResponse.FromString, - ) + "/taskproto.TaskService/GetTaskDependents", + request_serializer=task__pb2.GetTaskDependentsRequest.SerializeToString, + response_deserializer=task__pb2.GetTaskDependentsResponse.FromString, + ) self.EnableTaskDependents = channel.unary_unary( - '/taskproto.TaskService/EnableTaskDependents', - request_serializer=task__pb2.EnableTaskDependentsRequest.SerializeToString, - response_deserializer=task__pb2.EnableTaskDependentsResponse.FromString, - ) + "/taskproto.TaskService/EnableTaskDependents", + 
request_serializer=task__pb2.EnableTaskDependentsRequest.SerializeToString, + response_deserializer=task__pb2.EnableTaskDependentsResponse.FromString, + ) class TaskServiceServicer(object): @@ -67,264 +67,373 @@ class TaskServiceServicer(object): def CreateTask(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def DescribeTask(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def ExecuteTask(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def DropTask(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def AlterTask(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def ShowTasks(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def ShowTaskRuns(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def GetTaskDependents(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def EnableTaskDependents(self, request, context): """Missing associated documentation comment in .proto file.""" context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details('Method not implemented!') - raise NotImplementedError('Method not implemented!') + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") def add_TaskServiceServicer_to_server(servicer, server): 
rpc_method_handlers = { - 'CreateTask': grpc.unary_unary_rpc_method_handler( - servicer.CreateTask, - request_deserializer=task__pb2.CreateTaskRequest.FromString, - response_serializer=task__pb2.CreateTaskResponse.SerializeToString, - ), - 'DescribeTask': grpc.unary_unary_rpc_method_handler( - servicer.DescribeTask, - request_deserializer=task__pb2.DescribeTaskRequest.FromString, - response_serializer=task__pb2.DescribeTaskResponse.SerializeToString, - ), - 'ExecuteTask': grpc.unary_unary_rpc_method_handler( - servicer.ExecuteTask, - request_deserializer=task__pb2.ExecuteTaskRequest.FromString, - response_serializer=task__pb2.ExecuteTaskResponse.SerializeToString, - ), - 'DropTask': grpc.unary_unary_rpc_method_handler( - servicer.DropTask, - request_deserializer=task__pb2.DropTaskRequest.FromString, - response_serializer=task__pb2.DropTaskResponse.SerializeToString, - ), - 'AlterTask': grpc.unary_unary_rpc_method_handler( - servicer.AlterTask, - request_deserializer=task__pb2.AlterTaskRequest.FromString, - response_serializer=task__pb2.AlterTaskResponse.SerializeToString, - ), - 'ShowTasks': grpc.unary_unary_rpc_method_handler( - servicer.ShowTasks, - request_deserializer=task__pb2.ShowTasksRequest.FromString, - response_serializer=task__pb2.ShowTasksResponse.SerializeToString, - ), - 'ShowTaskRuns': grpc.unary_unary_rpc_method_handler( - servicer.ShowTaskRuns, - request_deserializer=task__pb2.ShowTaskRunsRequest.FromString, - response_serializer=task__pb2.ShowTaskRunsResponse.SerializeToString, - ), - 'GetTaskDependents': grpc.unary_unary_rpc_method_handler( - servicer.GetTaskDependents, - request_deserializer=task__pb2.GetTaskDependentsRequest.FromString, - response_serializer=task__pb2.GetTaskDependentsResponse.SerializeToString, - ), - 'EnableTaskDependents': grpc.unary_unary_rpc_method_handler( - servicer.EnableTaskDependents, - request_deserializer=task__pb2.EnableTaskDependentsRequest.FromString, - response_serializer=task__pb2.EnableTaskDependentsResponse.SerializeToString, - ), + "CreateTask": grpc.unary_unary_rpc_method_handler( + servicer.CreateTask, + request_deserializer=task__pb2.CreateTaskRequest.FromString, + response_serializer=task__pb2.CreateTaskResponse.SerializeToString, + ), + "DescribeTask": grpc.unary_unary_rpc_method_handler( + servicer.DescribeTask, + request_deserializer=task__pb2.DescribeTaskRequest.FromString, + response_serializer=task__pb2.DescribeTaskResponse.SerializeToString, + ), + "ExecuteTask": grpc.unary_unary_rpc_method_handler( + servicer.ExecuteTask, + request_deserializer=task__pb2.ExecuteTaskRequest.FromString, + response_serializer=task__pb2.ExecuteTaskResponse.SerializeToString, + ), + "DropTask": grpc.unary_unary_rpc_method_handler( + servicer.DropTask, + request_deserializer=task__pb2.DropTaskRequest.FromString, + response_serializer=task__pb2.DropTaskResponse.SerializeToString, + ), + "AlterTask": grpc.unary_unary_rpc_method_handler( + servicer.AlterTask, + request_deserializer=task__pb2.AlterTaskRequest.FromString, + response_serializer=task__pb2.AlterTaskResponse.SerializeToString, + ), + "ShowTasks": grpc.unary_unary_rpc_method_handler( + servicer.ShowTasks, + request_deserializer=task__pb2.ShowTasksRequest.FromString, + response_serializer=task__pb2.ShowTasksResponse.SerializeToString, + ), + "ShowTaskRuns": grpc.unary_unary_rpc_method_handler( + servicer.ShowTaskRuns, + request_deserializer=task__pb2.ShowTaskRunsRequest.FromString, + response_serializer=task__pb2.ShowTaskRunsResponse.SerializeToString, + ), + "GetTaskDependents": 
grpc.unary_unary_rpc_method_handler( + servicer.GetTaskDependents, + request_deserializer=task__pb2.GetTaskDependentsRequest.FromString, + response_serializer=task__pb2.GetTaskDependentsResponse.SerializeToString, + ), + "EnableTaskDependents": grpc.unary_unary_rpc_method_handler( + servicer.EnableTaskDependents, + request_deserializer=task__pb2.EnableTaskDependentsRequest.FromString, + response_serializer=task__pb2.EnableTaskDependentsResponse.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( - 'taskproto.TaskService', rpc_method_handlers) + "taskproto.TaskService", rpc_method_handlers + ) server.add_generic_rpc_handlers((generic_handler,)) - # This class is part of an EXPERIMENTAL API. +# This class is part of an EXPERIMENTAL API. class TaskService(object): """Missing associated documentation comment in .proto file.""" @staticmethod - def CreateTask(request, + def CreateTask( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/CreateTask', + "/taskproto.TaskService/CreateTask", task__pb2.CreateTaskRequest.SerializeToString, task__pb2.CreateTaskResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def DescribeTask(request, + def DescribeTask( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/DescribeTask', + "/taskproto.TaskService/DescribeTask", task__pb2.DescribeTaskRequest.SerializeToString, task__pb2.DescribeTaskResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def ExecuteTask(request, + def ExecuteTask( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/ExecuteTask', + "/taskproto.TaskService/ExecuteTask", task__pb2.ExecuteTaskRequest.SerializeToString, task__pb2.ExecuteTaskResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + 
channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def DropTask(request, + def DropTask( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/DropTask', + "/taskproto.TaskService/DropTask", task__pb2.DropTaskRequest.SerializeToString, task__pb2.DropTaskResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def AlterTask(request, + def AlterTask( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/AlterTask', + "/taskproto.TaskService/AlterTask", task__pb2.AlterTaskRequest.SerializeToString, task__pb2.AlterTaskResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def ShowTasks(request, + def ShowTasks( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/ShowTasks', + "/taskproto.TaskService/ShowTasks", task__pb2.ShowTasksRequest.SerializeToString, task__pb2.ShowTasksResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def ShowTaskRuns(request, + def ShowTaskRuns( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/ShowTaskRuns', + "/taskproto.TaskService/ShowTaskRuns", task__pb2.ShowTaskRunsRequest.SerializeToString, task__pb2.ShowTaskRunsResponse.FromString, - 
options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def GetTaskDependents(request, + def GetTaskDependents( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/GetTaskDependents', + "/taskproto.TaskService/GetTaskDependents", task__pb2.GetTaskDependentsRequest.SerializeToString, task__pb2.GetTaskDependentsResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) @staticmethod - def EnableTaskDependents(request, + def EnableTaskDependents( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return grpc.experimental.unary_unary( + request, target, - options=(), - channel_credentials=None, - call_credentials=None, - insecure=False, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None): - return grpc.experimental.unary_unary(request, target, '/taskproto.TaskService/EnableTaskDependents', + "/taskproto.TaskService/EnableTaskDependents", task__pb2.EnableTaskDependentsRequest.SerializeToString, task__pb2.EnableTaskDependentsResponse.FromString, - options, channel_credentials, - insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) diff --git a/tests/sqllogictests/suites/base/03_common/03_0034_select_star_columns.test b/tests/sqllogictests/suites/base/03_common/03_0034_select_star_columns.test index 55146b7c7155..c9880e14f3c4 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0034_select_star_columns.test +++ b/tests/sqllogictests/suites/base/03_common/03_0034_select_star_columns.test @@ -257,4 +257,4 @@ drop table default.t statement ok -drop table default.t1 \ No newline at end of file +drop table default.t1 diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test index e2f26d9ebc90..b56efaf7794d 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test @@ -1,13 +1,7 @@ statement ok set enable_distributed_merge_into = 1; -statement ok -set enable_runtime_filter = 1; - include ./09_0036_merge_into_without_distributed_enable.test statement ok set enable_distributed_merge_into = 0; - -statement ok -set enable_runtime_filter = 0; diff --git a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test index 36ded546c793..ed22625675bc 100644 --- 
a/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/aggregate_property.test @@ -7,6 +7,9 @@ create database aggregate_property statement ok use aggregate_property +statement ok +set max_threads = 1 + statement ok create table t_10(a int) as select * from numbers(10) @@ -23,22 +26,22 @@ where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #8 -├── estimated memory: 9912 bytes +├── estimated memory: 10560 bytes ├── Group #0 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] -│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] +│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #1, cost: 52000.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#0] +│ ├── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#0] │ └── #2 Exchange: (Merge) [#0] ├── Group #1 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] -│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #1, cost: 52000.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#1] +│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#1] │ └── #2 Exchange: (Merge) [#1] ├── Group #2 │ ├── Best properties @@ -52,14 +55,16 @@ Memo │ └── #3 Exchange: (Merge) [#2] ├── Group #3 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 3110.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ ├── { dist: Broadcast }: expr: #1, cost: 4110.000, children: [{ dist: Any }] -│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #0, cost: 54110.000, children: [{ dist: Hash(t_1000.a (#2)::Int32 NULL) }, { dist: Hash(t_10.a (#0)::Int32 NULL) }] -│ │ └── { dist: Serial }: expr: #0, cost: 355610.000, children: [{ dist: Serial }, { dist: Serial }] +│ │ ├── { dist: Any }: expr: #0, cost: 1310.000, children: [{ dist: Any }, { dist: Broadcast }] +│ │ ├── { dist: Broadcast }: expr: #1, cost: 2310.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #3, cost: 1820.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #0, cost: 6410.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }, { dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Serial }: expr: #0, cost: 38810.000, children: [{ dist: Serial }, { dist: Serial }] │ ├── #0 Join [#1, #2] │ ├── #1 Exchange: (Broadcast) [#3] │ ├── #2 Exchange: (Merge) [#3] -│ └── #3 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#3] +│ ├── #3 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#3] +│ └── #4 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#3] ├── Group #4 │ ├── Best properties │ │ └── { dist: Any }: expr: #0, cost: 4410.000, children: [{ dist: Any }, { dist: Broadcast }] @@ -92,22 +97,22 @@ group by t_10.a, t_100.a ---- Memo ├── root group: #8 -├── estimated memory: 23128 bytes +├── estimated memory: 
24480 bytes ├── Group #0 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] -│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] +│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #1, cost: 52000.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#0] +│ ├── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#0] │ └── #2 Exchange: (Merge) [#0] ├── Group #1 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] -│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #1, cost: 52000.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#1] +│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#1] │ └── #2 Exchange: (Merge) [#1] ├── Group #2 │ ├── Best properties @@ -121,14 +126,16 @@ Memo │ └── #3 Exchange: (Merge) [#2] ├── Group #3 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 3110.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ ├── { dist: Broadcast }: expr: #1, cost: 4110.000, children: [{ dist: Any }] -│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #0, cost: 53620.000, children: [{ dist: Hash(t_1000.a (#2)::Int32 NULL) }, { dist: Hash(t_10.a (#0)::Int32 NULL) }] -│ │ └── { dist: Serial }: expr: #0, cost: 355610.000, children: [{ dist: Serial }, { dist: Serial }] +│ │ ├── { dist: Any }: expr: #0, cost: 1310.000, children: [{ dist: Any }, { dist: Broadcast }] +│ │ ├── { dist: Broadcast }: expr: #1, cost: 2310.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #3, cost: 1820.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #0, cost: 5920.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }, { dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Serial }: expr: #0, cost: 38810.000, children: [{ dist: Serial }, { dist: Serial }] │ ├── #0 Join [#1, #2] │ ├── #1 Exchange: (Broadcast) [#3] │ ├── #2 Exchange: (Merge) [#3] -│ └── #3 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#3] +│ ├── #3 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#3] +│ └── #4 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#3] ├── Group #4 │ ├── Best properties │ │ └── { dist: Any }: expr: #0, cost: 4410.000, children: [{ dist: Any }, { dist: Broadcast }] @@ -158,85 +165,87 @@ Memo │ └── #4 Exchange: (Merge) [#8] ├── Group #9 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 3120.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 3630.000, children: [{ dist: Any }] -│ ├── #0 EvalScalar [#3] -│ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#9] +│ │ ├── { dist: Any }: expr: #0, cost: 2000.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #1, cost: 53000.000, children: [{ dist: Any }] +│ ├── #0 EvalScalar [#0] +│ └── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) 
[#9] ├── Group #10 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 3680.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 58000.000, children: [{ dist: Hash(t_1000.a (#2)::Int32 NULL) }] │ └── #0 Aggregate [#9] ├── Group #11 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 3730.000, children: [{ dist: Any }] -│ │ ├── { dist: Broadcast }: expr: #1, cost: 4730.000, children: [{ dist: Any }] -│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #3, cost: 4240.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 7230.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 63000.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #1, cost: 114000.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 413000.000, children: [{ dist: Any }] │ ├── #0 Aggregate [#10] -│ ├── #1 Exchange: (Broadcast) [#11] -│ ├── #2 Exchange: (Merge) [#11] -│ └── #3 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#11] +│ ├── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#11] +│ └── #2 Exchange: (Merge) [#11] ├── Group #12 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 5030.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 5489.000, children: [{ dist: Any }] -│ ├── #0 Join [#0, #11] +│ │ ├── { dist: Any }: expr: #0, cost: 66410.000, children: [{ dist: Any }, { dist: Broadcast }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 66869.000, children: [{ dist: Any }] +│ ├── #0 Join [#11, #3] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#12] ├── Group #13 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5534.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 66914.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#12] ├── Group #14 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5579.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 66959.000, children: [{ dist: Any }] │ └── #0 Aggregate [#13] ├── Group #15 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 5700.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }] -│ └── #0 Aggregate [#0] +│ │ └── { dist: Any }: expr: #0, cost: 1870.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ └── #0 Aggregate [#3] ├── Group #16 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 6200.000, children: [{ dist: Any }] -│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 11300.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 41200.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 1920.000, children: [{ dist: Any }] +│ │ ├── { dist: Broadcast }: expr: #3, cost: 2920.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 2430.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #2, cost: 2430.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #4, cost: 5420.000, children: [{ dist: Any }] │ ├── #0 Aggregate [#15] -│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#16] -│ └── #2 Exchange: (Merge) [#16] +│ ├── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#16] +│ ├── #2 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#16] +│ ├── #3 Exchange: (Broadcast) [#16] +│ └── #4 Exchange: (Merge) [#16] ├── Group #17 │ ├── Best 
properties -│ │ └── { dist: Any }: expr: #0, cost: 10510.000, children: [{ dist: Any }, { dist: Broadcast }] -│ └── #0 Join [#16, #3] +│ │ └── { dist: Any }: expr: #0, cost: 5020.000, children: [{ dist: Any }, { dist: Broadcast }] +│ └── #0 Join [#0, #16] ├── Group #18 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 10519.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 10978.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 5029.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 5488.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#17] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#18] ├── Group #19 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 11023.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 5533.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#18] ├── Group #20 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 11068.000, children: [{ dist: Any }] +│ │ └── { dist: Any }: expr: #0, cost: 5578.000, children: [{ dist: Any }] │ └── #0 Aggregate [#19] ├── Group #21 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 11130.000, children: [{ dist: Any }, { dist: Broadcast }] -│ └── #0 Join [#16, #11] +│ │ └── { dist: Any }: expr: #0, cost: 67020.000, children: [{ dist: Any }, { dist: Broadcast }] +│ └── #0 Join [#11, #16] ├── Group #22 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 11139.000, children: [{ dist: Any }] -│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 11598.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 67029.000, children: [{ dist: Any }] +│ │ └── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #1, cost: 67488.000, children: [{ dist: Any }] │ ├── #0 EvalScalar [#21] │ └── #1 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#22] ├── Group #23 │ ├── Best properties -│ │ └── { dist: Any }: expr: #0, cost: 11643.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Any }: expr: #0, cost: 67533.000, children: [{ dist: Hash(t_10.a (#0)::Int32 NULL) }] │ └── #0 Aggregate [#22] └── Group #24 ├── Best properties - │ └── { dist: Any }: expr: #0, cost: 11688.000, children: [{ dist: Any }] + │ └── { dist: Any }: expr: #0, cost: 67578.000, children: [{ dist: Any }] └── #0 Aggregate [#23] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test index 8b2d4577d15e..eda22f0a7791 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/join_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/join_property.test @@ -7,6 +7,9 @@ create database join_property statement ok use join_property +statement ok +set max_threads = 1 + statement ok create table t_10(a int) as select * from numbers(10) @@ -22,22 +25,22 @@ select * from t_10, t_100, t_1000 where t_10.a = t_1000.a and t_100.a = t_1000.a ---- Memo ├── root group: #5 -├── estimated memory: 8024 bytes +├── estimated memory: 8640 bytes ├── Group #0 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] -│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] +│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: 
#1, cost: 52000.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#0] +│ ├── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#0] │ └── #2 Exchange: (Merge) [#0] ├── Group #1 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] -│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #1, cost: 52000.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#1] +│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#1] │ └── #2 Exchange: (Merge) [#1] ├── Group #2 │ ├── Best properties @@ -51,14 +54,16 @@ Memo │ └── #3 Exchange: (Merge) [#2] ├── Group #3 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 3110.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ ├── { dist: Broadcast }: expr: #1, cost: 4110.000, children: [{ dist: Any }] -│ │ ├── { dist: Hash(t_1000.a (#2)::Int32 NULL) }: expr: #0, cost: 54110.000, children: [{ dist: Hash(t_1000.a (#2)::Int32 NULL) }, { dist: Hash(t_10.a (#0)::Int32 NULL) }] -│ │ └── { dist: Serial }: expr: #0, cost: 355610.000, children: [{ dist: Serial }, { dist: Serial }] +│ │ ├── { dist: Any }: expr: #0, cost: 1310.000, children: [{ dist: Any }, { dist: Broadcast }] +│ │ ├── { dist: Broadcast }: expr: #1, cost: 2310.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_10.a (#0)::Int32 NULL) }: expr: #3, cost: 1820.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #0, cost: 6410.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }, { dist: Hash(t_10.a (#0)::Int32 NULL) }] +│ │ └── { dist: Serial }: expr: #0, cost: 38810.000, children: [{ dist: Serial }, { dist: Serial }] │ ├── #0 Join [#1, #2] │ ├── #1 Exchange: (Broadcast) [#3] │ ├── #2 Exchange: (Merge) [#3] -│ └── #3 Exchange: (Hash(t_1000.a (#2)::Int32 NULL)) [#3] +│ ├── #3 Exchange: (Hash(t_10.a (#0)::Int32 NULL)) [#3] +│ └── #4 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#3] ├── Group #4 │ ├── Best properties │ │ └── { dist: Any }: expr: #0, cost: 4410.000, children: [{ dist: Any }, { dist: Broadcast }] @@ -76,7 +81,7 @@ select * from t_1000 left join t_10 on t_1000.a = t_10.a left join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 8024 bytes +├── estimated memory: 8160 bytes ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -130,7 +135,7 @@ select * from t_1000 right join t_10 on t_1000.a = t_10.a right join t_100 on t_ ---- Memo ├── root group: #5 -├── estimated memory: 7080 bytes +├── estimated memory: 7200 bytes ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -180,7 +185,7 @@ select * from t_1000 full join t_10 on t_1000.a = t_10.a full join t_100 on t_10 ---- Memo ├── root group: #5 -├── estimated memory: 7080 bytes +├── estimated memory: 7200 bytes ├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] @@ -230,7 +235,7 @@ select * from t_10, t_100, t_1000 ---- Memo ├── root group: #5 -├── estimated memory: 6136 bytes +├── estimated memory: 6240 bytes 
├── Group #0 │ ├── Best properties │ │ ├── { dist: Any }: expr: #0, cost: 10.000, children: [] diff --git a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test index 4bdc095fa684..14d8473afbb1 100644 --- a/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test +++ b/tests/sqllogictests/suites/mode/cluster/memo/mix_property.test @@ -7,6 +7,9 @@ create database mix_property statement ok use mix_property +statement ok +set max_threads = 1 + statement ok create table t_10(a int) as select * from numbers(10) @@ -26,22 +29,22 @@ limit 10 ---- Memo ├── root group: #10 -├── estimated memory: 10856 bytes +├── estimated memory: 11520 bytes ├── Group #0 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] -│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] +│ │ ├── { dist: Hash(t_1000.a (#0)::Int32 NULL) }: expr: #1, cost: 52000.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#0] +│ ├── #1 Exchange: (Hash(t_1000.a (#0)::Int32 NULL)) [#0] │ └── #2 Exchange: (Merge) [#0] ├── Group #1 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 1000.000, children: [] -│ │ ├── { dist: Hash(t_1000.a (#0)::Int32 NULL) }: expr: #1, cost: 52000.000, children: [{ dist: Any }] -│ │ └── { dist: Serial }: expr: #2, cost: 351000.000, children: [{ dist: Any }] +│ │ ├── { dist: Any }: expr: #0, cost: 100.000, children: [] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #1, cost: 5200.000, children: [{ dist: Any }] +│ │ └── { dist: Serial }: expr: #2, cost: 35100.000, children: [{ dist: Any }] │ ├── #0 Scan [] -│ ├── #1 Exchange: (Hash(t_1000.a (#0)::Int32 NULL)) [#1] +│ ├── #1 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#1] │ └── #2 Exchange: (Merge) [#1] ├── Group #2 │ ├── Best properties @@ -55,14 +58,16 @@ Memo │ └── #3 Exchange: (Merge) [#2] ├── Group #3 │ ├── Best properties -│ │ ├── { dist: Any }: expr: #0, cost: 3110.000, children: [{ dist: Any }, { dist: Broadcast }] -│ │ ├── { dist: Broadcast }: expr: #1, cost: 4110.000, children: [{ dist: Any }] -│ │ ├── { dist: Hash(t_1000.a (#0)::Int32 NULL) }: expr: #0, cost: 54110.000, children: [{ dist: Hash(t_1000.a (#0)::Int32 NULL) }, { dist: Hash(t_10.a (#2)::Int32 NULL) }] -│ │ └── { dist: Serial }: expr: #0, cost: 355610.000, children: [{ dist: Serial }, { dist: Serial }] +│ │ ├── { dist: Any }: expr: #0, cost: 1310.000, children: [{ dist: Any }, { dist: Broadcast }] +│ │ ├── { dist: Broadcast }: expr: #1, cost: 2310.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_10.a (#2)::Int32 NULL) }: expr: #3, cost: 1820.000, children: [{ dist: Any }] +│ │ ├── { dist: Hash(t_100.a (#1)::Int32 NULL) }: expr: #0, cost: 6410.000, children: [{ dist: Hash(t_100.a (#1)::Int32 NULL) }, { dist: Hash(t_10.a (#2)::Int32 NULL) }] +│ │ └── { dist: Serial }: expr: #0, cost: 38810.000, children: [{ dist: Serial }, { dist: Serial }] │ ├── #0 Join [#1, #2] │ ├── #1 Exchange: (Broadcast) [#3] │ ├── #2 Exchange: (Merge) [#3] -│ └── #3 Exchange: (Hash(t_1000.a (#0)::Int32 NULL)) [#3] +│ ├── #3 Exchange: (Hash(t_10.a (#2)::Int32 NULL)) [#3] +│ └── #4 Exchange: (Hash(t_100.a (#1)::Int32 NULL)) [#3] ├── Group #4 │ ├── Best properties │ │ └── 
{ dist: Any }: expr: #0, cost: 4410.000, children: [{ dist: Any }, { dist: Broadcast }] diff --git a/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test new file mode 100644 index 000000000000..2d8c9c1af6c2 --- /dev/null +++ b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test @@ -0,0 +1,103 @@ +statement ok +set enable_experimental_merge_into = 1; + +statement ok +set enable_distributed_merge_into = 1; + +statement ok +create table t1(a int); + +statement ok +create table t2(a int); + +statement ok +insert into t1 values(1),(2),(3),(4),(5); + +statement ok +insert into t1 values(6),(7),(8),(9),(10); + +statement ok +insert into t1 values(11),(12),(13),(14),(15); + +statement ok +insert into t2 values(8); + +query t +select * from t1 order by a; +---- +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 + +query T +select * from t2 order by a; +---- +8 + +## check there is no add row_number. +query T +explain merge into t1 using t2 on t1.a < t2.a when matched then update * when not matched then insert *; +---- +MergeInto: +target_table: default.default.t1 +├── distributed: false +├── target_build_optimization: false +├── matched update: [condition: None,update set a = a (#0)] +├── unmatched insert: [condition: None,insert into (a) values(CAST(a (#0) AS Int32 NULL))] +└── HashJoin: RIGHT OUTER + ├── equi conditions: [] + ├── non-equi conditions: [lt(t1.a (#1), t2.a (#0))] + ├── Exchange(Merge) + │ └── LogicalGet + │ ├── table: default.default.t1 + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE + └── Exchange(Merge) + └── LogicalGet + ├── table: default.default.t2 + ├── filters: [] + ├── order by: [] + └── limit: NONE + +query TT +merge into t1 using t2 on t1.a < t2.a when matched then update * when not matched then insert *; +---- +0 7 + +query T +select * from t1 order by a; +---- +8 +8 +8 +8 +8 +8 +8 +8 +9 +10 +11 +12 +13 +14 +15 + +statement ok +set enable_experimental_merge_into = 0; + +statement ok +set enable_distributed_merge_into = 0; \ No newline at end of file diff --git a/tests/sqllogictests/suites/mode/standalone/ee/explain_agg_index.test b/tests/sqllogictests/suites/mode/standalone/ee/explain_agg_index.test index 95986f444c75..20774352bea3 100644 --- a/tests/sqllogictests/suites/mode/standalone/ee/explain_agg_index.test +++ b/tests/sqllogictests/suites/mode/standalone/ee/explain_agg_index.test @@ -154,67 +154,61 @@ EvalScalar ├── partitions scanned: 0 ├── push downs: [filters: [t1.b (#1) > 5], limit: NONE] ├── aggregating index: [SELECT b, SUM(a) FROM test_index_db.t1 WHERE (b > 3) GROUP BY b] - ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1)], filter: index_col_0 (#0) > to_int32(5)] + ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1)], filter: index_col_0 (#0) > 5] └── estimated rows: 0.00 query T EXPLAIN SELECT t1.b, SUM(a) FROM t1 GROUP BY t1.b HAVING SUM(a)=(SELECT SUM(a) FROM t1 t WHERE t1.b=t.b and t.b > 3) ---- -Filter +HashJoin ├── output columns: [SUM(a) (#2), t1.b (#1)] -├── filters: [is_true(SUM(a) (#2) = scalar_subquery_5 (#5))] +├── join type: INNER +├── build keys: [scalar_subquery_5 (#5), b (#4)] +├── probe keys: [SUM(a) (#2), b (#1)] +├── filters: [] ├── estimated rows: 0.00 -└── HashJoin - ├── output columns: [SUM(a) (#2), t1.b (#1), SUM(a) (#5)] - ├── join type: LEFT SINGLE - ├── build keys: [b (#4)] - ├── probe keys: [CAST(b (#1) AS Int32 NULL)] - ├── filters: [] +├── 
AggregateFinal(Build) +│ ├── output columns: [SUM(a) (#5), t.b (#4)] +│ ├── group by: [b] +│ ├── aggregate functions: [sum(a)] +│ ├── estimated rows: 0.00 +│ └── AggregatePartial +│ ├── group by: [b] +│ ├── aggregate functions: [sum(a)] +│ ├── estimated rows: 0.00 +│ └── Filter +│ ├── output columns: [t.a (#3), t.b (#4)] +│ ├── filters: [b (#4) > 3] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.test_index_db.t1 +│ ├── output columns: [a (#3), b (#4)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [t1.b (#4) > 3], limit: NONE] +│ ├── aggregating index: [SELECT b, SUM(a) FROM test_index_db.t1 WHERE b > 3 GROUP BY b] +│ ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1)]] +│ └── estimated rows: 0.00 +└── AggregateFinal(Probe) + ├── output columns: [SUM(a) (#2), t1.b (#1)] + ├── group by: [b] + ├── aggregate functions: [sum(a)] ├── estimated rows: 0.00 - ├── AggregateFinal(Build) - │ ├── output columns: [SUM(a) (#5), t.b (#4)] - │ ├── group by: [b] - │ ├── aggregate functions: [sum(a)] - │ ├── estimated rows: 0.00 - │ └── AggregatePartial - │ ├── output columns: [SUM(a) (#5), #_group_by_key] - │ ├── group by: [b] - │ ├── aggregate functions: [sum(a)] - │ ├── estimated rows: 0.00 - │ └── Filter - │ ├── output columns: [t.a (#3), t.b (#4)] - │ ├── filters: [t.b (#4) > 3] - │ ├── estimated rows: 0.00 - │ └── TableScan - │ ├── table: default.test_index_db.t1 - │ ├── output columns: [a (#3), b (#4)] - │ ├── read rows: 0 - │ ├── read bytes: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [t1.b (#4) > 3], limit: NONE] - │ ├── aggregating index: [SELECT b, SUM(a) FROM test_index_db.t1 WHERE (b > 3) GROUP BY b] - │ ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1)]] - │ └── estimated rows: 0.00 - └── AggregateFinal(Probe) - ├── output columns: [SUM(a) (#2), t1.b (#1)] + └── AggregatePartial ├── group by: [b] ├── aggregate functions: [sum(a)] ├── estimated rows: 0.00 - └── AggregatePartial - ├── output columns: [SUM(a) (#2), #_group_by_key] - ├── group by: [b] - ├── aggregate functions: [sum(a)] - ├── estimated rows: 0.00 - └── TableScan - ├── table: default.test_index_db.t1 - ├── output columns: [a (#0), b (#1)] - ├── read rows: 0 - ├── read bytes: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 0.00 + └── TableScan + ├── table: default.test_index_db.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 # Disable aggregating index scan statement ok @@ -509,7 +503,7 @@ Sort ├── estimated rows: 0.00 └── Filter ├── output columns: [onebrc.station_name (#0), onebrc.measurement (#1)] - ├── filters: [is_true(onebrc.station_name (#0) = 'Beijing'), is_true(onebrc.measurement (#1) > 0), is_true(onebrc.measurement (#1) = 1 OR onebrc.measurement (#1) = 2)] + ├── filters: [is_true(onebrc.measurement (#1) > 0), is_true(onebrc.station_name (#0) = 'Beijing'), is_true(onebrc.measurement (#1) = 1 OR onebrc.measurement (#1) = 2)] ├── estimated rows: 0.00 └── TableScan ├── table: default.test_index_db.onebrc @@ -518,7 +512,7 @@ Sort ├── read bytes: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(and_filters(onebrc.station_name (#0) = 'Beijing', onebrc.measurement (#1) > 0), 
onebrc.measurement (#1) = 1 OR onebrc.measurement (#1) = 2)], limit: NONE] + ├── push downs: [filters: [and_filters(and_filters(onebrc.measurement (#1) > 0, onebrc.station_name (#0) = 'Beijing'), onebrc.measurement (#1) = 1 OR onebrc.measurement (#1) = 2)], limit: NONE] ├── aggregating index: [SELECT station_name, measurement, COUNT(), COUNT(measurement), MAX(measurement), MIN(measurement), SUM(measurement) FROM test_index_db.onebrc GROUP BY station_name, measurement] ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1), index_col_5 (#5), index_col_6 (#6), index_col_3 (#3), index_col_4 (#4)], filter: is_true(CAST(onebrc.station_name (#0) AS Boolean NULL) AND CAST('Beijing' AS Boolean NULL) AND (onebrc.measurement (#1) = CAST(1 AS Float64 NULL) OR onebrc.measurement (#1) = CAST(2 AS Float64 NULL)) AND index_col_1 (#1) > CAST(0 AS Float64 NULL))] └── estimated rows: 0.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/09_0039_target_build_merge_into_standalone.test b/tests/sqllogictests/suites/mode/standalone/explain/09_0039_target_build_merge_into_standalone.test index e9cdadf2b9a9..b877d7537827 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/09_0039_target_build_merge_into_standalone.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/09_0039_target_build_merge_into_standalone.test @@ -1,6 +1,12 @@ statement ok set enable_experimental_merge_into = 1; +statement ok +drop table if exists target_build_optimization; + +statement ok +drop table if exists source_optimization; + ## Target Build Optimization Test statement ok create table target_build_optimization(a int,b string,c string); @@ -150,7 +156,7 @@ select * from target_build_optimization order by a,b,c; 11 b11 c_11 12 b12 c_12 -### test with conjunct +### test with conjunct #### we need to make sure the blocks count and layout, so we should truncate and insert again. 
statement ok truncate table target_build_optimization; @@ -303,4 +309,4 @@ select * from target_build_optimization order by a,b,c; 12 b12 c_12 statement ok -set enable_experimental_merge_into = 0; \ No newline at end of file +set enable_experimental_merge_into = 0; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test b/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test index fdc6c353dec6..26ec22165615 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test @@ -1,4 +1,7 @@ # This case depends on explain(standalone mode), thus we put it here +statement ok +drop table if exists bloom_test_t; + statement ok create table bloom_test_t(c1 int, c2 int) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test index d98e7ac8db2d..d2accfbc4f93 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test @@ -185,71 +185,79 @@ query T explain select * from t full join t t1 on t.a = t1.a where t.a is not null ---- HashJoin -├── output columns: [t1.a (#1), t.a (#0)] -├── join type: RIGHT OUTER -├── build keys: [t.a (#0)] -├── probe keys: [t1.a (#1)] +├── output columns: [t.a (#0), t1.a (#1)] +├── join type: LEFT OUTER +├── build keys: [t1.a (#1)] +├── probe keys: [t.a (#0)] ├── filters: [] ├── estimated rows: 2.00 ├── Filter(Build) -│ ├── output columns: [t.a (#0)] -│ ├── filters: [is_not_null(t.a (#0))] +│ ├── output columns: [t1.a (#1)] +│ ├── filters: [is_not_null(t1.a (#1))] │ ├── estimated rows: 2.00 │ └── TableScan │ ├── table: default.eliminate_outer_join.t -│ ├── output columns: [a (#0)] +│ ├── output columns: [a (#1)] │ ├── read rows: 10 │ ├── read bytes: 74 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_not_null(t.a (#0))], limit: NONE] +│ ├── push downs: [filters: [is_not_null(t.a (#1))], limit: NONE] │ └── estimated rows: 10.00 -└── TableScan(Probe) - ├── table: default.eliminate_outer_join.t - ├── output columns: [a (#1)] - ├── read rows: 10 - ├── read bytes: 74 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +└── Filter(Probe) + ├── output columns: [t.a (#0)] + ├── filters: [is_not_null(t.a (#0))] + ├── estimated rows: 2.00 + └── TableScan + ├── table: default.eliminate_outer_join.t + ├── output columns: [a (#0)] + ├── read rows: 10 + ├── read bytes: 74 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_not_null(t.a (#0))], limit: NONE] + └── estimated rows: 10.00 query T explain select * from t full join t t1 on t.a = t1.a where t1.a is not null ---- HashJoin -├── output columns: [t.a (#0), t1.a (#1)] -├── join type: RIGHT OUTER -├── build keys: [t1.a (#1)] -├── probe keys: [t.a (#0)] +├── output columns: [t1.a (#1), t.a (#0)] +├── join type: LEFT OUTER +├── build keys: [t.a (#0)] +├── probe keys: [t1.a (#1)] ├── filters: [] ├── estimated rows: 2.00 ├── Filter(Build) -│ ├── output columns: [t1.a (#1)] -│ ├── filters: [is_not_null(t1.a (#1))] +│ ├── output columns: [t.a (#0)] +│ ├── filters: [is_not_null(t.a (#0))] │ ├── estimated rows: 2.00 │ 
└── TableScan │ ├── table: default.eliminate_outer_join.t -│ ├── output columns: [a (#1)] +│ ├── output columns: [a (#0)] │ ├── read rows: 10 │ ├── read bytes: 74 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [is_not_null(t.a (#1))], limit: NONE] +│ ├── push downs: [filters: [is_not_null(t.a (#0))], limit: NONE] │ └── estimated rows: 10.00 -└── TableScan(Probe) - ├── table: default.eliminate_outer_join.t - ├── output columns: [a (#0)] - ├── read rows: 10 - ├── read bytes: 74 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +└── Filter(Probe) + ├── output columns: [t1.a (#1)] + ├── filters: [is_not_null(t1.a (#1))] + ├── estimated rows: 2.00 + └── TableScan + ├── table: default.eliminate_outer_join.t + ├── output columns: [a (#1)] + ├── read rows: 10 + ├── read bytes: 74 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_not_null(t.a (#1))], limit: NONE] + └── estimated rows: 10.00 query T explain select * from t left join t t1 on t.a = t1.a where t1.a is not null and t.a is not null @@ -606,37 +614,33 @@ Filter query T explain select * from t left join t t1 on t.a = t1.a where t1.a <= 1 or (t.a > 1 and t.a < 2) ---- -Filter +HashJoin ├── output columns: [t.a (#0), t1.a (#1)] -├── filters: [is_true(t1.a (#1) <= 1 OR t.a (#0) > 1 AND t.a (#0) < 2)] -├── estimated rows: 4.05 -└── HashJoin - ├── output columns: [t.a (#0), t1.a (#1)] - ├── join type: LEFT OUTER - ├── build keys: [t1.a (#1)] - ├── probe keys: [t.a (#0)] - ├── filters: [] - ├── estimated rows: 10.00 - ├── TableScan(Build) - │ ├── table: default.eliminate_outer_join.t - │ ├── output columns: [a (#1)] - │ ├── read rows: 10 - │ ├── read bytes: 74 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 10.00 - └── TableScan(Probe) - ├── table: default.eliminate_outer_join.t - ├── output columns: [a (#0)] - ├── read rows: 10 - ├── read bytes: 74 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +├── join type: INNER +├── build keys: [t1.a (#1)] +├── probe keys: [t.a (#0)] +├── filters: [t1.a (#1) <= 1 OR t.a (#0) > 1 AND t.a (#0) < 2] +├── estimated rows: 10.00 +├── TableScan(Build) +│ ├── table: default.eliminate_outer_join.t +│ ├── output columns: [a (#1)] +│ ├── read rows: 10 +│ ├── read bytes: 74 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 10.00 +└── TableScan(Probe) + ├── table: default.eliminate_outer_join.t + ├── output columns: [a (#0)] + ├── read rows: 10 + ├── read bytes: 74 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 10.00 statement ok create table time_table(a timestamp) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test index f2f0f7b11ccf..8bbd4b423df2 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test +++ 
b/tests/sqllogictests/suites/mode/standalone/explain/explain.test @@ -703,54 +703,50 @@ create table t3 as select number as a, number as b from numbers(10) query T explain select * from t1,t2, t3 where (t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b < 4) or t3.a = 2 ---- -Filter +HashJoin ├── output columns: [t3.a (#4), t3.b (#5), t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] +├── join type: INNER +├── build keys: [] +├── probe keys: [] ├── filters: [t1.a (#0) > 1 AND t2.a (#2) > 2 OR t1.b (#1) < 3 AND t2.b (#3) < 4 OR t3.a (#4) = 2] -├── estimated rows: 21.20 -└── HashJoin - ├── output columns: [t3.a (#4), t3.b (#5), t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] - ├── join type: CROSS - ├── build keys: [] - ├── probe keys: [] - ├── filters: [] - ├── estimated rows: 50.00 - ├── HashJoin(Build) - │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] - │ ├── join type: CROSS - │ ├── build keys: [] - │ ├── probe keys: [] - │ ├── filters: [] - │ ├── estimated rows: 5.00 - │ ├── TableScan(Build) - │ │ ├── table: default.default.t1 - │ │ ├── output columns: [a (#0), b (#1)] - │ │ ├── read rows: 1 - │ │ ├── read bytes: 78 - │ │ ├── partitions total: 1 - │ │ ├── partitions scanned: 1 - │ │ ├── pruning stats: [segments: , blocks: ] - │ │ ├── push downs: [filters: [], limit: NONE] - │ │ └── estimated rows: 1.00 - │ └── TableScan(Probe) - │ ├── table: default.default.t2 - │ ├── output columns: [a (#2), b (#3)] - │ ├── read rows: 5 - │ ├── read bytes: 108 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 5.00 - └── TableScan(Probe) - ├── table: default.default.t3 - ├── output columns: [a (#4), b (#5)] - ├── read rows: 10 - ├── read bytes: 130 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +├── estimated rows: 50.00 +├── HashJoin(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] +│ ├── join type: CROSS +│ ├── build keys: [] +│ ├── probe keys: [] +│ ├── filters: [] +│ ├── estimated rows: 5.00 +│ ├── TableScan(Build) +│ │ ├── table: default.default.t1 +│ │ ├── output columns: [a (#0), b (#1)] +│ │ ├── read rows: 1 +│ │ ├── read bytes: 78 +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── pruning stats: [segments: , blocks: ] +│ │ ├── push downs: [filters: [], limit: NONE] +│ │ └── estimated rows: 1.00 +│ └── TableScan(Probe) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 5 +│ ├── read bytes: 108 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 5.00 +└── TableScan(Probe) + ├── table: default.default.t3 + ├── output columns: [a (#4), b (#5)] + ├── read rows: 10 + ├── read bytes: 130 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 10.00 query T explain select * from t1,t2, t3 where ((t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b < 4)) and t3.a > 1 @@ -1132,7 +1128,7 @@ Limit ├── estimated rows: 0.08 ├── Filter(Build) │ ├── output columns: [a.id (#0), a.c1 (#1)] - │ ├── filters: [is_true(CAST(a.c1 (#1) AS Int64 NULL) >= 1683648000), is_true(CAST(a.c1 (#1) AS Int64 NULL) <= 1683734400)] + │ ├── filters: [is_true(a.c1 (#1) >= 1683648000), 
is_true(a.c1 (#1) <= 1683734400)] │ ├── estimated rows: 0.20 │ └── TableScan │ ├── table: default.default.a @@ -1142,11 +1138,11 @@ Limit │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [and_filters(CAST(a.c1 (#1) AS Int64 NULL) >= 1683648000, CAST(a.c1 (#1) AS Int64 NULL) <= 1683734400)], limit: NONE] + │ ├── push downs: [filters: [and_filters(a.c1 (#1) >= 1683648000, a.c1 (#1) <= 1683734400)], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [b.id (#2), b.c1 (#3)] - ├── filters: [is_true(CAST(b.c1 (#3) AS Int64 NULL) >= 1683648000), is_true(CAST(b.c1 (#3) AS Int64 NULL) <= 1683734400)] + ├── filters: [is_true(b.c1 (#3) >= 1683648000), is_true(b.c1 (#3) <= 1683734400)] ├── estimated rows: 0.40 └── TableScan ├── table: default.default.b @@ -1156,7 +1152,7 @@ Limit ├── partitions total: 2 ├── partitions scanned: 2 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(CAST(b.c1 (#3) AS Int64 NULL) >= 1683648000, CAST(b.c1 (#3) AS Int64 NULL) <= 1683734400)], limit: NONE] + ├── push downs: [filters: [and_filters(b.c1 (#3) >= 1683648000, b.c1 (#3) <= 1683734400)], limit: NONE] └── estimated rows: 2.00 statement ok @@ -1173,49 +1169,49 @@ from numbers(10) where number > 5 ) b on a.number=b.number order by a.number) where pt = register_at; ---- Sort -├── output columns: [register_at (#3), numbers.number (#0), pt (#1)] +├── output columns: [numbers.number (#0), pt (#1), register_at (#3)] ├── sort keys: [number ASC NULLS LAST] ├── estimated rows: 0.00 └── HashJoin - ├── output columns: [register_at (#3), numbers.number (#0), pt (#1)] + ├── output columns: [numbers.number (#0), pt (#1), register_at (#3)] ├── join type: INNER - ├── build keys: [a.number (#0), a.pt (#1)] - ├── probe keys: [b.number (#2), b.register_at (#3)] + ├── build keys: [b.register_at (#3), b.number (#2)] + ├── probe keys: [a.pt (#1), a.number (#0)] ├── filters: [] ├── estimated rows: 0.00 ├── EvalScalar(Build) - │ ├── output columns: [numbers.number (#0), pt (#1)] - │ ├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#0))))] + │ ├── output columns: [numbers.number (#2), register_at (#3)] + │ ├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#2))))] │ ├── estimated rows: 0.00 │ └── Filter - │ ├── output columns: [numbers.number (#0)] - │ ├── filters: [numbers.number (#0) > 5] + │ ├── output columns: [numbers.number (#2)] + │ ├── filters: [numbers.number (#2) > 5] │ ├── estimated rows: 0.00 │ └── TableScan │ ├── table: default.system.numbers - │ ├── output columns: [number (#0)] + │ ├── output columns: [number (#2)] │ ├── read rows: 10 │ ├── read bytes: 80 │ ├── partitions total: 1 │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [numbers.number (#0) > 5], limit: NONE] + │ ├── push downs: [filters: [numbers.number (#2) > 5], limit: NONE] │ └── estimated rows: 10.00 └── EvalScalar(Probe) - ├── output columns: [numbers.number (#2), register_at (#3)] - ├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#2))))] + ├── output columns: [numbers.number (#0), pt (#1)] + ├── expressions: [to_yyyymmdd(to_timestamp(to_int64(numbers.number (#0))))] ├── estimated rows: 0.00 └── Filter - ├── output columns: [numbers.number (#2)] - ├── filters: [numbers.number (#2) > 5] + ├── output columns: [numbers.number (#0)] + ├── filters: [numbers.number (#0) > 5] ├── estimated rows: 0.00 └── TableScan ├── table: default.system.numbers - ├── output columns: 
[number (#2)] + ├── output columns: [number (#0)] ├── read rows: 10 ├── read bytes: 80 ├── partitions total: 1 ├── partitions scanned: 1 - ├── push downs: [filters: [numbers.number (#2) > 5], limit: NONE] + ├── push downs: [filters: [numbers.number (#0) > 5], limit: NONE] └── estimated rows: 10.00 @@ -1244,17 +1240,17 @@ explain select * from a where a.id = (select id from b where a.id = b.id); Filter ├── output columns: [a.id (#0), a.c1 (#1)] ├── filters: [is_true(a.id (#0) = scalar_subquery_2 (#2))] -├── estimated rows: 0.60 +├── estimated rows: 0.08 └── HashJoin ├── output columns: [a.id (#0), a.c1 (#1), b.id (#2)] - ├── join type: LEFT SINGLE + ├── join type: INNER ├── build keys: [id (#2)] ├── probe keys: [id (#0)] ├── filters: [] - ├── estimated rows: 3.00 + ├── estimated rows: 0.40 ├── Filter(Build) │ ├── output columns: [b.id (#2)] - │ ├── filters: [is_true(id (#2) = b.id (#2))] + │ ├── filters: [is_true(b.id (#2) = b.id (#2))] │ ├── estimated rows: 0.40 │ └── TableScan │ ├── table: default.default.b @@ -1332,34 +1328,34 @@ Filter ├── estimated rows: 0.00 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), marker (#3)] - ├── join type: RIGHT MARK - ├── build keys: [CAST(subquery_2 (#2) AS Int32 NULL)] - ├── probe keys: [t1.a (#0)] + ├── join type: LEFT MARK + ├── build keys: [t1.a (#0)] + ├── probe keys: [CAST(subquery_2 (#2) AS Int32 NULL)] ├── filters: [] ├── estimated rows: 3.00 - ├── AggregateFinal(Build) - │ ├── output columns: [col0 (#2)] - │ ├── group by: [col0] - │ ├── aggregate functions: [] - │ ├── estimated rows: 1301.00 - │ └── AggregatePartial - │ ├── output columns: [#_group_by_key] - │ ├── group by: [col0] - │ ├── aggregate functions: [] - │ ├── estimated rows: 1301.00 - │ └── ConstantTableScan - │ ├── output columns: [col0 (#2)] - │ └── column 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 
371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 
1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300] - └── TableScan(Probe) - ├── table: default.default.t1 - ├── output columns: [a (#0), b (#1)] - ├── read rows: 3 - ├── read bytes: 90 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 3.00 + ├── TableScan(Build) + │ ├── table: default.default.t1 + │ ├── output columns: [a (#0), b (#1)] + │ ├── read rows: 3 + │ ├── read bytes: 90 + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 3.00 + └── AggregateFinal(Probe) + ├── output columns: [col0 (#2)] + ├── group by: [col0] + ├── aggregate functions: [] + ├── estimated rows: 1301.00 + └── AggregatePartial + ├── output columns: [#_group_by_key] + ├── group by: [col0] + ├── aggregate functions: [] + ├── estimated rows: 1301.00 + └── ConstantTableScan + ├── output columns: [col0 (#2)] + └── column 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 
255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 
966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300] statement ok drop table t1; @@ -1489,23 +1485,23 @@ FROM numbers(500); query T explain join SELECT customer_name, segment, (SELECT SUM(net_paid) FROM sales WHERE customer_id IN (SELECT customer_id FROM customers WHERE segment = c.segment AND active = true)) FROM customers c WHERE c.customer_id IN (SELECT customer_id FROM sales WHERE net_paid > 100) LIMIT 10; ---- -HashJoin: LEFT SINGLE +HashJoin: LEFT OUTER ├── Build │ └── HashJoin: RIGHT SEMI │ ├── Build │ │ └── HashJoin: CROSS │ │ ├── Build -│ │ │ └── Scan: default.default.customers (read rows: 100) +│ │ │ └── Scan: default.default.customers (#0) (read rows: 100) │ │ └── Probe -│ │ └── Scan: default.default.sales (read rows: 500) +│ │ └── Scan: default.default.sales (#1) (read rows: 500) │ └── Probe -│ └── Scan: default.default.customers (read rows: 100) +│ └── Scan: default.default.customers (#2) (read rows: 100) └── Probe └── HashJoin: LEFT SEMI ├── Build - │ └── Scan: default.default.sales (read rows: 0) + │ └── Scan: default.default.sales (#3) (read rows: 0) └── Probe - └── Scan: default.default.customers (read rows: 100) + └── Scan: default.default.customers (#0) (read rows: 100) query T @@ -1513,21 +1509,21 @@ explain join SELECT c.customer_name FROM customers c WHERE NOT EXISTS ( SELECT c ---- HashJoin: RIGHT MARK ├── Build -│ └── HashJoin: RIGHT MARK +│ └── HashJoin: LEFT MARK │ ├── Build -│ │ └── HashJoin: INNER +│ │ └── HashJoin: CROSS │ │ ├── Build -│ │ │ └── Scan: default.default.products (read rows: 10) +│ │ │ └── Scan: default.default.customers (#0) (read rows: 100) │ │ └── Probe -│ │ 
└── Scan: default.default.sales (read rows: 500) +│ │ └── Scan: default.default.products (#1) (read rows: 10) │ └── Probe -│ └── HashJoin: CROSS +│ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.default.customers (read rows: 100) +│ │ └── Scan: default.default.products (#3) (read rows: 10) │ └── Probe -│ └── Scan: default.default.products (read rows: 10) +│ └── Scan: default.default.sales (#2) (read rows: 500) └── Probe - └── Scan: default.default.customers (read rows: 100) + └── Scan: default.default.customers (#0) (read rows: 100) statement ok drop table customers; @@ -1608,7 +1604,7 @@ Filter ├── estimated rows: 0.00 └── HashJoin ├── output columns: [t2.a (#0), t2.b (#1), t2.c (#2), t1.c (#5)] - ├── join type: LEFT SINGLE + ├── join type: INNER ├── build keys: [a (#3)] ├── probe keys: [a (#0)] ├── filters: [] @@ -1633,7 +1629,7 @@ Filter │ ├── estimated rows: 0.00 │ └── Filter │ ├── output columns: [t1.a (#3), t1.c (#5)] - │ ├── filters: [is_true(t1.a (#3) = a (#3))] + │ ├── filters: [is_true(t1.a (#3) = t1.a (#3))] │ ├── estimated rows: 0.00 │ └── TableScan │ ├── table: default.default.t1 @@ -1663,7 +1659,7 @@ Filter ├── estimated rows: 0.00 └── HashJoin ├── output columns: [t2.a (#0), t2.b (#1), t2.c (#2), t1.c (#5)] - ├── join type: LEFT SINGLE + ├── join type: INNER ├── build keys: [a (#3)] ├── probe keys: [a (#0)] ├── filters: [] @@ -1688,7 +1684,7 @@ Filter │ ├── estimated rows: 0.00 │ └── Filter │ ├── output columns: [t1.a (#3), t1.c (#5)] - │ ├── filters: [is_true(t1.a (#3) = a (#3))] + │ ├── filters: [is_true(a (#3) = a (#3))] │ ├── estimated rows: 0.00 │ └── TableScan │ ├── table: default.default.t1 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/filter.test b/tests/sqllogictests/suites/mode/standalone/explain/filter.test index f4222c31218c..1a1009ba94a7 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/filter.test @@ -79,7 +79,7 @@ HashJoin │ ├── estimated rows: 0.00 │ └── Filter │ ├── output columns: [t.a (#0)] -│ ├── filters: [false, is_true(t.a (#0) > 1)] +│ ├── filters: [is_true(t.a (#0) > 1), false] │ ├── estimated rows: 0.00 │ └── TableScan │ ├── table: default.default.t diff --git a/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test b/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test index 3e3a1cf71bf2..4ee8635e7c10 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test @@ -581,7 +581,7 @@ explain select * from t1 where 10000 > a and a > 10 and 100 >= a; ---- Filter ├── output columns: [t1.a (#0), t1.b (#1)] -├── filters: [t1.a (#0) > 10, t1.a (#0) <= 100] +├── filters: [t1.a (#0) <= 100, t1.a (#0) > 10] ├── estimated rows: 0.00 └── TableScan ├── table: default.default.t1 @@ -590,7 +590,7 @@ Filter ├── read bytes: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) > 10, t1.a (#0) <= 100)], limit: NONE] + ├── push downs: [filters: [and_filters(t1.a (#0) <= 100, t1.a (#0) > 10)], limit: NONE] └── estimated rows: 0.00 # t1.a > 10 and t2.a > 10 @@ -636,58 +636,58 @@ query T explain select * from t1, t2, t3 where t1.a = t2.a and t1.a = t3.a and t1.a > 5 and t3.a < 10; ---- HashJoin -├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2), t3.b (#5), t3.a (#4)] +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] ├── join type: INNER ├── 
build keys: [t3.a (#4)] ├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 0.00 -├── Filter(Build) -│ ├── output columns: [t3.a (#4), t3.b (#5)] -│ ├── filters: [t3.a (#4) > 5, t3.a (#4) < 10] +├── HashJoin(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] +│ ├── join type: INNER +│ ├── build keys: [t3.a (#4)] +│ ├── probe keys: [t2.a (#2)] +│ ├── filters: [] │ ├── estimated rows: 0.00 -│ └── TableScan -│ ├── table: default.default.t3 -│ ├── output columns: [a (#4), b (#5)] -│ ├── read rows: 0 -│ ├── read bytes: 0 -│ ├── partitions total: 0 -│ ├── partitions scanned: 0 -│ ├── push downs: [filters: [and_filters(t3.a (#4) > 5, t3.a (#4) < 10)], limit: NONE] -│ └── estimated rows: 0.00 -└── HashJoin(Probe) - ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] - ├── join type: INNER - ├── build keys: [t2.a (#2)] - ├── probe keys: [t1.a (#0)] - ├── filters: [] +│ ├── Filter(Build) +│ │ ├── output columns: [t3.a (#4), t3.b (#5)] +│ │ ├── filters: [t3.a (#4) > 5, t3.a (#4) < 10] +│ │ ├── estimated rows: 0.00 +│ │ └── TableScan +│ │ ├── table: default.default.t3 +│ │ ├── output columns: [a (#4), b (#5)] +│ │ ├── read rows: 0 +│ │ ├── read bytes: 0 +│ │ ├── partitions total: 0 +│ │ ├── partitions scanned: 0 +│ │ ├── push downs: [filters: [and_filters(t3.a (#4) > 5, t3.a (#4) < 10)], limit: NONE] +│ │ └── estimated rows: 0.00 +│ └── Filter(Probe) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [t2.a (#2) > 5, t2.a (#2) < 10] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [and_filters(t2.a (#2) > 5, t2.a (#2) < 10)], limit: NONE] +│ └── estimated rows: 0.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [t1.a (#0) > 5, t1.a (#0) < 10] ├── estimated rows: 0.00 - ├── Filter(Build) - │ ├── output columns: [t2.a (#2), t2.b (#3)] - │ ├── filters: [t2.a (#2) > 5, t2.a (#2) < 10] - │ ├── estimated rows: 0.00 - │ └── TableScan - │ ├── table: default.default.t2 - │ ├── output columns: [a (#2), b (#3)] - │ ├── read rows: 0 - │ ├── read bytes: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [and_filters(t2.a (#2) > 5, t2.a (#2) < 10)], limit: NONE] - │ └── estimated rows: 0.00 - └── Filter(Probe) - ├── output columns: [t1.a (#0), t1.b (#1)] - ├── filters: [t1.a (#0) > 5, t1.a (#0) < 10] - ├── estimated rows: 0.00 - └── TableScan - ├── table: default.default.t1 - ├── output columns: [a (#0), b (#1)] - ├── read rows: 0 - ├── read bytes: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) > 5, t1.a (#0) < 10)], limit: NONE] - └── estimated rows: 0.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [and_filters(t1.a (#0) > 5, t1.a (#0) < 10)], limit: NONE] + └── estimated rows: 0.00 # t1.a > 5 and t2.a > 10 query T @@ -855,6 +855,53 @@ HashJoin ├── push downs: [filters: [false], limit: NONE] └── estimated rows: 0.00 +# t1.a = t2.a, t1.a = t3.a => t2.a = t3.a +query T +explain select * from t1, t2, t3 where t1.a = t2.a and t1.a = t3.a; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] +├── join type: INNER +├── build keys: 
[t3.a (#4)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.00 +├── HashJoin(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] +│ ├── join type: INNER +│ ├── build keys: [t3.a (#4)] +│ ├── probe keys: [t2.a (#2)] +│ ├── filters: [] +│ ├── estimated rows: 0.00 +│ ├── TableScan(Build) +│ │ ├── table: default.default.t3 +│ │ ├── output columns: [a (#4), b (#5)] +│ │ ├── read rows: 0 +│ │ ├── read bytes: 0 +│ │ ├── partitions total: 0 +│ │ ├── partitions scanned: 0 +│ │ ├── push downs: [filters: [], limit: NONE] +│ │ └── estimated rows: 0.00 +│ └── TableScan(Probe) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 + + statement ok drop table if exists t1; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join.test b/tests/sqllogictests/suites/mode/standalone/explain/join.test index 596d2be32efc..4173c36e02bb 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join.test @@ -67,16 +67,20 @@ HashJoin │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] │ └── estimated rows: 1.00 -└── TableScan(Probe) - ├── table: default.default.t1 - ├── output columns: [number (#1)] - ├── read rows: 10 - ├── read bytes: 65 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +└── Filter(Probe) + ├── output columns: [t1.number (#1)] + ├── filters: [t1.number (#1) = t1.number (#1) + 1] + ├── estimated rows: 2.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [number (#1)] + ├── read rows: 10 + ├── read bytes: 65 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [t1.number (#1) = t1.number (#1) + 1], limit: NONE] + └── estimated rows: 10.00 query T explain select t.number from t, t1 where t.number > 1 and 1 < t1.number @@ -87,7 +91,7 @@ HashJoin ├── build keys: [] ├── probe keys: [] ├── filters: [] -├── estimated rows: 0.18 +├── estimated rows: 1.64 ├── Filter(Build) │ ├── output columns: [t.number (#0)] │ ├── filters: [t.number (#0) > 1] @@ -104,8 +108,8 @@ HashJoin │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [] - ├── filters: [1 < t1.number (#1)] - ├── estimated rows: 0.91 + ├── filters: [t1.number (#1) > 1] + ├── estimated rows: 8.18 └── TableScan ├── table: default.default.t1 ├── output columns: [number (#1)] @@ -114,7 +118,7 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [1 < t1.number (#1)], limit: NONE] + ├── push downs: [filters: [t1.number (#1) > 1], limit: NONE] └── estimated rows: 10.00 query T @@ -383,36 +387,40 @@ query T explain select * from onecolumn as a right join twocolumn as b on a.x = b.x where b.x > 42 and b.x < 45 ---- HashJoin -├── output columns: [a.x (#0), b.x (#1), b.y (#2)] -├── join type: RIGHT OUTER -├── build keys: [b.x (#1)] -├── probe keys: [a.x (#0)] 
+├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── join type: LEFT OUTER +├── build keys: [a.x (#0)] +├── probe keys: [b.x (#1)] ├── filters: [] ├── estimated rows: 3.20 ├── Filter(Build) -│ ├── output columns: [b.x (#1), b.y (#2)] -│ ├── filters: [is_true(b.x (#1) > 42), is_true(b.x (#1) < 45)] -│ ├── estimated rows: 3.20 +│ ├── output columns: [a.x (#0)] +│ ├── filters: [is_true(a.x (#0) > 42), is_true(a.x (#0) < 45)] +│ ├── estimated rows: 3.00 │ └── TableScan -│ ├── table: default.default.twocolumn -│ ├── output columns: [x (#1), y (#2)] +│ ├── table: default.default.onecolumn +│ ├── output columns: [x (#0)] │ ├── read rows: 4 -│ ├── read bytes: 94 +│ ├── read bytes: 45 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] +│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] │ └── estimated rows: 4.00 -└── TableScan(Probe) - ├── table: default.default.onecolumn - ├── output columns: [x (#0)] - ├── read rows: 4 - ├── read bytes: 45 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 4.00 +└── Filter(Probe) + ├── output columns: [b.x (#1), b.y (#2)] + ├── filters: [is_true(b.x (#1) > 42), is_true(b.x (#1) < 45)] + ├── estimated rows: 3.20 + └── TableScan + ├── table: default.default.twocolumn + ├── output columns: [x (#1), y (#2)] + ├── read rows: 4 + ├── read bytes: 94 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] + └── estimated rows: 4.00 statement ok drop table t diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test index e0fec1332775..dceb3bf47c8e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/chain.test @@ -29,9 +29,9 @@ query T explain select * from t, t1, t2 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#2), t.a (#0), t1.a (#1)] +├── output columns: [t2.a (#2), t1.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t1.a (#1)] +├── build keys: [t.a (#0)] ├── probe keys: [t2.a (#2)] ├── filters: [] ├── estimated rows: 1.00 @@ -77,17 +77,17 @@ query T explain select * from t, t2, t1 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#1), t.a (#0), t1.a (#2)] +├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t1.a (#2)] -├── probe keys: [t2.a (#1)] +├── build keys: [t.a (#0)] +├── probe keys: [t1.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t1.a (#2), t.a (#0)] +│ ├── output columns: [t2.a (#1), t.a (#0)] │ ├── join type: INNER │ ├── build keys: [t.a (#0)] -│ ├── probe keys: [t1.a (#2)] +│ ├── probe keys: [t2.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -101,41 +101,41 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t1 -│ ├── output columns: [a (#2)] -│ ├── read rows: 10 -│ ├── read bytes: 65 +│ ├── table: default.join_reorder.t2 
+│ ├── output columns: [a (#1)] +│ ├── read rows: 100 +│ ├── read bytes: 172 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 10.00 +│ └── estimated rows: 100.00 └── TableScan(Probe) - ├── table: default.join_reorder.t2 - ├── output columns: [a (#1)] - ├── read rows: 100 - ├── read bytes: 172 + ├── table: default.join_reorder.t1 + ├── output columns: [a (#2)] + ├── read rows: 10 + ├── read bytes: 65 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 100.00 + └── estimated rows: 10.00 query T explain select * from t1, t, t2 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#2), t.a (#1), t1.a (#0)] +├── output columns: [t1.a (#0), t.a (#1), t2.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#0)] -├── probe keys: [t2.a (#2)] +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t1.a (#0), t.a (#1)] +│ ├── output columns: [t2.a (#2), t.a (#1)] │ ├── join type: INNER │ ├── build keys: [t.a (#1)] -│ ├── probe keys: [t1.a (#0)] +│ ├── probe keys: [t2.a (#2)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -149,41 +149,41 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t1 -│ ├── output columns: [a (#0)] -│ ├── read rows: 10 -│ ├── read bytes: 65 +│ ├── table: default.join_reorder.t2 +│ ├── output columns: [a (#2)] +│ ├── read rows: 100 +│ ├── read bytes: 172 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 10.00 +│ └── estimated rows: 100.00 └── TableScan(Probe) - ├── table: default.join_reorder.t2 - ├── output columns: [a (#2)] - ├── read rows: 100 - ├── read bytes: 172 + ├── table: default.join_reorder.t1 + ├── output columns: [a (#0)] + ├── read rows: 10 + ├── read bytes: 65 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 100.00 + └── estimated rows: 10.00 query T explain select * from t1, t2, t where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#1), t.a (#2), t1.a (#0)] +├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#0)] -├── probe keys: [t2.a (#1)] +├── build keys: [t.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t1.a (#0), t.a (#2)] +│ ├── output columns: [t2.a (#1), t.a (#2)] │ ├── join type: INNER │ ├── build keys: [t.a (#2)] -│ ├── probe keys: [t1.a (#0)] +│ ├── probe keys: [t2.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -197,33 +197,33 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t1 -│ ├── output columns: [a (#0)] -│ ├── read rows: 10 -│ ├── read bytes: 65 +│ ├── table: default.join_reorder.t2 +│ ├── output columns: [a (#1)] +│ ├── read rows: 100 +│ ├── read bytes: 172 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated 
rows: 10.00 +│ └── estimated rows: 100.00 └── TableScan(Probe) - ├── table: default.join_reorder.t2 - ├── output columns: [a (#1)] - ├── read rows: 100 - ├── read bytes: 172 + ├── table: default.join_reorder.t1 + ├── output columns: [a (#0)] + ├── read rows: 10 + ├── read bytes: 65 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 100.00 + └── estimated rows: 10.00 query T explain select * from t2, t1, t where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#0), t.a (#2), t1.a (#1)] +├── output columns: [t2.a (#0), t1.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#1)] +├── build keys: [t.a (#2)] ├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 @@ -504,9 +504,9 @@ explain join select * from t right anti join t1 on t1.a = t.a ---- HashJoin: LEFT ANTI ├── Build -│ └── Scan: default.join_reorder.t (read rows: 1) +│ └── Scan: default.join_reorder.t (#0) (read rows: 1) └── Probe - └── Scan: default.join_reorder.t1 (read rows: 10) + └── Scan: default.join_reorder.t1 (#1) (read rows: 10) statement ok drop database join_reorder diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test index 58aada62528b..b4b5e7651757 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/cycles.test @@ -22,8 +22,8 @@ explain select * from t, t1, t2 where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t2.a (#2), t1.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t.a (#0), t1.a (#1)] -├── probe keys: [t2.a (#2), t2.a (#2)] +├── build keys: [t.a (#0)] +├── probe keys: [t2.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -70,8 +70,8 @@ explain select * from t, t2, t1 where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t.a (#0), t2.a (#1)] -├── probe keys: [t1.a (#2), t1.a (#2)] +├── build keys: [t.a (#0)] +├── probe keys: [t1.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -116,10 +116,10 @@ query T explain select * from t1, t, t2 where t.a = t1.a and t1.a = t2.a and t2.a = t.a ---- HashJoin -├── output columns: [t1.a (#0), t2.a (#2), t.a (#1)] +├── output columns: [t1.a (#0), t.a (#1), t2.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#2), t.a (#1)] -├── probe keys: [t1.a (#0), t1.a (#0)] +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -166,8 +166,8 @@ explain select * from t1, t2, t where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t.a (#2), t2.a (#1)] -├── probe keys: [t1.a (#0), t1.a (#0)] +├── build keys: [t.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -214,8 +214,8 @@ explain select * from t2, t1, t where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t2.a (#0), t1.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t.a (#2), t1.a (#1)] -├── probe keys: [t2.a (#0), t2.a (#0)] +├── build keys: [t.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -260,10 
+260,10 @@ query T explain select * from t2, t, t1 where t.a = t1.a and t1.a = t2.a and t2.a = t.a ---- HashJoin -├── output columns: [t2.a (#0), t1.a (#2), t.a (#1)] +├── output columns: [t2.a (#0), t.a (#1), t1.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#2), t.a (#1)] -├── probe keys: [t2.a (#0), t2.a (#0)] +├── build keys: [t1.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/mark.test b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/mark.test index 86ffbb800d07..8740e9732bf1 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/mark.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/mark.test @@ -55,3 +55,110 @@ HashJoin ├── partitions scanned: 1 ├── push downs: [filters: [], limit: NONE] └── estimated rows: 10000.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(c int, d int); + +statement ok +insert into t1 values(1, 2), (2, 3), (3, 4); + +statement ok +insert into t2 values(1, 2), (2, 3), (3, 4), (4, 5); + +query T +explain SELECT + (SELECT IF(EXISTS(SELECT 1 FROM t1 WHERE t1.a = t2.c), '1', '0')) +FROM + t2; +---- +EvalScalar +├── output columns: [(select if(exists (select 1 from t1 where (t1.a = t2.c)), '1', '0')) (#6)] +├── expressions: [scalar_subquery_5 (#5)] +├── estimated rows: 4.00 +└── HashJoin + ├── output columns: [if(exists (select 1 from t1 where (t1.a = t2.c)), '1', '0') (#5)] + ├── join type: LEFT SINGLE + ├── build keys: [c (#8)] + ├── probe keys: [c (#0)] + ├── filters: [] + ├── estimated rows: 4.00 + ├── EvalScalar(Build) + │ ├── output columns: [c (#8), if(exists (select 1 from t1 where (t1.a = t2.c)), '1', '0') (#5)] + │ ├── expressions: [if(7 (#7), '1', '0')] + │ ├── estimated rows: 0.00 + │ └── HashJoin + │ ├── output columns: [c (#8), marker (#7)] + │ ├── join type: LEFT MARK + │ ├── build keys: [c (#8)] + │ ├── probe keys: [a (#2)] + │ ├── filters: [] + │ ├── estimated rows: 0.00 + │ ├── HashJoin(Build) + │ │ ├── output columns: [c (#8)] + │ │ ├── join type: CROSS + │ │ ├── build keys: [] + │ │ ├── probe keys: [] + │ │ ├── filters: [] + │ │ ├── estimated rows: 0.00 + │ │ ├── AggregateFinal(Build) + │ │ │ ├── output columns: [c (#8)] + │ │ │ ├── group by: [c] + │ │ │ ├── aggregate functions: [] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── AggregatePartial + │ │ │ ├── output columns: [#_group_by_key] + │ │ │ ├── group by: [c] + │ │ │ ├── aggregate functions: [] + │ │ │ ├── estimated rows: 0.00 + │ │ │ └── TableScan + │ │ │ ├── table: default.default.t2 + │ │ │ ├── output columns: [c (#8)] + │ │ │ ├── read rows: 4 + │ │ │ ├── read bytes: 49 + │ │ │ ├── partitions total: 1 + │ │ │ ├── partitions scanned: 1 + │ │ │ ├── pruning stats: [segments: , blocks: ] + │ │ │ ├── push downs: [filters: [], limit: NONE] + │ │ │ └── estimated rows: 0.00 + │ │ └── DummyTableScan(Probe) + │ └── Filter(Probe) + │ ├── output columns: [t1.a (#2)] + │ ├── filters: [is_true(a (#2) = a (#2))] + │ ├── estimated rows: 0.60 + │ └── TableScan + │ ├── table: default.default.t1 + │ ├── output columns: [a (#2)] + │ ├── read rows: 3 + │ ├── read bytes: 45 + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [is_true(t1.a (#2) = t1.a (#2))], limit: NONE] + │ └── estimated rows: 3.00 + └── TableScan(Probe) 
+ ├── table: default.default.t2 + ├── output columns: [c (#0)] + ├── read rows: 4 + ├── read bytes: 49 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + + + +statement ok +drop table t1; + +statement ok +drop table t2 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test index c21e4714f6b1..796e7576ef57 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join_reorder/star.test @@ -20,17 +20,17 @@ query T explain select * from t, t1, t2 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#1), t.a (#0), t2.a (#2)] +├── output columns: [t2.a (#2), t1.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t2.a (#2)] -├── probe keys: [t1.a (#1)] +├── build keys: [t.a (#0)] +├── probe keys: [t2.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#2), t.a (#0)] +│ ├── output columns: [t1.a (#1), t.a (#0)] │ ├── join type: INNER │ ├── build keys: [t.a (#0)] -│ ├── probe keys: [t2.a (#2)] +│ ├── probe keys: [t1.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -44,33 +44,33 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#2)] -│ ├── read rows: 100 -│ ├── read bytes: 172 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#1)] +│ ├── read rows: 10 +│ ├── read bytes: 65 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#1)] - ├── read rows: 10 - ├── read bytes: 65 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#2)] + ├── read rows: 100 + ├── read bytes: 172 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t, t2, t1 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#2), t.a (#0), t2.a (#1)] +├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t2.a (#1)] +├── build keys: [t.a (#0)] ├── probe keys: [t1.a (#2)] ├── filters: [] ├── estimated rows: 1.00 @@ -164,9 +164,9 @@ query T explain select * from t1, t2, t where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#0), t.a (#2), t2.a (#1)] +├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#1)] +├── build keys: [t.a (#2)] ├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 @@ -212,17 +212,17 @@ query T explain select * from t2, t1, t where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#1), t.a (#2), t2.a (#0)] +├── output columns: [t2.a (#0), t1.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#0)] -├── probe keys: [t1.a (#1)] +├── build keys: [t.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#0), t.a (#2)] +│ ├── output 
columns: [t1.a (#1), t.a (#2)] │ ├── join type: INNER │ ├── build keys: [t.a (#2)] -│ ├── probe keys: [t2.a (#0)] +│ ├── probe keys: [t1.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -236,41 +236,41 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#0)] -│ ├── read rows: 100 -│ ├── read bytes: 172 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#1)] +│ ├── read rows: 10 +│ ├── read bytes: 65 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#1)] - ├── read rows: 10 - ├── read bytes: 65 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#0)] + ├── read rows: 100 + ├── read bytes: 172 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t2, t, t1 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#2), t.a (#1), t2.a (#0)] +├── output columns: [t2.a (#0), t.a (#1), t1.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#0)] -├── probe keys: [t1.a (#2)] +├── build keys: [t1.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#0), t.a (#1)] +│ ├── output columns: [t1.a (#2), t.a (#1)] │ ├── join type: INNER │ ├── build keys: [t.a (#1)] -│ ├── probe keys: [t2.a (#0)] +│ ├── probe keys: [t1.a (#2)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -284,25 +284,25 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#0)] -│ ├── read rows: 100 -│ ├── read bytes: 172 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#2)] +│ ├── read rows: 10 +│ ├── read bytes: 65 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#2)] - ├── read rows: 10 - ├── read bytes: 65 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#0)] + ├── read rows: 100 + ├── read bytes: 172 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + └── estimated rows: 100.00 statement ok drop database join_reorder diff --git a/tests/sqllogictests/suites/mode/standalone/explain/lateral.test b/tests/sqllogictests/suites/mode/standalone/explain/lateral.test index bc0b00c6a2cd..e4466bada019 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/lateral.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/lateral.test @@ -2,15 +2,15 @@ query T explain select * from numbers(10) t(a), lateral(select * from numbers(10) t1(a) where t.a = t1.a) t1 ---- HashJoin -├── output columns: [t1.number (#1), t.number (#0)] +├── output columns: [t.number (#0), t1.number (#1)] ├── join type: INNER -├── build keys: [number (#0)] -├── probe keys: [number 
(#1)] +├── build keys: [number (#1)] +├── probe keys: [number (#0)] ├── filters: [] ├── estimated rows: 100.00 ├── TableScan(Build) │ ├── table: default.system.numbers -│ ├── output columns: [number (#0)] +│ ├── output columns: [number (#1)] │ ├── read rows: 10 │ ├── read bytes: 80 │ ├── partitions total: 1 @@ -19,7 +19,7 @@ HashJoin │ └── estimated rows: 10.00 └── TableScan(Probe) ├── table: default.system.numbers - ├── output columns: [number (#1)] + ├── output columns: [number (#0)] ├── read rows: 10 ├── read bytes: 80 ├── partitions total: 1 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test b/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test index 18d91e281dd9..25ba90fb681b 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test @@ -97,7 +97,7 @@ RowFetch ├── estimated rows: 0.00 └── Filter ├── output columns: [t_11831.uid (#0), t_11831.time (#3), t_11831._row_id (#4)] - ├── filters: [is_true(t_11831.uid (#0) = 11), is_true(t_11831.time (#3) >= 1686672000000), is_true(t_11831.time (#3) <= 1686758399000)] + ├── filters: [is_true(t_11831.time (#3) >= 1686672000000), is_true(t_11831.time (#3) <= 1686758399000), is_true(t_11831.uid (#0) = 11)] ├── estimated rows: 0.00 └── TableScan ├── table: default.default.t_11831 @@ -106,7 +106,7 @@ RowFetch ├── read bytes: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(and_filters(and_filters(and_filters(t_11831.time (#3) >= 1686672000000, t_11831.time (#3) <= 1686758399000), t_11831.uid (#0) = 11), t_11831.time (#3) >= 1686672000000), t_11831.time (#3) <= 1686758399000)], limit: NONE] + ├── push downs: [filters: [and_filters(and_filters(t_11831.time (#3) >= 1686672000000, t_11831.time (#3) <= 1686758399000), t_11831.uid (#0) = 11)], limit: NONE] └── estimated rows: 0.00 statement ok diff --git a/tests/sqllogictests/suites/mode/standalone/explain/limit.test b/tests/sqllogictests/suites/mode/standalone/explain/limit.test index 12c1a26d4a41..c33a8410dee6 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/limit.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/limit.test @@ -97,13 +97,13 @@ Limit ├── estimated rows: 0.20 └── Filter ├── output columns: [t.number (#0)] - ├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(CAST(is_not_null(scalar_subquery_4 (#4)) AS Boolean NULL), scalar_subquery_4 (#4), 0))] + ├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(true, TRY_CAST(scalar_subquery_4 (#4) AS UInt64 NULL), 0))] ├── estimated rows: 0.20 └── HashJoin ├── output columns: [t.number (#0), COUNT(*) (#4)] - ├── join type: LEFT SINGLE + ├── join type: INNER ├── build keys: [number (#2)] - ├── probe keys: [CAST(number (#0) AS UInt64 NULL)] + ├── probe keys: [number (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── AggregateFinal(Build) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/outer_to_inner.test b/tests/sqllogictests/suites/mode/standalone/explain/outer_to_inner.test new file mode 100644 index 000000000000..7d515713b154 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/outer_to_inner.test @@ -0,0 +1,190 @@ +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +# table with string column +statement ok +create table t1(a string, b string); + +statement ok +create table t2(a string, b string); + +query T +explain select * from t1 left join t2 on t1.b = t2.b 
where t2.a > 'a'; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: INNER +├── build keys: [t2.b (#3)] +├── probe keys: [t1.b (#1)] +├── filters: [] +├── estimated rows: 0.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 'a')] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [is_true(t2.a (#2) > 'a')], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 + + +statement ok +drop table t1; + +statement ok +drop table t2; + +# table with date column +statement ok +create table t1(a date, b date); + +statement ok +create table t2(a date, b date); + +query T +explain select * from t1 left join t2 on t1.b = t2.b where t2.a > '2022-01-01 02:00:11'; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: INNER +├── build keys: [t2.b (#3)] +├── probe keys: [t1.b (#1)] +├── filters: [] +├── estimated rows: 0.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > '2022-01-01')] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [is_true(t2.a (#2) > '2022-01-01')], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 + +statement ok +drop table t1; + +statement ok +drop table t2; + +# table with decimal column +statement ok +create table t1(a decimal(4, 2), b decimal(4, 2)); + +statement ok +create table t2(a decimal(4, 2), b decimal(4, 2)); + +query T +explain select * from t1 left join t2 on t1.b = t2.b where t2.a > 1.1; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: INNER +├── build keys: [t2.b (#3)] +├── probe keys: [t1.b (#1)] +├── filters: [] +├── estimated rows: 0.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 1.10)] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [is_true(t2.a (#2) > 1.10)], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 + +statement ok +drop table t1; + +statement ok +drop table t2; + +# table with float column +statement ok +create table t1(a float, b float); + +statement ok +create table t2(a float, b float); + +query T +explain select * from t1 left join t2 on t1.b = t2.b 
where t2.a > 1.1; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: INNER +├── build keys: [t2.b (#3)] +├── probe keys: [t1.b (#1)] +├── filters: [] +├── estimated rows: 0.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 1.1)] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [is_true(t2.a (#2) > 1.1)], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 + +statement ok +drop table t1; + +statement ok +drop table t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test b/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test index a8e16b1a1603..d67f409f36af 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/prune_column.test @@ -117,90 +117,91 @@ explain select t1.a from (select number + 1 as a, number + 1 as b from numbers(1 HashJoin ├── output columns: [a (#1)] ├── join type: INNER -├── build keys: [CAST(t1.a (#1) AS UInt64 NULL)] -├── probe keys: [_if_scalar_subquery (#15)] +├── build keys: [_if_scalar_subquery (#15)] +├── probe keys: [CAST(t1.a (#1) AS UInt64 NULL)] ├── filters: [] ├── estimated rows: 1.00 ├── EvalScalar(Build) -│ ├── output columns: [a (#1)] -│ ├── expressions: [numbers.number (#0) + 1] +│ ├── output columns: [_if_scalar_subquery (#15)] +│ ├── expressions: [if(CAST(_count_scalar_subquery (#13) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#14))] │ ├── estimated rows: 1.00 -│ └── TableScan -│ ├── table: default.system.numbers -│ ├── output columns: [number (#0)] -│ ├── read rows: 1 -│ ├── read bytes: 8 -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 1.00 +│ └── Limit +│ ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] +│ ├── limit: 1 +│ ├── offset: 0 +│ ├── estimated rows: 1.00 +│ └── AggregateFinal +│ ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(COUNT(*))] +│ ├── limit: 1 +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(COUNT(*))] +│ ├── estimated rows: 1.00 +│ └── AggregateFinal +│ ├── output columns: [COUNT(*) (#12)] +│ ├── group by: [] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [COUNT(*) (#12)] +│ ├── group by: [] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── HashJoin +│ ├── output columns: [] +│ ├── join type: INNER +│ ├── build keys: [t2.b (#5)] +│ ├── probe keys: [t3.b (#10)] +│ ├── filters: [] +│ ├── estimated rows: 0.20 +│ ├── EvalScalar(Build) +│ │ ├── output columns: [b (#5)] +│ │ ├── expressions: [numbers.number (#3) + 1] +│ │ ├── estimated rows: 0.20 +│ │ └── Filter +│ │ ├── output columns: [numbers.number (#3)] +│ │ ├── filters: [numbers.number (#3) 
+ 1 = 1] +│ │ ├── estimated rows: 0.20 +│ │ └── TableScan +│ │ ├── table: default.system.numbers +│ │ ├── output columns: [number (#3)] +│ │ ├── read rows: 1 +│ │ ├── read bytes: 8 +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── push downs: [filters: [numbers.number (#3) + 1 = 1], limit: NONE] +│ │ └── estimated rows: 1.00 +│ └── EvalScalar(Probe) +│ ├── output columns: [b (#10)] +│ ├── expressions: [numbers.number (#8) + 1] +│ ├── estimated rows: 1.00 +│ └── TableScan +│ ├── table: default.system.numbers +│ ├── output columns: [number (#8)] +│ ├── read rows: 1 +│ ├── read bytes: 8 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 1.00 └── EvalScalar(Probe) - ├── output columns: [_if_scalar_subquery (#15)] - ├── expressions: [if(CAST(_count_scalar_subquery (#13) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#14))] + ├── output columns: [a (#1)] + ├── expressions: [numbers.number (#0) + 1] ├── estimated rows: 1.00 - └── Limit - ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] - ├── limit: 1 - ├── offset: 0 - ├── estimated rows: 1.00 - └── AggregateFinal - ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] - ├── group by: [] - ├── aggregate functions: [count(), any(COUNT(*))] - ├── limit: 1 - ├── estimated rows: 1.00 - └── AggregatePartial - ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] - ├── group by: [] - ├── aggregate functions: [count(), any(COUNT(*))] - ├── estimated rows: 1.00 - └── AggregateFinal - ├── output columns: [COUNT(*) (#12)] - ├── group by: [] - ├── aggregate functions: [count()] - ├── estimated rows: 1.00 - └── AggregatePartial - ├── output columns: [COUNT(*) (#12)] - ├── group by: [] - ├── aggregate functions: [count()] - ├── estimated rows: 1.00 - └── HashJoin - ├── output columns: [] - ├── join type: INNER - ├── build keys: [t2.b (#5)] - ├── probe keys: [t3.b (#10)] - ├── filters: [] - ├── estimated rows: 0.20 - ├── EvalScalar(Build) - │ ├── output columns: [b (#5)] - │ ├── expressions: [numbers.number (#3) + 1] - │ ├── estimated rows: 0.20 - │ └── Filter - │ ├── output columns: [numbers.number (#3)] - │ ├── filters: [numbers.number (#3) + 1 = 1] - │ ├── estimated rows: 0.20 - │ └── TableScan - │ ├── table: default.system.numbers - │ ├── output columns: [number (#3)] - │ ├── read rows: 1 - │ ├── read bytes: 8 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [numbers.number (#3) + 1 = 1], limit: NONE] - │ └── estimated rows: 1.00 - └── EvalScalar(Probe) - ├── output columns: [b (#10)] - ├── expressions: [numbers.number (#8) + 1] - ├── estimated rows: 1.00 - └── TableScan - ├── table: default.system.numbers - ├── output columns: [number (#8)] - ├── read rows: 1 - ├── read bytes: 8 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 1.00 + └── TableScan + ├── table: default.system.numbers + ├── output columns: [number (#0)] + ├── read rows: 1 + ├── read bytes: 8 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 + query T explain select name from system.functions order by example diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_eval_scalar.test similarity index 50% rename from 
tests/sqllogictests/suites/mode/standalone/explain/push_down_filter.test rename to tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_eval_scalar.test index c339766166b1..43a459a500c7 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_eval_scalar.test @@ -1,83 +1,3 @@ -# push down filter ProjectSet -statement ok -drop table if exists products; - -statement ok -create table products(name varchar, details variant); - -statement ok -insert into products(name, details) values ('Laptop', '{"brand": "Dell", "colors": ["Black", "Silver"], "price": 1200, "features": {"ram": "16GB", "storage": "512GB"}}'), ('Smartphone', '{"brand": "Apple", "colors": ["White", "Black"], "price": 999, "features": {"ram": "4GB", "storage": "128GB"}}'), ('Headphones', '{"brand": "Sony", "colors": ["Black", "Blue", "Red"], "price": 150, "features": {"battery": "20h", "bluetooth": "5.0"}}'); - -query T -explain select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; ----- -EvalScalar -├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] -├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] -├── estimated rows: 0.12 -└── Filter - ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] - ├── filters: [is_true(TRY_CAST(get(1)(json_path_query (#2)) AS String NULL) = '512GB')] - ├── estimated rows: 0.12 - └── ProjectSet - ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] - ├── estimated rows: 0.60 - ├── set returning functions: json_path_query(products.details (#1), '$.features.*') - └── Filter - ├── output columns: [products.name (#0), products.details (#1)] - ├── filters: [is_true(products.name (#0) = 'Laptop'), is_true(TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')] - ├── estimated rows: 0.60 - └── TableScan - ├── table: default.default.products - ├── output columns: [name (#0), details (#1)] - ├── read rows: 3 - ├── read bytes: 328 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] - └── estimated rows: 3.00 - -query T -select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; ----- -Laptop "512GB" "16GB" - -query T -explain select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; ----- -EvalScalar -├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] -├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] -├── estimated rows: 0.60 -└── ProjectSet - ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] - ├── estimated rows: 0.60 - ├── set returning functions: 
json_path_query(products.details (#1), '$.features.*') - └── Filter - ├── output columns: [products.name (#0), products.details (#1)] - ├── filters: [is_true(products.name (#0) = 'Laptop'), is_true(TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')] - ├── estimated rows: 0.60 - └── TableScan - ├── table: default.default.products - ├── output columns: [name (#0), details (#1)] - ├── read rows: 3 - ├── read bytes: 328 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] - └── estimated rows: 3.00 - -query T -select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; ----- -Laptop "16GB" "16GB" -Laptop "512GB" "16GB" - -statement ok -drop table products; - # push down filter EvalScalar statement ok drop table if exists t1; @@ -179,15 +99,19 @@ AggregateFinal │ │ ├── output columns: [t2.sid (#1), sum_arg_0 (#4)] │ │ ├── expressions: [if(CAST(is_not_null(t3.val (#2)) AS Boolean NULL), CAST(assume_not_null(t3.val (#2)) AS Int32 NULL), true, 0, NULL)] │ │ ├── estimated rows: 0.00 - │ │ └── TableScan - │ │ ├── table: default.default.t2 - │ │ ├── output columns: [sid (#1), val (#2)] - │ │ ├── read rows: 0 - │ │ ├── read bytes: 0 - │ │ ├── partitions total: 0 - │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [], limit: NONE] - │ │ └── estimated rows: 0.00 + │ │ └── Filter + │ │ ├── output columns: [t2.sid (#1), t2.val (#2)] + │ │ ├── filters: [is_true(t3.sid (#1) = 1)] + │ │ ├── estimated rows: 0.00 + │ │ └── TableScan + │ │ ├── table: default.default.t2 + │ │ ├── output columns: [sid (#1), val (#2)] + │ │ ├── read rows: 0 + │ │ ├── read bytes: 0 + │ │ ├── partitions total: 0 + │ │ ├── partitions scanned: 0 + │ │ ├── push downs: [filters: [is_true(t2.sid (#1) = 1)], limit: NONE] + │ │ └── estimated rows: 0.00 │ └── Filter(Probe) │ ├── output columns: [t.id (#0)] │ ├── filters: [is_true(t.id (#0) = 1)] @@ -234,44 +158,13 @@ AggregateFinal └── estimated rows: 0.00 statement ok -drop table t1; - -statement ok -drop table t2; - -statement ok -drop view v1; - -statement ok -drop view v2; - -# push down alias filter scan -statement ok -drop table if exists t; +drop table if exists t1; statement ok -create table t (x INT); +drop table if exists t2; statement ok -insert into t(x) values (1), (2); - -query I -explain select * from t as a(id) where a.id > 1; ----- -Filter -├── output columns: [a.x (#0)] -├── filters: [is_true(a.id (#0) > 1)] -├── estimated rows: 0.40 -└── TableScan - ├── table: default.default.t - ├── output columns: [x (#0)] - ├── read rows: 2 - ├── read bytes: 41 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true(t.x (#0) > 1)], limit: NONE] - └── estimated rows: 2.00 +drop view if exists v1; statement ok -drop table t; +drop view if exists v2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test new file mode 100644 index 000000000000..d01e182e6db7 --- /dev/null +++ 
b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test @@ -0,0 +1,288 @@ +# push down filter full outer join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# convert full outer join to left outer join, can propagate t1.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t1.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.a (#0) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to right outer join, can propagate t2.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t2.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: RIGHT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.a (#0) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to left outer join, can not propagate t1.b > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t1.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 
+│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.b (#1) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.b (#1) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to right outer join, can not propagate t2.b > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t2.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: RIGHT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.b (#3) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.b (#3) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to inner join, can propagate t1.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t1.a > 0 and t2.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 0), is_true(t2.b (#3) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [and_filters(t2.a (#2) > 0, t2.b (#3) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.a (#0) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to inner join, can propagate t2.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t2.a > 0 and t1.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── 
filters: [is_true(t2.a (#2) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.a (#0) > 0), is_true(t1.b (#1) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [and_filters(t1.a (#0) > 0, t1.b (#1) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# full outer join, can not push down t2.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a and t2.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: FULL OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [t2.a (#2) > 0] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test new file mode 100644 index 000000000000..fa4ee4f10817 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test @@ -0,0 +1,108 @@ +# push down filter inner join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# can propagate t1.a > 3 +query T +explain select * from t1 inner join t2 on t1.a = t2.a where t1.a > 3; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 3)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 3)], limit: NONE] +│ └── estimated rows: 3.00 +└── 
Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.a (#0) > 3)] + ├── estimated rows: 1.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 3)], limit: NONE] + └── estimated rows: 4.00 + +# can propagate (t2.a > 1 or t2.a <= 2) +query T +explain select * from t1 inner join t2 on t1.a = t2.a where t2.a <= 2 or (t1.a > 1 and t2.a > 1); +---- +Filter +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── filters: [is_true(t2.a (#2) <= 2 OR t1.a (#0) > 1 AND t2.a (#2) > 1)] +├── estimated rows: 0.45 +└── HashJoin + ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] + ├── join type: INNER + ├── build keys: [t2.a (#2)] + ├── probe keys: [t1.a (#0)] + ├── filters: [] + ├── estimated rows: 1.26 + ├── Filter(Build) + │ ├── output columns: [t2.a (#2), t2.b (#3)] + │ ├── filters: [is_true(t2.a (#2) <= 2 OR t2.a (#2) > 1)] + │ ├── estimated rows: 1.08 + │ └── TableScan + │ ├── table: default.default.t2 + │ ├── output columns: [a (#2), b (#3)] + │ ├── read rows: 3 + │ ├── read bytes: 82 + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [is_true(t2.a (#2) <= 2 OR t2.a (#2) > 1)], limit: NONE] + │ └── estimated rows: 3.00 + └── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.a (#0) <= 2 OR t1.a (#0) > 1)] + ├── estimated rows: 3.50 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) <= 2 OR t1.a (#0) > 1)], limit: NONE] + └── estimated rows: 4.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test new file mode 100644 index 000000000000..ef83fa85610a --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test @@ -0,0 +1,260 @@ +# push down filter left outer join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# left outer join, can propagate t1.a > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a where t1.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── 
partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.a (#0) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# left outer join, can not push down t1.a > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a and t1.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [t1.a (#0) > 0] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +# left outer join, can not propagate t1.b > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a where t1.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] + ├── filters: [is_true(t1.b (#1) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.b (#1) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert left outer join to inner join, can propagate t2.a > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a where t2.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.a (#2) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0), t1.b (#1)] 
+ ├── filters: [is_true(t1.a (#0) > 0)] + ├── estimated rows: 4.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert left outer join to inner join, can not propagate t2.b > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a where t2.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── Filter(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3)] +│ ├── filters: [is_true(t2.b (#3) > 0)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 82 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.b (#3) > 0)], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 90 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +drop table if exists m1; + +statement ok +drop table if exists j1; + +statement ok +create table m1(id varchar, "context" varchar); + +statement ok +create table j1(id varchar); + +# In `can_filter_null`, if the function is `assume_not_null` or `remove_nullable`, we cannot replace the column bindings with `Scalar::Null`. 
+query T +explain WITH base AS (SELECT id, context FROM m1), src1 AS (SELECT base.id FROM base WHERE IFNULL(base.context, '') = ''), join1 AS (SELECT id FROM j1) SELECT src1.id FROM src1 LEFT OUTER JOIN join1 ON TRUE; +---- +HashJoin +├── output columns: [m1.id (#0)] +├── join type: LEFT OUTER +├── build keys: [] +├── probe keys: [] +├── filters: [] +├── estimated rows: 0.00 +├── TableScan(Build) +│ ├── table: default.default.j1 +│ ├── output columns: [] +│ ├── read rows: 0 +│ ├── read size: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 0.00 +└── Filter(Probe) + ├── output columns: [m1.id (#0)] + ├── filters: [is_true(if(CAST(is_not_null(base.context (#1)) AS Boolean NULL), CAST(assume_not_null(base.context (#1)) AS String NULL), true, '', NULL) = '')] + ├── estimated rows: 0.00 + └── TableScan + ├── table: default.default.m1 + ├── output columns: [id (#0), context (#1)] + ├── read rows: 0 + ├── read size: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [is_true(if(CAST(is_not_null(m1.context (#1)) AS Boolean NULL), CAST(assume_not_null(m1.context (#1)) AS String NULL), true, '', NULL) = '')], limit: NONE] + └── estimated rows: 0.00 + +statement ok +drop table if exists m1; + +statement ok +drop table if exists j1; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_semi_anti.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_semi_anti.test new file mode 100644 index 000000000000..3b8c586910b0 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_semi_anti.test @@ -0,0 +1,104 @@ +# push down filter semi join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# left semi, can propagate t1.a > 3 +query T +explain select t1.a from t1 where exists (select * from t2 where t1.a = t2.a) and t1.a > 3; +---- +HashJoin +├── output columns: [t1.a (#0)] +├── join type: LEFT SEMI +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2)] +│ ├── filters: [is_true(t2.a (#2) > 3)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 3)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0)] + ├── filters: [is_true(t1.a (#0) > 3)] + ├── estimated rows: 1.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 3)], limit: NONE] + └── estimated rows: 4.00 + +# left anti, can propagate t1.a > 3 +query T +explain select t1.a from t1 where not exists (select * from t2 where t1.a = t2.a) and t1.a > 3; +---- +HashJoin +├── output 
columns: [t1.a (#0)] +├── join type: LEFT ANTI +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 1.00 +├── Filter(Build) +│ ├── output columns: [t2.a (#2)] +│ ├── filters: [is_true(t2.a (#2) > 3)] +│ ├── estimated rows: 0.60 +│ └── TableScan +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 3)], limit: NONE] +│ └── estimated rows: 3.00 +└── Filter(Probe) + ├── output columns: [t1.a (#0)] + ├── filters: [is_true(t1.a (#0) > 3)] + ├── estimated rows: 1.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#0)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 3)], limit: NONE] + └── estimated rows: 4.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_project_set.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_project_set.test new file mode 100644 index 000000000000..d068b6b420a8 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_project_set.test @@ -0,0 +1,79 @@ +# push down filter ProjectSet +statement ok +drop table if exists products; + +statement ok +create table products(name varchar, details variant); + +statement ok +insert into products(name, details) values ('Laptop', '{"brand": "Dell", "colors": ["Black", "Silver"], "price": 1200, "features": {"ram": "16GB", "storage": "512GB"}}'), ('Smartphone', '{"brand": "Apple", "colors": ["White", "Black"], "price": 999, "features": {"ram": "4GB", "storage": "128GB"}}'), ('Headphones', '{"brand": "Sony", "colors": ["Black", "Blue", "Red"], "price": 150, "features": {"battery": "20h", "bluetooth": "5.0"}}'); + +query T +explain select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; +---- +EvalScalar +├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] +├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] +├── estimated rows: 0.12 +└── Filter + ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] + ├── filters: [is_true(TRY_CAST(get(1)(json_path_query (#2)) AS String NULL) = '512GB')] + ├── estimated rows: 0.12 + └── ProjectSet + ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] + ├── estimated rows: 0.60 + ├── set returning functions: json_path_query(products.details (#1), '$.features.*') + └── Filter + ├── output columns: [products.name (#0), products.details (#1)] + ├── filters: [is_true(products.name (#0) = 'Laptop'), is_true(TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')] + ├── estimated rows: 0.60 + └── TableScan + ├── table: default.default.products + ├── output columns: [name (#0), details (#1)] + ├── read rows: 3 + ├── read bytes: 328 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── 
push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] + └── estimated rows: 3.00 + +query T +select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; +---- +Laptop "512GB" "16GB" + +query T +explain select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; +---- +EvalScalar +├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] +├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] +├── estimated rows: 0.60 +└── ProjectSet + ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] + ├── estimated rows: 0.60 + ├── set returning functions: json_path_query(products.details (#1), '$.features.*') + └── Filter + ├── output columns: [products.name (#0), products.details (#1)] + ├── filters: [is_true(products.name (#0) = 'Laptop'), is_true(TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')] + ├── estimated rows: 0.60 + └── TableScan + ├── table: default.default.products + ├── output columns: [name (#0), details (#1)] + ├── read rows: 3 + ├── read bytes: 328 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] + └── estimated rows: 3.00 + +query T +select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; +---- +Laptop "16GB" "16GB" +Laptop "512GB" "16GB" + +statement ok +drop table if exists products; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_scan.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_scan.test new file mode 100644 index 000000000000..00ae8d2eeee9 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_scan.test @@ -0,0 +1,30 @@ +# push down alias filter scan +statement ok +drop table if exists t; + +statement ok +create table t (x INT); + +statement ok +insert into t(x) values (1), (2); + +query I +explain select * from t as a(id) where a.id > 1; +---- +Filter +├── output columns: [a.x (#0)] +├── filters: [is_true(a.id (#0) > 1)] +├── estimated rows: 0.40 +└── TableScan + ├── table: default.default.t + ├── output columns: [x (#0)] + ├── read rows: 2 + ├── read bytes: 41 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t.x (#0) > 1)], limit: NONE] + └── estimated rows: 2.00 + +statement ok +drop table if exists t; diff --git a/tests/sqllogictests/suites/mode/standalone/explain/select.test b/tests/sqllogictests/suites/mode/standalone/explain/select.test index 9736c107cffa..5d7775e8c691 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/select.test +++ 
b/tests/sqllogictests/suites/mode/standalone/explain/select.test @@ -16,7 +16,7 @@ explain select * from (select * from numbers(1)) as t1 where number = 1 ---- Filter ├── output columns: [numbers.number (#0)] -├── filters: [numbers.number (#0) = 1] +├── filters: [t1.number (#0) = 1] ├── estimated rows: 0.00 └── TableScan ├── table: default.system.numbers diff --git a/tests/sqllogictests/suites/mode/standalone/explain/sort.test b/tests/sqllogictests/suites/mode/standalone/explain/sort.test index 1076a7fe9057..3c78ce7e1726 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/sort.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/sort.test @@ -10,7 +10,7 @@ Sort ├── estimated rows: 0.00 └── Filter ├── output columns: [t1.a (#0)] - ├── filters: [is_true(t1.a (#0) > 1)] + ├── filters: [is_true(t2.a (#0) > 1)] ├── estimated rows: 0.00 └── TableScan ├── table: default.default.t1 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/subquery.test b/tests/sqllogictests/suites/mode/standalone/explain/subquery.test index ac54f587ed08..b91ba286213a 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/subquery.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/subquery.test @@ -3,13 +3,13 @@ explain select t.number from numbers(1) as t, numbers(1) as t1 where t.number = ---- Filter ├── output columns: [t.number (#0)] -├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(CAST(is_not_null(scalar_subquery_4 (#4)) AS Boolean NULL), scalar_subquery_4 (#4), 0))] +├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(true, TRY_CAST(scalar_subquery_4 (#4) AS UInt64 NULL), 0))] ├── estimated rows: 0.20 └── HashJoin ├── output columns: [t.number (#0), COUNT(*) (#4)] - ├── join type: LEFT SINGLE + ├── join type: INNER ├── build keys: [number (#2)] - ├── probe keys: [CAST(number (#0) AS UInt64 NULL)] + ├── probe keys: [number (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── AggregateFinal(Build) @@ -164,52 +164,52 @@ explain select t.number from numbers(1) as t where number = (select * from numbe HashJoin ├── output columns: [t.number (#0)] ├── join type: INNER -├── build keys: [CAST(t.number (#0) AS UInt64 NULL)] -├── probe keys: [_if_scalar_subquery (#4)] +├── build keys: [_if_scalar_subquery (#4)] +├── probe keys: [CAST(t.number (#0) AS UInt64 NULL)] ├── filters: [] ├── estimated rows: 1.00 -├── TableScan(Build) -│ ├── table: default.system.numbers -│ ├── output columns: [number (#0)] -│ ├── read rows: 1 -│ ├── read bytes: 8 -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 1.00 -└── EvalScalar(Probe) - ├── output columns: [_if_scalar_subquery (#4)] - ├── expressions: [if(CAST(_count_scalar_subquery (#2) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#3))] - ├── estimated rows: 1.00 - └── Limit - ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] - ├── limit: 1 - ├── offset: 0 - ├── estimated rows: 1.00 - └── AggregateFinal - ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] - ├── group by: [] - ├── aggregate functions: [count(), any(number)] - ├── limit: 1 - ├── estimated rows: 1.00 - └── AggregatePartial - ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] - ├── group by: [] - ├── aggregate functions: [count(), any(number)] - ├── estimated rows: 1.00 - └── Filter - ├── output columns: [numbers.number (#1)] - ├── filters: [numbers.number (#1) = 0] - ├── estimated rows: 0.00 - └── 
TableScan - ├── table: default.system.numbers - ├── output columns: [number (#1)] - ├── read rows: 1 - ├── read bytes: 8 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [numbers.number (#1) = 0], limit: NONE] - └── estimated rows: 1.00 +├── EvalScalar(Build) +│ ├── output columns: [_if_scalar_subquery (#4)] +│ ├── expressions: [if(CAST(_count_scalar_subquery (#2) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#3))] +│ ├── estimated rows: 1.00 +│ └── Limit +│ ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] +│ ├── limit: 1 +│ ├── offset: 0 +│ ├── estimated rows: 1.00 +│ └── AggregateFinal +│ ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(number)] +│ ├── limit: 1 +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(number)] +│ ├── estimated rows: 1.00 +│ └── Filter +│ ├── output columns: [numbers.number (#1)] +│ ├── filters: [numbers.number (#1) = 0] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.system.numbers +│ ├── output columns: [number (#1)] +│ ├── read rows: 1 +│ ├── read bytes: 8 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [numbers.number (#1) = 0], limit: NONE] +│ └── estimated rows: 1.00 +└── TableScan(Probe) + ├── table: default.system.numbers + ├── output columns: [number (#0)] + ├── read rows: 1 + ├── read bytes: 8 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 query T explain select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number) @@ -310,7 +310,7 @@ HashJoin ├── estimated rows: 0.00 ├── Filter(Build) │ ├── output columns: [numbers.number (#1)] -│ ├── filters: [numbers.number (#1) = 0] +│ ├── filters: [numbers.number (#1) < 10, numbers.number (#1) = 0] │ ├── estimated rows: 0.00 │ └── TableScan │ ├── table: default.system.numbers @@ -319,7 +319,7 @@ HashJoin │ ├── read bytes: 8 │ ├── partitions total: 1 │ ├── partitions scanned: 1 -│ ├── push downs: [filters: [numbers.number (#1) = 0], limit: NONE] +│ ├── push downs: [filters: [and_filters(and_filters(numbers.number (#1) = 0, numbers.number (#1) < 10), numbers.number (#1) = 0)], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [t.number (#0)] @@ -396,65 +396,61 @@ HashJoin query T explain select t.number from numbers(1) as t, numbers(1) as t1 where (select count(*) = 1 from numbers(1) where t.number = number) and t.number = t1.number ---- -Filter +HashJoin ├── output columns: [t.number (#0)] -├── filters: [is_true(try_to_boolean(if(CAST(is_not_null(scalar_subquery_4 (#4)) AS Boolean NULL), TRY_CAST(scalar_subquery_4 (#4) AS UInt64 NULL), 0)))] +├── join type: INNER +├── build keys: [number (#2)] +├── probe keys: [number (#0)] +├── filters: [] ├── estimated rows: 0.20 -└── HashJoin - ├── output columns: [t.number (#0), (count(*) = 1) (#4)] - ├── join type: LEFT SINGLE - ├── build keys: [number (#2)] - ├── probe keys: [CAST(number (#0) AS UInt64 NULL)] +├── Filter(Build) +│ ├── output columns: [numbers.number (#2)] +│ ├── filters: [is_true(try_to_boolean(if(true, TRY_CAST(COUNT(*) (#3) = 1 AS UInt64 NULL), 0)))] +│ ├── estimated rows: 0.20 +│ └── AggregateFinal +│ ├── output columns: [COUNT(*) (#3), numbers.number (#2)] +│ ├── 
group by: [number] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [COUNT(*) (#3), #_group_by_key] +│ ├── group by: [number] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── TableScan +│ ├── table: default.system.numbers +│ ├── output columns: [number (#2)] +│ ├── read rows: 1 +│ ├── read bytes: 8 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 1.00 +└── HashJoin(Probe) + ├── output columns: [t.number (#0)] + ├── join type: INNER + ├── build keys: [t1.number (#1)] + ├── probe keys: [t.number (#0)] ├── filters: [] ├── estimated rows: 1.00 - ├── EvalScalar(Build) - │ ├── output columns: [numbers.number (#2), (count(*) = 1) (#4)] - │ ├── expressions: [COUNT(*) (#3) = 1] - │ ├── estimated rows: 1.00 - │ └── AggregateFinal - │ ├── output columns: [COUNT(*) (#3), numbers.number (#2)] - │ ├── group by: [number] - │ ├── aggregate functions: [count()] - │ ├── estimated rows: 1.00 - │ └── AggregatePartial - │ ├── output columns: [COUNT(*) (#3), #_group_by_key] - │ ├── group by: [number] - │ ├── aggregate functions: [count()] - │ ├── estimated rows: 1.00 - │ └── TableScan - │ ├── table: default.system.numbers - │ ├── output columns: [number (#2)] - │ ├── read rows: 1 - │ ├── read bytes: 8 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 1.00 - └── HashJoin(Probe) - ├── output columns: [t.number (#0)] - ├── join type: INNER - ├── build keys: [t1.number (#1)] - ├── probe keys: [t.number (#0)] - ├── filters: [] - ├── estimated rows: 1.00 - ├── TableScan(Build) - │ ├── table: default.system.numbers - │ ├── output columns: [number (#1)] - │ ├── read rows: 1 - │ ├── read bytes: 8 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 1.00 - └── TableScan(Probe) - ├── table: default.system.numbers - ├── output columns: [number (#0)] - ├── read rows: 1 - ├── read bytes: 8 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 1.00 + ├── TableScan(Build) + │ ├── table: default.system.numbers + │ ├── output columns: [number (#1)] + │ ├── read rows: 1 + │ ├── read bytes: 8 + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 1.00 + └── TableScan(Probe) + ├── table: default.system.numbers + ├── output columns: [number (#0)] + ├── read rows: 1 + ├── read bytes: 8 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 query T explain select t.number from numbers(1) as t where exists(select * from numbers(1) as t1 where t.number > t1.number) and not exists(select * from numbers(1) as t1 where t.number < t1.number) @@ -506,7 +502,7 @@ explain select * from (select number as a from numbers(10)) as t(b) where t.b > ---- Filter ├── output columns: [numbers.number (#0)] -├── filters: [numbers.number (#0) > 5] +├── filters: [t.b (#0) > 5] ├── estimated rows: 0.00 └── TableScan ├── table: default.system.numbers diff --git a/tests/sqllogictests/suites/mode/standalone/explain/window.test b/tests/sqllogictests/suites/mode/standalone/explain/window.test index 73c33d8eccc3..c436b9018da3 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/window.test +++ 
b/tests/sqllogictests/suites/mode/standalone/explain/window.test @@ -64,7 +64,7 @@ explain SELECT k, v FROM (SELECT *, rank() OVER (PARTITION BY k ORDER BY v DESC) ---- Filter ├── output columns: [test.k (#0), test.v (#1)] -├── filters: [rank() OVER (PARTITION BY k ORDER BY v DESC) (#4) = 1] +├── filters: [t2.rank (#4) = 1] ├── estimated rows: 0.00 └── Window ├── output columns: [test.k (#0), test.v (#1), rank() OVER (PARTITION BY k ORDER BY v DESC) (#4)] @@ -108,35 +108,35 @@ explain SELECT k, v FROM (SELECT *, rank() OVER (PARTITION BY v ORDER BY v DESC) ---- Filter ├── output columns: [test.k (#0), test.v (#1)] -├── filters: [rank() OVER (PARTITION BY v ORDER BY v DESC) (#4) = 1, is_true(t1.k (#0) = 12)] +├── filters: [t2.rank (#4) = 1, is_true(t2.k (#0) = 12)] ├── estimated rows: 0.00 └── Window - ├── output columns: [test.k (#0), test.v (#1), rank() OVER (PARTITION BY v ORDER BY v DESC) (#4)] - ├── aggregate function: [rank] - ├── partition by: [v] - ├── order by: [v] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── UnionAll - ├── output columns: [test.k (#0), test.v (#1)] - ├── estimated rows: 0.00 - ├── TableScan - │ ├── table: default.test_explain_window.test - │ ├── output columns: [k (#0), v (#1)] - │ ├── read rows: 0 - │ ├── read bytes: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 0.00 - └── TableScan - ├── table: default.test_explain_window.test - ├── output columns: [k (#2), v (#3)] - ├── read rows: 0 - ├── read bytes: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 0.00 + ├── output columns: [test.k (#0), test.v (#1), rank() OVER (PARTITION BY v ORDER BY v DESC) (#4)] + ├── aggregate function: [rank] + ├── partition by: [v] + ├── order by: [v] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── UnionAll + ├── output columns: [test.k (#0), test.v (#1)] + ├── estimated rows: 0.00 + ├── TableScan + │ ├── table: default.test_explain_window.test + │ ├── output columns: [k (#0), v (#1)] + │ ├── read rows: 0 + │ ├── read bytes: 0 + │ ├── partitions total: 0 + │ ├── partitions scanned: 0 + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 0.00 + └── TableScan + ├── table: default.test_explain_window.test + ├── output columns: [k (#2), v (#3)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 # cannot push down filter in window function query T @@ -144,7 +144,7 @@ explain SELECT k, v FROM (SELECT *, rank() OVER (ORDER BY v DESC) AS rank FROM ( ---- Filter ├── output columns: [test.k (#0), test.v (#1)] -├── filters: [rank() OVER (ORDER BY v DESC) (#4) = 1, is_true(t1.k (#0) = 12)] +├── filters: [t2.rank (#4) = 1, is_true(t2.k (#0) = 12)] ├── estimated rows: 0.00 └── Window ├── output columns: [test.k (#0), test.v (#1), rank() OVER (ORDER BY v DESC) (#4)] @@ -285,5 +285,91 @@ Filter ├── push downs: [filters: [], limit: NONE] └── estimated rows: 0.00 +# test push down limit to window function +statement ok +drop table if exists t + +statement ok +create table t(a int) + +statement ok +insert into t values(1), (2), (3), (3) + +# range frame (can not push down limit) +query T +explain pipeline select a, sum(a) over (partition by a order by a desc) from t limit 3 +---- +CompoundBlockOperator(Project) × 1 processor + LimitTransform × 1 processor + Transform Window × 1 processor + Merge 
(TransformSortMerge × 4 processors) to (Transform Window × 1) + TransformSortMerge × 4 processors + SortPartialTransform × 4 processors + Merge (DeserializeDataTransform × 1 processor) to (SortPartialTransform × 4) + DeserializeDataTransform × 1 processor + SyncReadParquetDataSource × 1 processor + +# range frame with ranking function (can push down limit) +query T +explain pipeline select a, dense_rank() over (partition by a order by a desc) from t limit 3 +---- +CompoundBlockOperator(Project) × 1 processor + LimitTransform × 1 processor + Transform Window × 1 processor + Merge (TransformSortMergeLimit × 4 processors) to (Transform Window × 1) + TransformSortMergeLimit × 4 processors + SortPartialTransform × 4 processors + Merge (DeserializeDataTransform × 1 processor) to (SortPartialTransform × 4) + DeserializeDataTransform × 1 processor + SyncReadParquetDataSource × 1 processor + +# rows frame single window (can push down limit) +query T +explain pipeline select a, sum(a) over (partition by a order by a desc rows between unbounded preceding and current row) from t limit 3 +---- +CompoundBlockOperator(Project) × 1 processor + LimitTransform × 1 processor + Transform Window × 1 processor + Merge (TransformSortMergeLimit × 4 processors) to (Transform Window × 1) + TransformSortMergeLimit × 4 processors + SortPartialTransform × 4 processors + Merge (DeserializeDataTransform × 1 processor) to (SortPartialTransform × 4) + DeserializeDataTransform × 1 processor + SyncReadParquetDataSource × 1 processor + +# rows frame single window (can not push down limit) +query T +explain pipeline select a, sum(a) over (partition by a order by a desc rows between unbounded preceding and unbounded following) from t limit 3 +---- +CompoundBlockOperator(Project) × 1 processor + LimitTransform × 1 processor + Transform Window × 1 processor + Merge (TransformSortMerge × 4 processors) to (Transform Window × 1) + TransformSortMerge × 4 processors + SortPartialTransform × 4 processors + Merge (DeserializeDataTransform × 1 processor) to (SortPartialTransform × 4) + DeserializeDataTransform × 1 processor + SyncReadParquetDataSource × 1 processor + +# rows frame multi window (can not push down limit) +query T +explain pipeline select a, sum(a) over (partition by a order by a desc rows between unbounded preceding and current row), +avg(a) over (order by a rows between unbounded preceding and current row) from t limit 3 +---- +CompoundBlockOperator(Project) × 1 processor + LimitTransform × 1 processor + Transform Window × 1 processor + Merge (TransformSortMerge × 4 processors) to (Transform Window × 1) + TransformSortMerge × 4 processors + SortPartialTransform × 4 processors + Merge (Transform Window × 1 processor) to (SortPartialTransform × 4) + Transform Window × 1 processor + Merge (TransformSortMerge × 4 processors) to (Transform Window × 1) + TransformSortMerge × 4 processors + SortPartialTransform × 4 processors + Merge (DeserializeDataTransform × 1 processor) to (SortPartialTransform × 4) + DeserializeDataTransform × 1 processor + SyncReadParquetDataSource × 1 processor + statement ok DROP DATABASE test_explain_window; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test b/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test index 575bd782b6f5..52d1cbc23ef0 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test @@ -1,4 +1,7 @@ # This case 
depends on explain(standalone mode), thus we put it here +statement ok +drop table if exists bloom_test_t; + statement ok create table bloom_test_t(c1 int, c2 int) diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test index e1b7b1e6de88..1e366ae86672 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test @@ -679,54 +679,50 @@ create table t3 as select number as a, number as b from numbers(10) query T explain select * from t1,t2, t3 where (t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b < 4) or t3.a = 2 ---- -Filter +HashJoin ├── output columns: [t3.a (#4), t3.b (#5), t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] +├── join type: INNER +├── build keys: [] +├── probe keys: [] ├── filters: [t1.a (#0) > 1 AND t2.a (#2) > 2 OR t1.b (#1) < 3 AND t2.b (#3) < 4 OR t3.a (#4) = 2] -├── estimated rows: 21.20 -└── HashJoin - ├── output columns: [t3.a (#4), t3.b (#5), t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] - ├── join type: CROSS - ├── build keys: [] - ├── probe keys: [] - ├── filters: [] - ├── estimated rows: 50.00 - ├── HashJoin(Build) - │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] - │ ├── join type: CROSS - │ ├── build keys: [] - │ ├── probe keys: [] - │ ├── filters: [] - │ ├── estimated rows: 5.00 - │ ├── TableScan(Build) - │ │ ├── table: default.default.t1 - │ │ ├── output columns: [a (#0), b (#1)] - │ │ ├── read rows: 1 - │ │ ├── read bytes: 36 - │ │ ├── partitions total: 1 - │ │ ├── partitions scanned: 1 - │ │ ├── pruning stats: [segments: , blocks: ] - │ │ ├── push downs: [filters: [], limit: NONE] - │ │ └── estimated rows: 1.00 - │ └── TableScan(Probe) - │ ├── table: default.default.t2 - │ ├── output columns: [a (#2), b (#3)] - │ ├── read rows: 5 - │ ├── read bytes: 68 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── pruning stats: [segments: , blocks: ] - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 5.00 - └── TableScan(Probe) - ├── table: default.default.t3 - ├── output columns: [a (#4), b (#5)] - ├── read rows: 10 - ├── read bytes: 108 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 +├── estimated rows: 50.00 +├── HashJoin(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] +│ ├── join type: CROSS +│ ├── build keys: [] +│ ├── probe keys: [] +│ ├── filters: [] +│ ├── estimated rows: 5.00 +│ ├── TableScan(Build) +│ │ ├── table: default.default.t1 +│ │ ├── output columns: [a (#0), b (#1)] +│ │ ├── read rows: 1 +│ │ ├── read bytes: 36 +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── pruning stats: [segments: , blocks: ] +│ │ ├── push downs: [filters: [], limit: NONE] +│ │ └── estimated rows: 1.00 +│ └── TableScan(Probe) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 5 +│ ├── read bytes: 68 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 5.00 +└── TableScan(Probe) + ├── table: default.default.t3 + ├── output columns: [a (#4), b (#5)] + ├── read rows: 10 + ├── read bytes: 108 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], 
limit: NONE] + └── estimated rows: 10.00 query T explain select * from t1,t2, t3 where ((t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b < 4)) and t3.a > 1 diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test b/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test index d36017e381c5..f91b67c54fa9 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test @@ -462,7 +462,7 @@ TableScan ├── read bytes: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) > 10, t1.a (#0) <= 100)], limit: NONE] +├── push downs: [filters: [and_filters(t1.a (#0) <= 100, t1.a (#0) > 10)], limit: NONE] └── estimated rows: 0.00 # t1.a > 10 and t2.a > 10 @@ -500,46 +500,46 @@ query T explain select * from t1, t2, t3 where t1.a = t2.a and t1.a = t3.a and t1.a > 5 and t3.a < 10; ---- HashJoin -├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2), t3.b (#5), t3.a (#4)] +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] ├── join type: INNER ├── build keys: [t3.a (#4)] ├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 0.00 -├── TableScan(Build) -│ ├── table: default.default.t3 -│ ├── output columns: [a (#4), b (#5)] -│ ├── read rows: 0 -│ ├── read bytes: 0 -│ ├── partitions total: 0 -│ ├── partitions scanned: 0 -│ ├── push downs: [filters: [and_filters(t3.a (#4) > 5, t3.a (#4) < 10)], limit: NONE] -│ └── estimated rows: 0.00 -└── HashJoin(Probe) - ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] - ├── join type: INNER - ├── build keys: [t2.a (#2)] - ├── probe keys: [t1.a (#0)] - ├── filters: [] - ├── estimated rows: 0.00 - ├── TableScan(Build) - │ ├── table: default.default.t2 - │ ├── output columns: [a (#2), b (#3)] - │ ├── read rows: 0 - │ ├── read bytes: 0 - │ ├── partitions total: 0 - │ ├── partitions scanned: 0 - │ ├── push downs: [filters: [and_filters(t2.a (#2) > 5, t2.a (#2) < 10)], limit: NONE] - │ └── estimated rows: 0.00 - └── TableScan(Probe) - ├── table: default.default.t1 - ├── output columns: [a (#0), b (#1)] - ├── read rows: 0 - ├── read bytes: 0 - ├── partitions total: 0 - ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) > 5, t1.a (#0) < 10)], limit: NONE] - └── estimated rows: 0.00 +├── HashJoin(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] +│ ├── join type: INNER +│ ├── build keys: [t3.a (#4)] +│ ├── probe keys: [t2.a (#2)] +│ ├── filters: [] +│ ├── estimated rows: 0.00 +│ ├── TableScan(Build) +│ │ ├── table: default.default.t3 +│ │ ├── output columns: [a (#4), b (#5)] +│ │ ├── read rows: 0 +│ │ ├── read bytes: 0 +│ │ ├── partitions total: 0 +│ │ ├── partitions scanned: 0 +│ │ ├── push downs: [filters: [and_filters(t3.a (#4) > 5, t3.a (#4) < 10)], limit: NONE] +│ │ └── estimated rows: 0.00 +│ └── TableScan(Probe) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [and_filters(t2.a (#2) > 5, t2.a (#2) < 10)], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [and_filters(t1.a (#0) > 5, t1.a (#0) < 10)], limit: 
NONE] + └── estimated rows: 0.00 # t1.a > 5 and t2.a > 10 query T @@ -675,6 +675,52 @@ HashJoin ├── push downs: [filters: [false], limit: NONE] └── estimated rows: 0.00 +# t1.a = t2.a, t1.a = t3.a => t2.a = t3.a +query T +explain select * from t1, t2, t3 where t1.a = t2.a and t1.a = t3.a; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] +├── join type: INNER +├── build keys: [t3.a (#4)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.00 +├── HashJoin(Build) +│ ├── output columns: [t2.a (#2), t2.b (#3), t3.b (#5), t3.a (#4)] +│ ├── join type: INNER +│ ├── build keys: [t3.a (#4)] +│ ├── probe keys: [t2.a (#2)] +│ ├── filters: [] +│ ├── estimated rows: 0.00 +│ ├── TableScan(Build) +│ │ ├── table: default.default.t3 +│ │ ├── output columns: [a (#4), b (#5)] +│ │ ├── read rows: 0 +│ │ ├── read bytes: 0 +│ │ ├── partitions total: 0 +│ │ ├── partitions scanned: 0 +│ │ ├── push downs: [filters: [], limit: NONE] +│ │ └── estimated rows: 0.00 +│ └── TableScan(Probe) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 0.00 + statement ok drop table if exists t1; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test index 408086894f64..6e9cab5f2f3d 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test @@ -75,8 +75,8 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + ├── push downs: [filters: [t1.number (#1) = t1.number (#1) + 1], limit: NONE] + └── estimated rows: 2.00 query T explain select t.number from t, t1 where t.number > 1 and 1 < t1.number @@ -87,7 +87,7 @@ HashJoin ├── build keys: [] ├── probe keys: [] ├── filters: [] -├── estimated rows: 0.18 +├── estimated rows: 1.64 ├── TableScan(Build) │ ├── table: default.default.t │ ├── output columns: [number (#0)] @@ -106,8 +106,8 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [1 < t1.number (#1)], limit: NONE] - └── estimated rows: 0.91 + ├── push downs: [filters: [t1.number (#1) > 1], limit: NONE] + └── estimated rows: 8.18 query T explain select t.number from t, t1 where t.number + t1.number = 1 @@ -347,32 +347,32 @@ query T explain select * from onecolumn as a right join twocolumn as b on a.x = b.x where b.x > 42 and b.x < 45 ---- HashJoin -├── output columns: [a.x (#0), b.x (#1), b.y (#2)] -├── join type: RIGHT OUTER -├── build keys: [b.x (#1)] -├── probe keys: [a.x (#0)] +├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── join type: LEFT OUTER +├── build keys: [a.x (#0)] +├── probe keys: [b.x (#1)] ├── filters: [] ├── estimated rows: 3.20 ├── TableScan(Build) -│ ├── table: default.default.twocolumn -│ ├── output columns: [x (#1), y (#2)] +│ ├── table: default.default.onecolumn +│ ├── output columns: [x (#0)] │ ├── read rows: 4 
-│ ├── read bytes: 62 +│ ├── read bytes: 29 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] -│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] -│ └── estimated rows: 3.20 +│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] +│ └── estimated rows: 3.00 └── TableScan(Probe) - ├── table: default.default.onecolumn - ├── output columns: [x (#0)] + ├── table: default.default.twocolumn + ├── output columns: [x (#1), y (#2)] ├── read rows: 4 - ├── read bytes: 29 + ├── read bytes: 62 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 4.00 + ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] + └── estimated rows: 3.20 statement ok drop table t diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/chain.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/chain.test index d68450cd8be9..1db3be1d2979 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/chain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/chain.test @@ -29,9 +29,9 @@ query T explain select * from t, t1, t2 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#2), t.a (#0), t1.a (#1)] +├── output columns: [t2.a (#2), t1.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t1.a (#1)] +├── build keys: [t.a (#0)] ├── probe keys: [t2.a (#2)] ├── filters: [] ├── estimated rows: 1.00 @@ -77,17 +77,17 @@ query T explain select * from t, t2, t1 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#1), t.a (#0), t1.a (#2)] +├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t1.a (#2)] -├── probe keys: [t2.a (#1)] +├── build keys: [t.a (#0)] +├── probe keys: [t1.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t1.a (#2), t.a (#0)] +│ ├── output columns: [t2.a (#1), t.a (#0)] │ ├── join type: INNER │ ├── build keys: [t.a (#0)] -│ ├── probe keys: [t1.a (#2)] +│ ├── probe keys: [t2.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -101,41 +101,41 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t1 -│ ├── output columns: [a (#2)] -│ ├── read rows: 10 -│ ├── read bytes: 54 +│ ├── table: default.join_reorder.t2 +│ ├── output columns: [a (#1)] +│ ├── read rows: 100 +│ ├── read bytes: 414 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 10.00 +│ └── estimated rows: 100.00 └── TableScan(Probe) - ├── table: default.join_reorder.t2 - ├── output columns: [a (#1)] - ├── read rows: 100 - ├── read bytes: 414 + ├── table: default.join_reorder.t1 + ├── output columns: [a (#2)] + ├── read rows: 10 + ├── read bytes: 54 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 100.00 + └── estimated rows: 10.00 query T explain select * from t1, t, t2 where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#2), t.a (#1), t1.a (#0)] +├── output columns: [t1.a 
(#0), t.a (#1), t2.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#0)] -├── probe keys: [t2.a (#2)] +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t1.a (#0), t.a (#1)] +│ ├── output columns: [t2.a (#2), t.a (#1)] │ ├── join type: INNER │ ├── build keys: [t.a (#1)] -│ ├── probe keys: [t1.a (#0)] +│ ├── probe keys: [t2.a (#2)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -149,41 +149,41 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t1 -│ ├── output columns: [a (#0)] -│ ├── read rows: 10 -│ ├── read bytes: 54 +│ ├── table: default.join_reorder.t2 +│ ├── output columns: [a (#2)] +│ ├── read rows: 100 +│ ├── read bytes: 414 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 10.00 +│ └── estimated rows: 100.00 └── TableScan(Probe) - ├── table: default.join_reorder.t2 - ├── output columns: [a (#2)] - ├── read rows: 100 - ├── read bytes: 414 + ├── table: default.join_reorder.t1 + ├── output columns: [a (#0)] + ├── read rows: 10 + ├── read bytes: 54 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 100.00 + └── estimated rows: 10.00 query T explain select * from t1, t2, t where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#1), t.a (#2), t1.a (#0)] +├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#0)] -├── probe keys: [t2.a (#1)] +├── build keys: [t.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t1.a (#0), t.a (#2)] +│ ├── output columns: [t2.a (#1), t.a (#2)] │ ├── join type: INNER │ ├── build keys: [t.a (#2)] -│ ├── probe keys: [t1.a (#0)] +│ ├── probe keys: [t2.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -197,33 +197,33 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t1 -│ ├── output columns: [a (#0)] -│ ├── read rows: 10 -│ ├── read bytes: 54 +│ ├── table: default.join_reorder.t2 +│ ├── output columns: [a (#1)] +│ ├── read rows: 100 +│ ├── read bytes: 414 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 10.00 +│ └── estimated rows: 100.00 └── TableScan(Probe) - ├── table: default.join_reorder.t2 - ├── output columns: [a (#1)] - ├── read rows: 100 - ├── read bytes: 414 + ├── table: default.join_reorder.t1 + ├── output columns: [a (#0)] + ├── read rows: 10 + ├── read bytes: 54 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 100.00 + └── estimated rows: 10.00 query T explain select * from t2, t1, t where t.a = t1.a and t1.a = t2.a ---- HashJoin -├── output columns: [t2.a (#0), t.a (#2), t1.a (#1)] +├── output columns: [t2.a (#0), t1.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#1)] +├── build keys: [t.a (#2)] ├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 @@ -504,9 +504,9 @@ explain join select * from t right anti join 
t1 on t1.a = t.a ---- HashJoin: LEFT ANTI ├── Build -│ └── Scan: default.join_reorder.t (read rows: 1) +│ └── Scan: default.join_reorder.t (#0) (read rows: 1) └── Probe - └── Scan: default.join_reorder.t1 (read rows: 10) + └── Scan: default.join_reorder.t1 (#1) (read rows: 10) statement ok drop database join_reorder diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/cycles.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/cycles.test index b53e40e1f7cf..cbb25b91d504 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/cycles.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/cycles.test @@ -22,8 +22,8 @@ explain select * from t, t1, t2 where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t2.a (#2), t1.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t.a (#0), t1.a (#1)] -├── probe keys: [t2.a (#2), t2.a (#2)] +├── build keys: [t.a (#0)] +├── probe keys: [t2.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -70,8 +70,8 @@ explain select * from t, t2, t1 where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t.a (#0), t2.a (#1)] -├── probe keys: [t1.a (#2), t1.a (#2)] +├── build keys: [t.a (#0)] +├── probe keys: [t1.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -116,10 +116,10 @@ query T explain select * from t1, t, t2 where t.a = t1.a and t1.a = t2.a and t2.a = t.a ---- HashJoin -├── output columns: [t1.a (#0), t2.a (#2), t.a (#1)] +├── output columns: [t1.a (#0), t.a (#1), t2.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#2), t.a (#1)] -├── probe keys: [t1.a (#0), t1.a (#0)] +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -166,8 +166,8 @@ explain select * from t1, t2, t where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t.a (#2), t2.a (#1)] -├── probe keys: [t1.a (#0), t1.a (#0)] +├── build keys: [t.a (#2)] +├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -214,8 +214,8 @@ explain select * from t2, t1, t where t.a = t1.a and t1.a = t2.a and t2.a = t.a HashJoin ├── output columns: [t2.a (#0), t1.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t.a (#2), t1.a (#1)] -├── probe keys: [t2.a (#0), t2.a (#0)] +├── build keys: [t.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) @@ -260,10 +260,10 @@ query T explain select * from t2, t, t1 where t.a = t1.a and t1.a = t2.a and t2.a = t.a ---- HashJoin -├── output columns: [t2.a (#0), t1.a (#2), t.a (#1)] +├── output columns: [t2.a (#0), t.a (#1), t1.a (#2)] ├── join type: INNER -├── build keys: [t1.a (#2), t.a (#1)] -├── probe keys: [t2.a (#0), t2.a (#0)] +├── build keys: [t1.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/star.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/star.test index 0a1574892fee..c45c0548f8ff 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/star.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join_reorder/star.test @@ -20,17 +20,17 @@ query T 
explain select * from t, t1, t2 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#1), t.a (#0), t2.a (#2)] +├── output columns: [t2.a (#2), t1.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t2.a (#2)] -├── probe keys: [t1.a (#1)] +├── build keys: [t.a (#0)] +├── probe keys: [t2.a (#2)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#2), t.a (#0)] +│ ├── output columns: [t1.a (#1), t.a (#0)] │ ├── join type: INNER │ ├── build keys: [t.a (#0)] -│ ├── probe keys: [t2.a (#2)] +│ ├── probe keys: [t1.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -44,33 +44,33 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#2)] -│ ├── read rows: 100 -│ ├── read bytes: 414 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#1)] +│ ├── read rows: 10 +│ ├── read bytes: 54 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#1)] - ├── read rows: 10 - ├── read bytes: 54 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#2)] + ├── read rows: 100 + ├── read bytes: 414 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t, t2, t1 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#2), t.a (#0), t2.a (#1)] +├── output columns: [t1.a (#2), t2.a (#1), t.a (#0)] ├── join type: INNER -├── build keys: [t2.a (#1)] +├── build keys: [t.a (#0)] ├── probe keys: [t1.a (#2)] ├── filters: [] ├── estimated rows: 1.00 @@ -164,9 +164,9 @@ query T explain select * from t1, t2, t where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#0), t.a (#2), t2.a (#1)] +├── output columns: [t1.a (#0), t2.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#1)] +├── build keys: [t.a (#2)] ├── probe keys: [t1.a (#0)] ├── filters: [] ├── estimated rows: 1.00 @@ -212,17 +212,17 @@ query T explain select * from t2, t1, t where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#1), t.a (#2), t2.a (#0)] +├── output columns: [t2.a (#0), t1.a (#1), t.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#0)] -├── probe keys: [t1.a (#1)] +├── build keys: [t.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#0), t.a (#2)] +│ ├── output columns: [t1.a (#1), t.a (#2)] │ ├── join type: INNER │ ├── build keys: [t.a (#2)] -│ ├── probe keys: [t2.a (#0)] +│ ├── probe keys: [t1.a (#1)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -236,41 +236,41 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#0)] -│ ├── read rows: 100 -│ ├── read bytes: 414 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#1)] +│ ├── read rows: 10 +│ ├── read bytes: 54 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated 
rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#1)] - ├── read rows: 10 - ├── read bytes: 54 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#0)] + ├── read rows: 100 + ├── read bytes: 414 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + └── estimated rows: 100.00 query T explain select * from t2, t, t1 where t.a = t2.a and t1.a = t2.a ---- HashJoin -├── output columns: [t1.a (#2), t.a (#1), t2.a (#0)] +├── output columns: [t2.a (#0), t.a (#1), t1.a (#2)] ├── join type: INNER -├── build keys: [t2.a (#0)] -├── probe keys: [t1.a (#2)] +├── build keys: [t1.a (#2)] +├── probe keys: [t2.a (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── HashJoin(Build) -│ ├── output columns: [t2.a (#0), t.a (#1)] +│ ├── output columns: [t1.a (#2), t.a (#1)] │ ├── join type: INNER │ ├── build keys: [t.a (#1)] -│ ├── probe keys: [t2.a (#0)] +│ ├── probe keys: [t1.a (#2)] │ ├── filters: [] │ ├── estimated rows: 1.00 │ ├── TableScan(Build) @@ -284,25 +284,25 @@ HashJoin │ │ ├── push downs: [filters: [], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) -│ ├── table: default.join_reorder.t2 -│ ├── output columns: [a (#0)] -│ ├── read rows: 100 -│ ├── read bytes: 414 +│ ├── table: default.join_reorder.t1 +│ ├── output columns: [a (#2)] +│ ├── read rows: 10 +│ ├── read bytes: 54 │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: , blocks: ] │ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 100.00 +│ └── estimated rows: 10.00 └── TableScan(Probe) - ├── table: default.join_reorder.t1 - ├── output columns: [a (#2)] - ├── read rows: 10 - ├── read bytes: 54 + ├── table: default.join_reorder.t2 + ├── output columns: [a (#0)] + ├── read rows: 100 + ├── read bytes: 414 ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: , blocks: ] ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 10.00 + └── estimated rows: 100.00 statement ok drop database join_reorder diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/limit.test b/tests/sqllogictests/suites/mode/standalone/explain_native/limit.test index 12c1a26d4a41..c33a8410dee6 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/limit.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/limit.test @@ -97,13 +97,13 @@ Limit ├── estimated rows: 0.20 └── Filter ├── output columns: [t.number (#0)] - ├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(CAST(is_not_null(scalar_subquery_4 (#4)) AS Boolean NULL), scalar_subquery_4 (#4), 0))] + ├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(true, TRY_CAST(scalar_subquery_4 (#4) AS UInt64 NULL), 0))] ├── estimated rows: 0.20 └── HashJoin ├── output columns: [t.number (#0), COUNT(*) (#4)] - ├── join type: LEFT SINGLE + ├── join type: INNER ├── build keys: [number (#2)] - ├── probe keys: [CAST(number (#0) AS UInt64 NULL)] + ├── probe keys: [number (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── AggregateFinal(Build) diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/prune_column.test b/tests/sqllogictests/suites/mode/standalone/explain_native/prune_column.test index a8571ac69ad6..9c6f8a75331f 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/prune_column.test +++ 
b/tests/sqllogictests/suites/mode/standalone/explain_native/prune_column.test @@ -117,90 +117,90 @@ explain select t1.a from (select number + 1 as a, number + 1 as b from numbers(1 HashJoin ├── output columns: [a (#1)] ├── join type: INNER -├── build keys: [CAST(t1.a (#1) AS UInt64 NULL)] -├── probe keys: [_if_scalar_subquery (#15)] +├── build keys: [_if_scalar_subquery (#15)] +├── probe keys: [CAST(t1.a (#1) AS UInt64 NULL)] ├── filters: [] ├── estimated rows: 1.00 ├── EvalScalar(Build) -│ ├── output columns: [a (#1)] -│ ├── expressions: [numbers.number (#0) + 1] +│ ├── output columns: [_if_scalar_subquery (#15)] +│ ├── expressions: [if(CAST(_count_scalar_subquery (#13) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#14))] │ ├── estimated rows: 1.00 -│ └── TableScan -│ ├── table: default.system.numbers -│ ├── output columns: [number (#0)] -│ ├── read rows: 1 -│ ├── read bytes: 8 -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 1.00 +│ └── Limit +│ ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] +│ ├── limit: 1 +│ ├── offset: 0 +│ ├── estimated rows: 1.00 +│ └── AggregateFinal +│ ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(COUNT(*))] +│ ├── limit: 1 +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(COUNT(*))] +│ ├── estimated rows: 1.00 +│ └── AggregateFinal +│ ├── output columns: [COUNT(*) (#12)] +│ ├── group by: [] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [COUNT(*) (#12)] +│ ├── group by: [] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── HashJoin +│ ├── output columns: [] +│ ├── join type: INNER +│ ├── build keys: [t2.b (#5)] +│ ├── probe keys: [t3.b (#10)] +│ ├── filters: [] +│ ├── estimated rows: 0.20 +│ ├── EvalScalar(Build) +│ │ ├── output columns: [b (#5)] +│ │ ├── expressions: [numbers.number (#3) + 1] +│ │ ├── estimated rows: 0.20 +│ │ └── Filter +│ │ ├── output columns: [numbers.number (#3)] +│ │ ├── filters: [numbers.number (#3) + 1 = 1] +│ │ ├── estimated rows: 0.20 +│ │ └── TableScan +│ │ ├── table: default.system.numbers +│ │ ├── output columns: [number (#3)] +│ │ ├── read rows: 1 +│ │ ├── read bytes: 8 +│ │ ├── partitions total: 1 +│ │ ├── partitions scanned: 1 +│ │ ├── push downs: [filters: [numbers.number (#3) + 1 = 1], limit: NONE] +│ │ └── estimated rows: 1.00 +│ └── EvalScalar(Probe) +│ ├── output columns: [b (#10)] +│ ├── expressions: [numbers.number (#8) + 1] +│ ├── estimated rows: 1.00 +│ └── TableScan +│ ├── table: default.system.numbers +│ ├── output columns: [number (#8)] +│ ├── read rows: 1 +│ ├── read bytes: 8 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 1.00 └── EvalScalar(Probe) - ├── output columns: [_if_scalar_subquery (#15)] - ├── expressions: [if(CAST(_count_scalar_subquery (#13) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#14))] + ├── output columns: [a (#1)] + ├── expressions: [numbers.number (#0) + 1] ├── estimated rows: 1.00 - └── Limit - ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] - ├── limit: 1 - ├── offset: 0 - ├── estimated rows: 1.00 - └── AggregateFinal - ├── output columns: 
[_count_scalar_subquery (#13), _any_scalar_subquery (#14)] - ├── group by: [] - ├── aggregate functions: [count(), any(COUNT(*))] - ├── limit: 1 - ├── estimated rows: 1.00 - └── AggregatePartial - ├── output columns: [_count_scalar_subquery (#13), _any_scalar_subquery (#14)] - ├── group by: [] - ├── aggregate functions: [count(), any(COUNT(*))] - ├── estimated rows: 1.00 - └── AggregateFinal - ├── output columns: [COUNT(*) (#12)] - ├── group by: [] - ├── aggregate functions: [count()] - ├── estimated rows: 1.00 - └── AggregatePartial - ├── output columns: [COUNT(*) (#12)] - ├── group by: [] - ├── aggregate functions: [count()] - ├── estimated rows: 1.00 - └── HashJoin - ├── output columns: [] - ├── join type: INNER - ├── build keys: [t2.b (#5)] - ├── probe keys: [t3.b (#10)] - ├── filters: [] - ├── estimated rows: 0.20 - ├── EvalScalar(Build) - │ ├── output columns: [b (#5)] - │ ├── expressions: [numbers.number (#3) + 1] - │ ├── estimated rows: 0.20 - │ └── Filter - │ ├── output columns: [numbers.number (#3)] - │ ├── filters: [numbers.number (#3) + 1 = 1] - │ ├── estimated rows: 0.20 - │ └── TableScan - │ ├── table: default.system.numbers - │ ├── output columns: [number (#3)] - │ ├── read rows: 1 - │ ├── read bytes: 8 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [numbers.number (#3) + 1 = 1], limit: NONE] - │ └── estimated rows: 1.00 - └── EvalScalar(Probe) - ├── output columns: [b (#10)] - ├── expressions: [numbers.number (#8) + 1] - ├── estimated rows: 1.00 - └── TableScan - ├── table: default.system.numbers - ├── output columns: [number (#8)] - ├── read rows: 1 - ├── read bytes: 8 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 1.00 + └── TableScan + ├── table: default.system.numbers + ├── output columns: [number (#0)] + ├── read rows: 1 + ├── read bytes: 8 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 query T explain select name from system.functions order by example diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_eval_scalar.test similarity index 55% rename from tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter.test rename to tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_eval_scalar.test index 4f3cd9bdc171..8aff143dfa86 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_eval_scalar.test @@ -1,75 +1,3 @@ -# push down filter ProjectSet -statement ok -drop table if exists products; - -statement ok -create table products(name varchar, details variant); - -statement ok -insert into products (name, details) values ('Laptop', '{"brand": "Dell", "colors": ["Black", "Silver"], "price": 1200, "features": {"ram": "16GB", "storage": "512GB"}}'), ('Smartphone', '{"brand": "Apple", "colors": ["White", "Black"], "price": 999, "features": {"ram": "4GB", "storage": "128GB"}}'), ('Headphones', '{"brand": "Sony", "colors": ["Black", "Blue", "Red"], "price": 150, "features": {"battery": "20h", "bluetooth": "5.0"}}'); - -query T -explain select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature 
from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; ----- -EvalScalar -├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] -├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] -├── estimated rows: 0.12 -└── Filter - ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] - ├── filters: [is_true(TRY_CAST(get(1)(json_path_query (#2)) AS String NULL) = '512GB')] - ├── estimated rows: 0.12 - └── ProjectSet - ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] - ├── estimated rows: 0.60 - ├── set returning functions: json_path_query(products.details (#1), '$.features.*') - └── TableScan - ├── table: default.default.products - ├── output columns: [name (#0), details (#1)] - ├── read rows: 3 - ├── read bytes: 370 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] - └── estimated rows: 0.60 - -query T -select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; ----- -Laptop "512GB" "16GB" - -query T -explain select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; ----- -EvalScalar -├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] -├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] -├── estimated rows: 0.60 -└── ProjectSet - ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] - ├── estimated rows: 0.60 - ├── set returning functions: json_path_query(products.details (#1), '$.features.*') - └── TableScan - ├── table: default.default.products - ├── output columns: [name (#0), details (#1)] - ├── read rows: 3 - ├── read bytes: 370 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] - └── estimated rows: 0.60 - -query T -select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; ----- -Laptop "16GB" "16GB" -Laptop "512GB" "16GB" - -statement ok -drop table products; - # push down filter EvalScalar statement ok drop table if exists t1; @@ -178,7 +106,7 @@ AggregateFinal │ │ ├── read bytes: 0 │ │ ├── partitions total: 0 │ │ ├── partitions scanned: 0 - │ │ ├── push downs: [filters: [], limit: NONE] + │ │ ├── push downs: [filters: [is_true(t2.sid (#1) = 1)], limit: NONE] │ │ └── estimated rows: 0.00 │ └── TableScan(Probe) │ ├── table: default.default.t1 @@ -218,44 +146,13 @@ AggregateFinal └── estimated rows: 0.00 statement ok -drop table t1; - -statement ok -drop table t2; - -statement ok -drop view v1; - -statement ok -drop view v2; - -# push down alias filter scan -statement ok -drop table if exists t; 
+drop table if exists t1; statement ok -create table t (x INT); +drop table if exists t2; statement ok -insert into t(x) values (1), (2); - -query I -explain select * from t as a(id) where a.id > 1; ----- -Filter -├── output columns: [a.x (#0)] -├── filters: [is_true(a.id (#0) > 1)] -├── estimated rows: 0.40 -└── TableScan - ├── table: default.default.t - ├── output columns: [x (#0)] - ├── read rows: 2 - ├── read bytes: 24 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── pruning stats: [segments: , blocks: ] - ├── push downs: [filters: [is_true(t.x (#0) > 1)], limit: NONE] - └── estimated rows: 2.00 +drop view if exists v1; statement ok -drop table t; +drop view if exists v2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test new file mode 100644 index 000000000000..a669551bcbe4 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test @@ -0,0 +1,248 @@ +# push down filter full outer join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# convert full outer join to inner join, can propagate t1.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t1.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to inner join, can propagate t2.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t2.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: RIGHT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: 
[is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to left outer join, can not propagate t1.b > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t1.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.b (#1) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to right outer join, can not propagate t2.b > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t2.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: RIGHT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.b (#3) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to inner join, can propagate t1.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t1.a > 0 and t2.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [and_filters(t2.a (#2) > 0, t2.b (#3) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert full outer join to inner join, can propagate t2.a > 0 +query T +explain select * from t1 full outer join t2 on t1.a = t2.a where t2.a > 0 and t1.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── TableScan(Build) +│ ├── table: 
default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [and_filters(t1.a (#0) > 0, t1.b (#1) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# full outer join, can not push down +query T +explain select * from t1 full outer join t2 on t1.a = t2.a and t2.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: FULL OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [t2.a (#2) > 0] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test new file mode 100644 index 000000000000..7008a66cbe7f --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test @@ -0,0 +1,92 @@ +# push down filter inner join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# can propagate t1.a > 3 +query T +explain select * from t1 inner join t2 on t1.a = t2.a where t1.a > 3; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 3)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 3)], limit: NONE] + └── estimated rows: 1.00 + +# can propagate (t2.a > 1 or t2.a <= 2) 
+query T +explain select * from t1 inner join t2 on t1.a = t2.a where t2.a <= 2 or (t1.a > 1 and t2.a > 1); +---- +Filter +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── filters: [is_true(t2.a (#2) <= 2 OR t1.a (#0) > 1 AND t2.a (#2) > 1)] +├── estimated rows: 0.45 +└── HashJoin + ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] + ├── join type: INNER + ├── build keys: [t2.a (#2)] + ├── probe keys: [t1.a (#0)] + ├── filters: [] + ├── estimated rows: 1.26 + ├── TableScan(Build) + │ ├── table: default.default.t2 + │ ├── output columns: [a (#2), b (#3)] + │ ├── read rows: 3 + │ ├── read bytes: 56 + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── pruning stats: [segments: , blocks: ] + │ ├── push downs: [filters: [is_true(t2.a (#2) <= 2 OR t2.a (#2) > 1)], limit: NONE] + │ └── estimated rows: 1.08 + └── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) <= 2 OR t1.a (#0) > 1)], limit: NONE] + └── estimated rows: 3.50 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test new file mode 100644 index 000000000000..436b194698f7 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test @@ -0,0 +1,232 @@ +# push down filter left outer join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# left outer join, can propagate t1.a > 0 and convert it to inner join +query T +explain select * from t1 left join t2 on t1.a = t2.a where t1.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# left outer join, can not propagate t1.a > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a and t1.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [t1.a (#0) > 0] +├── estimated rows: 4.00 +├── TableScan(Build) +│ 
├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 4.00 + +# left outer join, can not propagate t1.b > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a where t1.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.a (#2), t2.b (#3)] +├── join type: LEFT OUTER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 4.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 3.00 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.b (#1) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert left outer join to inner join, can propagate t2.a > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a where t2.a > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 0)], limit: NONE] + └── estimated rows: 4.00 + +# convert left outer join to inner join, can not propagate t2.b > 0 +query T +explain select * from t1 left join t2 on t1.a = t2.a where t2.b > 0; +---- +HashJoin +├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] +├── join type: INNER +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.80 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2), b (#3)] +│ ├── read rows: 3 +│ ├── read bytes: 56 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.b (#3) > 0)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0), b (#1)] + ├── read rows: 4 + ├── read bytes: 66 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [], limit: NONE] + └── 
estimated rows: 4.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +drop table if exists m1; + +statement ok +drop table if exists j1; + +statement ok +create table m1(id varchar, "context" varchar); + +statement ok +create table j1(id varchar); + +# In `can_filter_null`, if the function is `assume_not_null` or `remove_nullable`, we cannot replace the column bindings with `Scalar::Null`. +query T +explain WITH base AS (SELECT id, context FROM m1), src1 AS (SELECT base.id FROM base WHERE IFNULL(base.context, '') = ''), join1 AS (SELECT id FROM j1) SELECT src1.id FROM src1 LEFT OUTER JOIN join1 ON TRUE; +---- +HashJoin +├── output columns: [m1.id (#0)] +├── join type: LEFT OUTER +├── build keys: [] +├── probe keys: [] +├── filters: [] +├── estimated rows: 0.00 +├── TableScan(Build) +│ ├── table: default.default.j1 +│ ├── output columns: [] +│ ├── read rows: 0 +│ ├── read size: 0 +│ ├── partitions total: 0 +│ ├── partitions scanned: 0 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 0.00 +└── TableScan(Probe) + ├── table: default.default.m1 + ├── output columns: [id (#0)] + ├── read rows: 0 + ├── read size: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [is_true(if(CAST(is_not_null(m1.context (#1)) AS Boolean NULL), CAST(assume_not_null(m1.context (#1)) AS String NULL), true, '', NULL) = '')], limit: NONE] + └── estimated rows: 0.00 + +statement ok +drop table if exists m1; + +statement ok +drop table if exists j1; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_semi_anti.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_semi_anti.test new file mode 100644 index 000000000000..9e30d7cc5a57 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_semi_anti.test @@ -0,0 +1,88 @@ +# push down filter semi join +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; + +statement ok +create table t1(a int, b int); + +statement ok +create table t2(a int, b int); + +statement ok +insert into t1 values(null, null), (1, 1), (2, 2), (3, 3); + +statement ok +insert into t2 values(null, null), (1, 1), (2, 2); + +# left semi, can propagate t1.a > 3 +query T +explain select t1.a from t1 where exists (select * from t2 where t1.a = t2.a) and t1.a > 3; +---- +HashJoin +├── output columns: [t1.a (#0)] +├── join type: LEFT SEMI +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 0.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 3)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 3)], limit: NONE] + └── estimated rows: 1.00 + +# left anti, can propagate t1.a > 3 +query T +explain select t1.a from t1 where not exists (select * from t2 where t1.a = t2.a) and t1.a > 3; +---- +HashJoin +├── output columns: [t1.a 
(#0)] +├── join type: LEFT ANTI +├── build keys: [t2.a (#2)] +├── probe keys: [t1.a (#0)] +├── filters: [] +├── estimated rows: 1.00 +├── TableScan(Build) +│ ├── table: default.default.t2 +│ ├── output columns: [a (#2)] +│ ├── read rows: 0 +│ ├── read bytes: 0 +│ ├── partitions total: 1 +│ ├── partitions scanned: 0 +│ ├── pruning stats: [segments: , blocks: ] +│ ├── push downs: [filters: [is_true(t2.a (#2) > 3)], limit: NONE] +│ └── estimated rows: 0.60 +└── TableScan(Probe) + ├── table: default.default.t1 + ├── output columns: [a (#0)] + ├── read rows: 0 + ├── read bytes: 0 + ├── partitions total: 1 + ├── partitions scanned: 0 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t1.a (#0) > 3)], limit: NONE] + └── estimated rows: 1.00 + +statement ok +drop table if exists t1; + +statement ok +drop table if exists t2; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_project_set.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_project_set.test new file mode 100644 index 000000000000..b9e52300e2a4 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_project_set.test @@ -0,0 +1,71 @@ +# push down filter ProjectSet +statement ok +drop table if exists products; + +statement ok +create table products(name varchar, details variant); + +statement ok +insert into products(name, details) values ('Laptop', '{"brand": "Dell", "colors": ["Black", "Silver"], "price": 1200, "features": {"ram": "16GB", "storage": "512GB"}}'), ('Smartphone', '{"brand": "Apple", "colors": ["White", "Black"], "price": 999, "features": {"ram": "4GB", "storage": "128GB"}}'), ('Headphones', '{"brand": "Sony", "colors": ["Black", "Blue", "Red"], "price": 150, "features": {"battery": "20h", "bluetooth": "5.0"}}'); + +query T +explain select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; +---- +EvalScalar +├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] +├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] +├── estimated rows: 0.12 +└── Filter + ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] + ├── filters: [is_true(TRY_CAST(get(1)(json_path_query (#2)) AS String NULL) = '512GB')] + ├── estimated rows: 0.12 + └── ProjectSet + ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] + ├── estimated rows: 0.60 + ├── set returning functions: json_path_query(products.details (#1), '$.features.*') + └── TableScan + ├── table: default.default.products + ├── output columns: [name (#0), details (#1)] + ├── read rows: 3 + ├── read bytes: 370 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] + └── estimated rows: 0.60 + +query T +select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; +---- +Laptop "512GB" "16GB" + +query T +explain 
select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; +---- +EvalScalar +├── output columns: [products.name (#0), all_features (#3), first_feature (#4)] +├── expressions: [get(1)(json_path_query (#2)), json_path_query_first(products.details (#1), '$.features.*')] +├── estimated rows: 0.60 +└── ProjectSet + ├── output columns: [products.name (#0), products.details (#1), json_path_query (#2)] + ├── estimated rows: 0.60 + ├── set returning functions: json_path_query(products.details (#1), '$.features.*') + └── TableScan + ├── table: default.default.products + ├── output columns: [name (#0), details (#1)] + ├── read rows: 3 + ├── read bytes: 370 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', TRY_CAST(json_path_query_first(products.details (#1), '$.features.*') AS String NULL) = '16GB')], limit: NONE] + └── estimated rows: 0.60 + +query T +select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; +---- +Laptop "16GB" "16GB" +Laptop "512GB" "16GB" + +statement ok +drop table if exists products; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_scan.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_scan.test new file mode 100644 index 000000000000..b7cfcc2c15b1 --- /dev/null +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_scan.test @@ -0,0 +1,30 @@ +# push down alias filter scan +statement ok +drop table if exists t; + +statement ok +create table t (x INT); + +statement ok +insert into t(x) values (1), (2); + +query I +explain select * from t as a(id) where a.id > 1; +---- +Filter +├── output columns: [a.x (#0)] +├── filters: [is_true(a.id (#0) > 1)] +├── estimated rows: 0.40 +└── TableScan + ├── table: default.default.t + ├── output columns: [x (#0)] + ├── read rows: 2 + ├── read bytes: 24 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── pruning stats: [segments: , blocks: ] + ├── push downs: [filters: [is_true(t.x (#0) > 1)], limit: NONE] + └── estimated rows: 2.00 + +statement ok +drop table if exists t; diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/select.test b/tests/sqllogictests/suites/mode/standalone/explain_native/select.test index f95415b3ba9b..f74b8aeaea9e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/select.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/select.test @@ -16,7 +16,7 @@ explain select * from (select * from numbers(1)) as t1 where number = 1 ---- Filter ├── output columns: [numbers.number (#0)] -├── filters: [numbers.number (#0) = 1] +├── filters: [t1.number (#0) = 1] ├── estimated rows: 0.00 └── TableScan ├── table: default.system.numbers diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test b/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test index fdabb1d44936..f0181b37bd1e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test @@ -3,13 +3,13 @@ explain select t.number from 
numbers(1) as t, numbers(1) as t1 where t.number = ---- Filter ├── output columns: [t.number (#0)] -├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(CAST(is_not_null(scalar_subquery_4 (#4)) AS Boolean NULL), scalar_subquery_4 (#4), 0))] +├── filters: [is_true(CAST(t.number (#0) AS UInt64 NULL) = if(true, TRY_CAST(scalar_subquery_4 (#4) AS UInt64 NULL), 0))] ├── estimated rows: 0.20 └── HashJoin ├── output columns: [t.number (#0), COUNT(*) (#4)] - ├── join type: LEFT SINGLE + ├── join type: INNER ├── build keys: [number (#2)] - ├── probe keys: [CAST(number (#0) AS UInt64 NULL)] + ├── probe keys: [number (#0)] ├── filters: [] ├── estimated rows: 1.00 ├── AggregateFinal(Build) @@ -164,52 +164,52 @@ explain select t.number from numbers(1) as t where number = (select * from numbe HashJoin ├── output columns: [t.number (#0)] ├── join type: INNER -├── build keys: [CAST(t.number (#0) AS UInt64 NULL)] -├── probe keys: [_if_scalar_subquery (#4)] +├── build keys: [_if_scalar_subquery (#4)] +├── probe keys: [CAST(t.number (#0) AS UInt64 NULL)] ├── filters: [] ├── estimated rows: 1.00 -├── TableScan(Build) -│ ├── table: default.system.numbers -│ ├── output columns: [number (#0)] -│ ├── read rows: 1 -│ ├── read bytes: 8 -│ ├── partitions total: 1 -│ ├── partitions scanned: 1 -│ ├── push downs: [filters: [], limit: NONE] -│ └── estimated rows: 1.00 -└── EvalScalar(Probe) - ├── output columns: [_if_scalar_subquery (#4)] - ├── expressions: [if(CAST(_count_scalar_subquery (#2) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#3))] - ├── estimated rows: 1.00 - └── Limit - ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] - ├── limit: 1 - ├── offset: 0 - ├── estimated rows: 1.00 - └── AggregateFinal - ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] - ├── group by: [] - ├── aggregate functions: [count(), any(number)] - ├── limit: 1 - ├── estimated rows: 1.00 - └── AggregatePartial - ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] - ├── group by: [] - ├── aggregate functions: [count(), any(number)] - ├── estimated rows: 1.00 - └── Filter - ├── output columns: [numbers.number (#1)] - ├── filters: [numbers.number (#1) = 0] - ├── estimated rows: 0.00 - └── TableScan - ├── table: default.system.numbers - ├── output columns: [number (#1)] - ├── read rows: 1 - ├── read bytes: 8 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [numbers.number (#1) = 0], limit: NONE] - └── estimated rows: 1.00 +├── EvalScalar(Build) +│ ├── output columns: [_if_scalar_subquery (#4)] +│ ├── expressions: [if(CAST(_count_scalar_subquery (#2) = 0 AS Boolean NULL), NULL, _any_scalar_subquery (#3))] +│ ├── estimated rows: 1.00 +│ └── Limit +│ ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] +│ ├── limit: 1 +│ ├── offset: 0 +│ ├── estimated rows: 1.00 +│ └── AggregateFinal +│ ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(number)] +│ ├── limit: 1 +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [_count_scalar_subquery (#2), _any_scalar_subquery (#3)] +│ ├── group by: [] +│ ├── aggregate functions: [count(), any(number)] +│ ├── estimated rows: 1.00 +│ └── Filter +│ ├── output columns: [numbers.number (#1)] +│ ├── filters: [numbers.number (#1) = 0] +│ ├── estimated rows: 0.00 +│ └── TableScan +│ ├── table: default.system.numbers +│ ├── output columns: [number (#1)] +│ ├── 
read rows: 1 +│ ├── read bytes: 8 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [numbers.number (#1) = 0], limit: NONE] +│ └── estimated rows: 1.00 +└── TableScan(Probe) + ├── table: default.system.numbers + ├── output columns: [number (#0)] + ├── read rows: 1 + ├── read bytes: 8 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 query T explain select t.number from numbers(1) as t where exists (select * from numbers(1) where number = t.number) @@ -310,7 +310,7 @@ HashJoin ├── estimated rows: 0.00 ├── Filter(Build) │ ├── output columns: [numbers.number (#1)] -│ ├── filters: [numbers.number (#1) = 0] +│ ├── filters: [numbers.number (#1) < 10, numbers.number (#1) = 0] │ ├── estimated rows: 0.00 │ └── TableScan │ ├── table: default.system.numbers @@ -319,7 +319,7 @@ HashJoin │ ├── read bytes: 8 │ ├── partitions total: 1 │ ├── partitions scanned: 1 -│ ├── push downs: [filters: [numbers.number (#1) = 0], limit: NONE] +│ ├── push downs: [filters: [and_filters(and_filters(numbers.number (#1) = 0, numbers.number (#1) < 10), numbers.number (#1) = 0)], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [t.number (#0)] @@ -396,65 +396,61 @@ HashJoin query T explain select t.number from numbers(1) as t, numbers(1) as t1 where (select count(*) = 1 from numbers(1) where t.number = number) and t.number = t1.number ---- -Filter +HashJoin ├── output columns: [t.number (#0)] -├── filters: [is_true(try_to_boolean(if(CAST(is_not_null(scalar_subquery_4 (#4)) AS Boolean NULL), TRY_CAST(scalar_subquery_4 (#4) AS UInt64 NULL), 0)))] +├── join type: INNER +├── build keys: [number (#2)] +├── probe keys: [number (#0)] +├── filters: [] ├── estimated rows: 0.20 -└── HashJoin - ├── output columns: [t.number (#0), (count(*) = 1) (#4)] - ├── join type: LEFT SINGLE - ├── build keys: [number (#2)] - ├── probe keys: [CAST(number (#0) AS UInt64 NULL)] +├── Filter(Build) +│ ├── output columns: [numbers.number (#2)] +│ ├── filters: [is_true(try_to_boolean(if(true, TRY_CAST(COUNT(*) (#3) = 1 AS UInt64 NULL), 0)))] +│ ├── estimated rows: 0.20 +│ └── AggregateFinal +│ ├── output columns: [COUNT(*) (#3), numbers.number (#2)] +│ ├── group by: [number] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── AggregatePartial +│ ├── output columns: [COUNT(*) (#3), #_group_by_key] +│ ├── group by: [number] +│ ├── aggregate functions: [count()] +│ ├── estimated rows: 1.00 +│ └── TableScan +│ ├── table: default.system.numbers +│ ├── output columns: [number (#2)] +│ ├── read rows: 1 +│ ├── read bytes: 8 +│ ├── partitions total: 1 +│ ├── partitions scanned: 1 +│ ├── push downs: [filters: [], limit: NONE] +│ └── estimated rows: 1.00 +└── HashJoin(Probe) + ├── output columns: [t.number (#0)] + ├── join type: INNER + ├── build keys: [t1.number (#1)] + ├── probe keys: [t.number (#0)] ├── filters: [] ├── estimated rows: 1.00 - ├── EvalScalar(Build) - │ ├── output columns: [numbers.number (#2), (count(*) = 1) (#4)] - │ ├── expressions: [COUNT(*) (#3) = 1] - │ ├── estimated rows: 1.00 - │ └── AggregateFinal - │ ├── output columns: [COUNT(*) (#3), numbers.number (#2)] - │ ├── group by: [number] - │ ├── aggregate functions: [count()] - │ ├── estimated rows: 1.00 - │ └── AggregatePartial - │ ├── output columns: [COUNT(*) (#3), #_group_by_key] - │ ├── group by: [number] - │ ├── aggregate functions: [count()] - │ ├── estimated rows: 1.00 - │ └── TableScan - │ ├── table: default.system.numbers - │ 
├── output columns: [number (#2)] - │ ├── read rows: 1 - │ ├── read bytes: 8 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 1.00 - └── HashJoin(Probe) - ├── output columns: [t.number (#0)] - ├── join type: INNER - ├── build keys: [t1.number (#1)] - ├── probe keys: [t.number (#0)] - ├── filters: [] - ├── estimated rows: 1.00 - ├── TableScan(Build) - │ ├── table: default.system.numbers - │ ├── output columns: [number (#1)] - │ ├── read rows: 1 - │ ├── read bytes: 8 - │ ├── partitions total: 1 - │ ├── partitions scanned: 1 - │ ├── push downs: [filters: [], limit: NONE] - │ └── estimated rows: 1.00 - └── TableScan(Probe) - ├── table: default.system.numbers - ├── output columns: [number (#0)] - ├── read rows: 1 - ├── read bytes: 8 - ├── partitions total: 1 - ├── partitions scanned: 1 - ├── push downs: [filters: [], limit: NONE] - └── estimated rows: 1.00 + ├── TableScan(Build) + │ ├── table: default.system.numbers + │ ├── output columns: [number (#1)] + │ ├── read rows: 1 + │ ├── read bytes: 8 + │ ├── partitions total: 1 + │ ├── partitions scanned: 1 + │ ├── push downs: [filters: [], limit: NONE] + │ └── estimated rows: 1.00 + └── TableScan(Probe) + ├── table: default.system.numbers + ├── output columns: [number (#0)] + ├── read rows: 1 + ├── read bytes: 8 + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 1.00 query T explain select t.number from numbers(1) as t where exists(select * from numbers(1) as t1 where t.number > t1.number) and not exists(select * from numbers(1) as t1 where t.number < t1.number) diff --git a/tests/sqllogictests/suites/query/02_function/02_0000_function_arithmetic.test b/tests/sqllogictests/suites/query/02_function/02_0000_function_arithmetic.test index cb02c41b11c3..782e3272847a 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0000_function_arithmetic.test +++ b/tests/sqllogictests/suites/query/02_function/02_0000_function_arithmetic.test @@ -146,3 +146,25 @@ query I select * from numbers(4) where -number > -1; ---- 0 + + +## decimal + +statement ok +create table test3 ( + amount1 DECIMAL(38, 18) NULL, + amount2 DECIMAL(28, 8) NULL +); + +statement ok +insert into test3 values('30.606168460000000000','30.60616846'); + + +query TTTTTT +select sum(amount1)a , sum(amount2) b , a + b, a - b, a * b, a / b from test3; +---- +30.606168460000000000 30.60616846 61.212336920000000000 0.000000000000000000 936.73754780189877160000000000 1.000000000000000000 + +statement ok +drop table test3 + diff --git a/tests/sqllogictests/suites/query/02_function/02_0005_function_substring.test b/tests/sqllogictests/suites/query/02_function/02_0005_function_substring.test index 48be2d7c2222..f212bdeaa8be 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0005_function_substring.test +++ b/tests/sqllogictests/suites/query/02_function/02_0005_function_substring.test @@ -195,7 +195,7 @@ SELECT SUBSTRING('12345', 0, 1) query TI select substr('城区主城区其他', 1, 6), length('我爱中国') ---- -城区 12 +城区主城区其 4 statement ok @@ -208,4 +208,4 @@ select substr('城区主城区其他', 1, 6), length('我爱中国') 城区主城区其 4 statement ok -unset sql_dialect \ No newline at end of file +unset sql_dialect diff --git a/tests/sqllogictests/suites/query/02_function/02_0040_function_strings_length.test b/tests/sqllogictests/suites/query/02_function/02_0040_function_strings_length.test index 0ef21f1f73de..2595460d89ca 100644 --- 
a/tests/sqllogictests/suites/query/02_function/02_0040_function_strings_length.test +++ b/tests/sqllogictests/suites/query/02_function/02_0040_function_strings_length.test @@ -7,17 +7,17 @@ SELECT LENGTH('word') query I SELECT LENGTH('кириллица') ---- -18 +9 query I SELECT LENGTH('кириллица and latin') ---- -28 +19 query I SELECT LENGTH('你好') ---- -6 +2 query T SELECT LENGTH(NULL) diff --git a/tests/sqllogictests/suites/query/02_function/02_0043_function_char.test b/tests/sqllogictests/suites/query/02_function/02_0043_function_char.test index 78bcd636aa10..599bfa1d8422 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0043_function_char.test +++ b/tests/sqllogictests/suites/query/02_function/02_0043_function_char.test @@ -9,30 +9,30 @@ SELECT CHAR(97, NULL) NULL query T -SELECT CHAR(97) +SELECT CHAR(97)::STRING ---- a query TT -select char(97,98) from numbers(2); +select char(97,98)::STRING from numbers(2); ---- ab ab query T -SELECT CHAR(77,121,83,81,76) +SELECT CHAR(77,121,83,81,76)::STRING ---- MySQL query T -SELECT char(0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, 0xD1, 0x82) AS hello +SELECT char(0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, 0xD1, 0x82)::STRING AS hello ---- привет query T -SELECT char(0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd) AS hello +SELECT char(0xe4, 0xbd, 0xa0, 0xe5, 0xa5, 0xbd)::STRING AS hello ---- 你好 @@ -43,7 +43,7 @@ statement ok create database if not exists db1 statement ok -create table db1.t as SELECT CHAR(77,121,83,81,76) AS hello +create table db1.t as SELECT CHAR(77,121,83,81,76)::STRING AS hello query T select * from db1.t diff --git a/tests/sqllogictests/suites/query/join/runtime_filter.test b/tests/sqllogictests/suites/query/join/runtime_filter.test index b875356187dd..bdb84d3eb936 100644 --- a/tests/sqllogictests/suites/query/join/runtime_filter.test +++ b/tests/sqllogictests/suites/query/join/runtime_filter.test @@ -1,6 +1,3 @@ -statement ok -set enable_runtime_filter = 1; - statement ok CREATE TABLE table1 ( key1 String, @@ -89,9 +86,6 @@ NULL NULL NULL NULL NULL NULL -statement ok -set enable_runtime_filter = 0; - statement ok drop table table1; diff --git a/tests/sqllogictests/suites/query/subquery.test b/tests/sqllogictests/suites/query/subquery.test index c88ffc8bd1d9..eaced5013cd6 100644 --- a/tests/sqllogictests/suites/query/subquery.test +++ b/tests/sqllogictests/suites/query/subquery.test @@ -733,3 +733,18 @@ drop table mortgage_records; statement ok drop table transaction_history; + +statement ok +drop table if exists t1; + +statement ok +create table t1(a int); + +statement ok +insert into t1 values(1), (2), (3); + +query error 1001.*Scalar subquery can't return more than one row +select (select sum(a) from t1 where t1.a >= t2.a group by t1.a) from t1 as t2; + +statement ok +drop table t1; diff --git a/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_field.test b/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_field.test index bf8078f50ec3..522f9167dc50 100644 --- a/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_field.test +++ b/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_field.test @@ -19,7 +19,7 @@ c5 BIGINT 1 1 c2 BIGINT 1 2 c4 VARCHAR 1 3 -query error get diff schema +query error copy into t1 from @data/parquet/diff_schema/ file_format=(type=parquet) pattern='.*[.]parquet' query diff --git a/tests/sqllogictests/suites/task/task_dag_test.test b/tests/sqllogictests/suites/task/task_dag_test.test 
index 2d6dd58d71f3..0629fdf965cf 100644 --- a/tests/sqllogictests/suites/task/task_dag_test.test +++ b/tests/sqllogictests/suites/task/task_dag_test.test @@ -1,5 +1,5 @@ # Please start the UDF Server first before running this test: -# python3 tests/cloud-control-server/simple_server.py +# python3 tests/cloud_control_server/simple_server.py # statement ok DROP TASK IF EXISTS mytaskroot diff --git a/tests/sqllogictests/suites/task/task_ddl_test.test b/tests/sqllogictests/suites/task/task_ddl_test.test index 8b83ab8d405e..678b5c651ab3 100644 --- a/tests/sqllogictests/suites/task/task_ddl_test.test +++ b/tests/sqllogictests/suites/task/task_ddl_test.test @@ -4,6 +4,9 @@ statement ok DROP TASK IF EXISTS mytask +statement ok +DROP TASK IF EXISTS sessionTask + statement ok CREATE TASK mytask WAREHOUSE = 'mywh' @@ -94,5 +97,38 @@ select state from system.task_history where name = 'mytask' ---- SUCCEEDED +statement ok +CREATE TASK sessionTask + WAREHOUSE = 'mywh' + SCHEDULE = USING CRON '0 0 0 1 1 ? 2100' + DATABASE = 'mydb', TIMEZONE = 'America/Los_Angeles' + AS SELECT 1; + +query SSS +select name, state, session_parameters from system.tasks where name = 'sessionTask' +---- +sessionTask Suspended {"database":"mydb","timezone":"America/Los_Angeles"} + +statement ok +EXECUTE TASK sessionTask + +query SSSS +select name, session_parameters from system.task_history where name = 'sessionTask' +---- +sessionTask {"database":"mydb","timezone":"America/Los_Angeles"} + +statement ok +ALTER TASK sessionTask + SET + DATABASE = 'db2', TIMEZONE = 'Pacific/Honolulu' + +query SSS +select name, state, session_parameters from system.tasks where name = 'sessionTask' +---- +sessionTask Suspended {"database":"db2","timezone":"Pacific/Honolulu"} + statement ok DROP TASK mytask + +statement ok +DROP TASK sessionTask \ No newline at end of file diff --git a/tests/sqllogictests/suites/tpcds/queries.test b/tests/sqllogictests/suites/tpcds/queries.test index 72739a04b429..f8cef15ea28a 100644 --- a/tests/sqllogictests/suites/tpcds/queries.test +++ b/tests/sqllogictests/suites/tpcds/queries.test @@ -1,9 +1,6 @@ statement ok set sandbox_tenant = 'test_tenant'; -statement ok -set enable_runtime_filter = 1; - statement ok use tpcds; @@ -7979,6 +7976,3 @@ Conventional childr NEXT DAY ny metro 256 251 235 0 0 Conventional childr OVERNIGHT ny metro 188 181 188 0 0 Conventional childr REGULAR ny metro 179 150 215 0 0 Conventional childr TWO DAY ny metro 185 183 158 0 0 - -statement ok -set enable_runtime_filter = 0 diff --git a/tests/sqllogictests/suites/tpcds/tpcds_join_order.test b/tests/sqllogictests/suites/tpcds/tpcds_join_order.test index 1d46b29367df..f37aedc31742 100644 --- a/tests/sqllogictests/suites/tpcds/tpcds_join_order.test +++ b/tests/sqllogictests/suites/tpcds/tpcds_join_order.test @@ -1,9 +1,6 @@ statement ok set sandbox_tenant = 'test_tenant'; -statement ok -set enable_runtime_filter = 1; - statement ok use tpcds; @@ -34,27 +31,27 @@ WHERE ctr1.ctr_total_return > ORDER BY c_customer_id LIMIT 100; ---- -HashJoin: LEFT SINGLE +HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) +│ │ └── Probe +│ │ └── Scan: default.tpcds.store_returns (#4) (read rows: 2810) │ └── Probe -│ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── Scan: default.tpcds.store (#2) (read rows: 1) +│ └── Probe +│ └── HashJoin: INNER 
+│ ├── Build +│ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) +│ └── Probe +│ └── Scan: default.tpcds.store_returns (#0) (read rows: 2810) └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.store (read rows: 1) - │ └── Probe - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.store_returns (read rows: 2810) - └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#3) (read rows: 1000) # Q2 query I @@ -143,31 +140,31 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── UnionAll │ │ ├── Left -│ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ └── Scan: default.tpcds.web_sales (#4) (read rows: 7212) │ │ └── Right -│ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ └── Scan: default.tpcds.catalog_sales (#5) (read rows: 14313) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build │ └── UnionAll │ ├── Left - │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) │ └── Right - │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) + │ └── Scan: default.tpcds.catalog_sales (#1) (read rows: 14313) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) # Q3 query I @@ -195,11 +192,11 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 0) +│ │ └── Scan: default.tpcds.item (#2) (read rows: 0) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#0) (read rows: 73049) # Q4 query I @@ -342,31 +339,31 @@ HashJoin: INNER │ │ │ │ │ │ │ ├── Build │ │ │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#20) (read rows: 0) │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (#19) (read rows: 0) │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ │ │ │ └── Scan: default.tpcds.customer (#18) (read rows: 0) │ │ │ │ │ │ └── Right │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ ├── Build │ │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#23) (read rows: 73049) │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (#22) (read rows: 14313) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ │ │ └── Scan: default.tpcds.customer (#21) (read rows: 1000) │ │ │ │ │ 
└── Right │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#26) (read rows: 0) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.web_sales (#25) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.customer (#24) (read rows: 0) │ │ │ │ └── Probe │ │ │ │ └── UnionAll │ │ │ │ ├── Left @@ -376,31 +373,31 @@ HashJoin: INNER │ │ │ │ │ │ ├── Build │ │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#38) (read rows: 0) │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (#37) (read rows: 0) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ │ │ └── Scan: default.tpcds.customer (#36) (read rows: 0) │ │ │ │ │ └── Right │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#41) (read rows: 0) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (#40) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.customer (#39) (read rows: 0) │ │ │ │ └── Right │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#44) (read rows: 73049) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.web_sales (#43) (read rows: 7212) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.customer (#42) (read rows: 1000) │ │ │ └── Probe │ │ │ └── UnionAll │ │ │ ├── Left @@ -410,31 +407,31 @@ HashJoin: INNER │ │ │ │ │ ├── Build │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.customer (#0) (read rows: 1000) │ │ │ │ └── Right │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.catalog_sales (#4) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.customer (#3) (read rows: 0) │ │ │ └── Right │ │ │ └── HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim 
(read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#8) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ └── Scan: default.tpcds.web_sales (#7) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ └── Scan: default.tpcds.customer (#6) (read rows: 0) │ │ └── Probe │ │ └── UnionAll │ │ ├── Left @@ -444,31 +441,31 @@ HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#29) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#28) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.customer (#27) (read rows: 0) │ │ │ └── Right │ │ │ └── HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#32) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ └── Scan: default.tpcds.catalog_sales (#31) (read rows: 14313) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ └── Scan: default.tpcds.customer (#30) (read rows: 1000) │ │ └── Right │ │ └── HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.date_dim (#35) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.web_sales (#34) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ └── Scan: default.tpcds.customer (#33) (read rows: 0) │ └── Probe │ └── UnionAll │ ├── Left @@ -478,31 +475,31 @@ HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#47) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.store_sales (#46) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ └── Scan: default.tpcds.customer (#45) (read rows: 0) │ │ └── Right │ │ └── HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.date_dim (#50) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.catalog_sales (#49) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ └── Scan: default.tpcds.customer (#48) (read rows: 0) │ └── Right │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#53) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.web_sales (#52) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#51) (read rows: 1000) └── Probe └── UnionAll ├── Left @@ -512,31 +509,31 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ 
└── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ │ └── Scan: default.tpcds.date_dim (#11) (read rows: 73049) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#10) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.customer (read rows: 1000) + │ │ └── Scan: default.tpcds.customer (#9) (read rows: 1000) │ └── Right │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) + │ │ │ └── Scan: default.tpcds.date_dim (#14) (read rows: 0) │ │ └── Probe - │ │ └── Scan: default.tpcds.catalog_sales (read rows: 0) + │ │ └── Scan: default.tpcds.catalog_sales (#13) (read rows: 0) │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 0) + │ └── Scan: default.tpcds.customer (#12) (read rows: 0) └── Right └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 0) + │ │ └── Scan: default.tpcds.date_dim (#17) (read rows: 0) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 0) + │ └── Scan: default.tpcds.web_sales (#16) (read rows: 0) └── Probe - └── Scan: default.tpcds.customer (read rows: 0) + └── Scan: default.tpcds.customer (#15) (read rows: 0) # Q5 @@ -661,49 +658,49 @@ UnionAll │ ├── Left │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#3) (read rows: 1) │ │ └── Probe │ │ └── UnionAll │ │ ├── Left -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ └── Right -│ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) │ └── Right │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.catalog_page (read rows: 11718) +│ │ └── Scan: default.tpcds.catalog_page (#7) (read rows: 11718) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ └── Probe │ └── UnionAll │ ├── Left -│ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ └── Scan: default.tpcds.catalog_sales (#4) (read rows: 14313) │ └── Right -│ └── Scan: default.tpcds.catalog_returns (read rows: 1358) +│ └── Scan: default.tpcds.catalog_returns (#5) (read rows: 1358) └── Right └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#11) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.web_site (read rows: 1) + │ └── Scan: default.tpcds.web_site (#12) (read rows: 1) └── Probe └── UnionAll ├── Left - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.web_sales (#8) (read rows: 7212) └── Right └── HashJoin: RIGHT OUTER ├── Build - │ └── Scan: default.tpcds.web_returns (read rows: 679) + │ └── Scan: default.tpcds.web_returns (#9) (read rows: 679) └── Probe - └── Scan: default.tpcds.web_sales (read rows: 7212) + └── Scan: default.tpcds.web_sales (#10) (read rows: 7212) # Q6 query I @@ -734,31 +731,31 @@ ORDER BY cnt NULLS FIRST, a.ca_state NULLS FIRST LIMIT 100; ---- -HashJoin: LEFT SINGLE +HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.item (read 
rows: 180) +│ └── Scan: default.tpcds.item (#6) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── HashJoin: INNER + │ ├── Build + │ │ └── Scan: default.tpcds.customer_address (#0) (read rows: 500) + │ └── Probe + │ └── Scan: default.tpcds.customer (#1) (read rows: 1000) └── Probe └── HashJoin: INNER ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) - │ │ └── Probe - │ │ └── Scan: default.tpcds.customer (read rows: 1000) - │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.item (#4) (read rows: 180) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── HashJoin: INNER + ├── Build + │ └── Scan: default.tpcds.store_sales (#2) (read rows: 28810) + └── Probe + └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) # Q7 query I @@ -789,21 +786,21 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.promotion (read rows: 3) +│ └── Scan: default.tpcds.promotion (#4) (read rows: 3) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#3) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) └── Probe - └── Scan: default.tpcds.customer_demographics (read rows: 19208) + └── Scan: default.tpcds.customer_demographics (#1) (read rows: 19208) # Q8 @@ -1243,21 +1240,21 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ └── Scan: default.tpcds.customer (#5) (read rows: 1000) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ └── Scan: default.tpcds.customer_address (#4) (read rows: 500) │ └── Probe -│ └── Scan: default.tpcds.customer_address (read rows: 500) +│ └── Scan: default.tpcds.customer_address (#3) (read rows: 500) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.store (read rows: 1) + │ └── Scan: default.tpcds.store (#2) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) # Q9 query I @@ -1332,65 +1329,65 @@ WHERE r_reason_sk = 1 ; ---- HashJoin: CROSS ├── Build -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#15) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#14) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#13) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#12) (read rows: 
28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#11) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#10) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#9) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#8) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#7) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#6) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#5) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#4) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#2) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) └── Probe - └── Scan: default.tpcds.reason (read rows: 1) + └── Scan: default.tpcds.reason (#0) (read rows: 1) # Q10 query I @@ -1461,39 +1458,39 @@ ORDER BY cd_gender, cd_dep_college_count LIMIT 100; ---- -HashJoin: RIGHT MARK +HashJoin: LEFT MARK ├── Build -│ └── HashJoin: INNER +│ └── HashJoin: LEFT MARK │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── HashJoin: LEFT SEMI +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) +│ │ └── Probe +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpcds.customer_address (#1) (read rows: 500) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.customer (#0) (read rows: 1000) +│ │ └── Probe +│ │ └── Scan: default.tpcds.customer_demographics (#2) (read rows: 19208) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) +│ └── Probe +│ └── Scan: default.tpcds.web_sales (#5) (read rows: 7212) └── Probe - └── HashJoin: RIGHT MARK + └── HashJoin: INNER ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) └── Probe - └── HashJoin: LEFT SEMI - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) - └── Probe - └── HashJoin: INNER - ├── 
Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.customer_address (read rows: 500) - │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 1000) - └── Probe - └── Scan: default.tpcds.customer_demographics (read rows: 19208) + └── Scan: default.tpcds.catalog_sales (#7) (read rows: 14313) # Q11 query I @@ -1591,21 +1588,21 @@ HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#14) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#13) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.customer (#12) (read rows: 0) │ │ │ └── Right │ │ │ └── HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#17) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ └── Scan: default.tpcds.web_sales (#16) (read rows: 7212) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ └── Scan: default.tpcds.customer (#15) (read rows: 1000) │ │ └── Probe │ │ └── UnionAll │ │ ├── Left @@ -1613,21 +1610,21 @@ HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ └── Scan: default.tpcds.customer (#0) (read rows: 1000) │ │ └── Right │ │ └── HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.web_sales (#4) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ └── Scan: default.tpcds.customer (#3) (read rows: 0) │ └── Probe │ └── UnionAll │ ├── Left @@ -1635,21 +1632,21 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.date_dim (#20) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.store_sales (#19) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ └── Scan: default.tpcds.customer (#18) (read rows: 0) │ └── Right │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#23) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.web_sales (#22) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#21) (read rows: 1000) └── Probe └── UnionAll ├── Left @@ -1657,21 +1654,21 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: 
default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#7) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 1000) + │ └── Scan: default.tpcds.customer (#6) (read rows: 1000) └── Right └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 0) + │ │ └── Scan: default.tpcds.date_dim (#11) (read rows: 0) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 0) + │ └── Scan: default.tpcds.web_sales (#10) (read rows: 0) └── Probe - └── Scan: default.tpcds.customer (read rows: 0) + └── Scan: default.tpcds.customer (#9) (read rows: 0) # Q12 query I @@ -1708,11 +1705,11 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#1) (read rows: 180) # Q13 query I @@ -1769,17 +1766,17 @@ HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ │ └── Scan: default.tpcds.customer_address (#4) (read rows: 500) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.household_demographics (#3) (read rows: 7200) │ └── Probe -│ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ └── Scan: default.tpcds.store (#1) (read rows: 1) └── Probe - └── Scan: default.tpcds.household_demographics (read rows: 7200) + └── Scan: default.tpcds.customer_demographics (#2) (read rows: 19208) # Q14 @@ -2059,13 +2056,13 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#1) (read rows: 1000) └── Probe - └── Scan: default.tpcds.customer_address (read rows: 500) + └── Scan: default.tpcds.customer_address (#2) (read rows: 500) # Q16 query I @@ -2105,17 +2102,17 @@ HashJoin: RIGHT ANTI │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 500) │ │ └── Probe -│ │ └── Scan: default.tpcds.call_center (read rows: 1) +│ │ └── Scan: default.tpcds.call_center (#3) (read rows: 1) │ └── Probe -│ 
└── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: default.tpcds.catalog_sales (#4) (read rows: 14313) └── Probe - └── Scan: default.tpcds.catalog_returns (read rows: 1358) + └── Scan: default.tpcds.catalog_returns (#5) (read rows: 1358) # Q17 query I @@ -2180,23 +2177,23 @@ HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ │ │ │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) │ │ │ │ └── Probe │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#6) (read rows: 1) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#7) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: default.tpcds.catalog_sales (#2) (read rows: 14313) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) # Q18 query I @@ -2263,19 +2260,19 @@ HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ └── Scan: default.tpcds.customer (#3) (read rows: 1000) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ │ └── Scan: default.tpcds.customer_demographics (#1) (read rows: 19208) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ └── Scan: default.tpcds.customer_demographics (#2) (read rows: 19208) │ └── Probe -│ └── Scan: default.tpcds.customer_address (read rows: 500) +│ └── Scan: default.tpcds.customer_address (#4) (read rows: 500) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#6) (read rows: 180) # Q19 query I @@ -2313,7 +2310,7 @@ LIMIT 100 ; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.store (read rows: 1) +│ └── Scan: default.tpcds.store (#5) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build @@ -2321,17 +2318,17 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ │ └── Scan: default.tpcds.item (#2) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#0) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 1000) + │ └── Scan: default.tpcds.customer (#3) (read rows: 1000) └── Probe - └── Scan: 
default.tpcds.customer_address (read rows: 500) + └── Scan: default.tpcds.customer_address (#4) (read rows: 500) # Q20 query I @@ -2368,11 +2365,11 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#1) (read rows: 180) # Q21 query I @@ -2412,15 +2409,15 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#2) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.inventory (read rows: 23490) +│ └── Scan: default.tpcds.inventory (#0) (read rows: 23490) └── Probe - └── Scan: default.tpcds.warehouse (read rows: 1) + └── Scan: default.tpcds.warehouse (#1) (read rows: 1) # Q22 query I @@ -2448,11 +2445,11 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.inventory (read rows: 23490) +│ └── Scan: default.tpcds.inventory (#0) (read rows: 23490) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#2) (read rows: 180) # Q23 query I @@ -2554,15 +2551,15 @@ UnionAll │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ │ │ └── Scan: default.tpcds.item (#5) (read rows: 180) │ │ │ │ │ └── Probe │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ │ │ └── Probe │ │ │ └── HashJoin: CROSS │ │ │ ├── Build @@ -2570,21 +2567,21 @@ UnionAll │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#10) (read rows: 73049) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#8) (read rows: 28810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ └── Scan: default.tpcds.customer (#9) (read rows: 1000) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ └── Scan: default.tpcds.customer (#7) (read rows: 1000) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#6) (read rows: 28810) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: 
default.tpcds.customer (#1) (read rows: 1000) └── Right └── HashJoin: INNER ├── Build @@ -2596,15 +2593,15 @@ UnionAll │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build - │ │ │ │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ │ │ │ └── Scan: default.tpcds.item (#16) (read rows: 180) │ │ │ │ └── Probe │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build - │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ │ │ └── Scan: default.tpcds.date_dim (#15) (read rows: 73049) │ │ │ │ └── Probe - │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ │ └── Scan: default.tpcds.store_sales (#14) (read rows: 28810) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ │ └── Scan: default.tpcds.web_sales (#11) (read rows: 7212) │ │ └── Probe │ │ └── HashJoin: CROSS │ │ ├── Build @@ -2612,21 +2609,21 @@ UnionAll │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build - │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ │ │ └── Scan: default.tpcds.date_dim (#21) (read rows: 73049) │ │ │ │ └── Probe - │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ │ └── Scan: default.tpcds.store_sales (#19) (read rows: 28810) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) + │ │ │ └── Scan: default.tpcds.customer (#20) (read rows: 1000) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) + │ │ │ └── Scan: default.tpcds.customer (#18) (read rows: 1000) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#17) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#13) (read rows: 73049) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#12) (read rows: 1000) # Q24 query I @@ -2696,17 +2693,17 @@ RangeJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.store (#2) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ │ │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.item (read rows: 0) +│ │ │ └── Scan: default.tpcds.item (#3) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ └── Scan: default.tpcds.customer (#4) (read rows: 1000) │ └── Probe -│ └── Scan: default.tpcds.customer_address (read rows: 500) +│ └── Scan: default.tpcds.customer_address (#5) (read rows: 500) └── Right └── HashJoin: INNER ├── Build @@ -2718,17 +2715,17 @@ RangeJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build - │ │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ │ └── Scan: default.tpcds.store (#8) (read rows: 0) │ │ │ │ └── Probe - │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ │ └── Scan: default.tpcds.store_sales (#6) (read rows: 28810) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_returns (read rows: 2810) + │ │ │ └── Scan: default.tpcds.store_returns (#7) (read rows: 2810) │ │ └── Probe - │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ └── Scan: default.tpcds.item 
(#9) (read rows: 180) │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 1000) + │ └── Scan: default.tpcds.customer (#10) (read rows: 1000) └── Probe - └── Scan: default.tpcds.customer_address (read rows: 500) + └── Scan: default.tpcds.customer_address (#11) (read rows: 500) # Q25 query I @@ -2776,33 +2773,33 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── HashJoin: INNER -│ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) -│ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── HashJoin: INNER -│ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) -│ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_returns (read rows: 2810) -│ │ │ └── Probe -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) -│ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) -│ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.store (#6) (read rows: 1) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── HashJoin: INNER + ├── Build + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── HashJoin: INNER + │ │ │ ├── Build + │ │ │ │ └── HashJoin: INNER + │ │ │ │ ├── Build + │ │ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) + │ │ │ │ └── Probe + │ │ │ │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) + │ │ │ └── Probe + │ │ │ └── HashJoin: INNER + │ │ │ ├── Build + │ │ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) + │ │ │ └── Probe + │ │ │ └── Scan: default.tpcds.catalog_sales (#2) (read rows: 14313) + │ │ └── Probe + │ │ └── Scan: default.tpcds.item (#7) (read rows: 180) + │ └── Probe + │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) + └── Probe + └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) # Q26 query I @@ -2833,7 +2830,7 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.promotion (read rows: 3) +│ └── Scan: default.tpcds.promotion (#4) (read rows: 3) └── Probe └── HashJoin: INNER ├── Build @@ -2841,13 +2838,13 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) + │ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ └── Probe - │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) + │ └── Scan: default.tpcds.customer_demographics (#1) (read rows: 19208) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#3) (read rows: 180) # Q27 query I @@ -2919,57 +2916,57 @@ UnionAll │ ├── Left │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#3) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read 
rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ └── Scan: default.tpcds.customer_demographics (#1) (read rows: 19208) │ └── Right │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#9) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.store (#8) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#5) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ └── Scan: default.tpcds.customer_demographics (#6) (read rows: 19208) └── Right └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#14) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.store (read rows: 1) + │ └── Scan: default.tpcds.store (#13) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#12) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#10) (read rows: 28810) └── Probe - └── Scan: default.tpcds.customer_demographics (read rows: 19208) + └── Scan: default.tpcds.customer_demographics (#11) (read rows: 19208) # Q28 query I @@ -3028,25 +3025,25 @@ LIMIT 100; ---- HashJoin: CROSS ├── Build -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#5) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#4) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#2) (read rows: 28810) └── Probe └── HashJoin: CROSS ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) # Q29 @@ -3102,27 +3099,27 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ │ └── Scan: default.tpcds.store (#6) (read rows: 1) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ │ │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: 
default.tpcds.date_dim (#3) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#7) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: default.tpcds.catalog_sales (#2) (read rows: 14313) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) # Q30 @@ -3178,35 +3175,35 @@ ORDER BY c_customer_id NULLS FIRST, ctr_total_return NULLS FIRST LIMIT 100; ---- -HashJoin: RIGHT SINGLE +HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── HashJoin: INNER +│ │ │ │ ├── Build +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) +│ │ │ │ └── Probe +│ │ │ │ └── Scan: default.tpcds.web_returns (#5) (read rows: 679) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.customer_address (#7) (read rows: 500) +│ │ └── Probe +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.web_returns (#0) (read rows: 679) +│ │ └── Probe +│ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 500) │ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_returns (read rows: 679) -│ │ └── Probe -│ │ └── Scan: default.tpcds.customer_address (read rows: 500) -│ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#4) (read rows: 1000) └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.web_returns (read rows: 679) - └── Probe - └── Scan: default.tpcds.customer_address (read rows: 500) + └── Scan: default.tpcds.customer_address (#3) (read rows: 500) # Q31 query I @@ -3296,61 +3293,61 @@ HashJoin: INNER │ │ │ │ │ ├── Build │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#16) (read rows: 73049) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ │ │ └── Scan: default.tpcds.web_sales (#15) (read rows: 7212) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#17) (read rows: 500) │ │ │ │ └── Probe │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#13) (read rows: 73049) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ │ └── Scan: default.tpcds.web_sales (#12) (read rows: 7212) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ │ └── Scan: default.tpcds.customer_address (#14) (read rows: 500) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ 
├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#10) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ └── Scan: default.tpcds.web_sales (#9) (read rows: 7212) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ └── Scan: default.tpcds.customer_address (#11) (read rows: 500) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#6) (read rows: 28810) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ └── Scan: default.tpcds.customer_address (#8) (read rows: 500) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.customer_address (read rows: 500) +│ └── Scan: default.tpcds.customer_address (#5) (read rows: 500) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) └── Probe - └── Scan: default.tpcds.customer_address (read rows: 500) + └── Scan: default.tpcds.customer_address (#2) (read rows: 500) # Q32 query I @@ -3372,23 +3369,23 @@ WHERE i_manufact_id = 977 AND d_date_sk = cs_sold_date_sk ) LIMIT 100; ---- -HashJoin: RIGHT SINGLE +HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.catalog_sales (#3) (read rows: 14313) │ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ └── Scan: default.tpcds.item (#1) (read rows: 0) │ └── Probe -│ └── Scan: default.tpcds.item (read rows: 0) +│ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) └── Probe - └── HashJoin: INNER - ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) - └── Probe - └── Scan: default.tpcds.catalog_sales (read rows: 14313) + └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) # Q33 query I @@ -3472,15 +3469,15 @@ UnionAll │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ 
│ │ └── Scan: default.tpcds.item (#3) (read rows: 180) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ └── Right │ └── HashJoin: RIGHT SEMI │ ├── Build @@ -3490,15 +3487,15 @@ UnionAll │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#7) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ └── Scan: default.tpcds.catalog_sales (#5) (read rows: 14313) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#8) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#9) (read rows: 180) └── Right └── HashJoin: RIGHT SEMI ├── Build @@ -3508,15 +3505,15 @@ UnionAll │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.customer_address (#12) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ │ └── Scan: default.tpcds.web_sales (#10) (read rows: 7212) │ │ └── Probe - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#11) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#13) (read rows: 180) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#14) (read rows: 180) # Q34 query I @@ -3566,19 +3563,19 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ └── Scan: default.tpcds.household_demographics (#3) (read rows: 7200) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#4) (read rows: 1000) # Q35 onlyif todo @@ -3726,45 +3723,45 @@ UnionAll │ ├── Left │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#3) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── Scan: default.tpcds.item (#2) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Right │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.store (#7) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: 
default.tpcds.item (#6) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#4) (read rows: 28810) └── Right └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.store (read rows: 1) + │ └── Scan: default.tpcds.store (#11) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#10) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#9) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#8) (read rows: 28810) # Q37 query I @@ -3796,15 +3793,15 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#0) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.inventory (read rows: 23490) +│ └── Scan: default.tpcds.inventory (#1) (read rows: 23490) └── Probe - └── Scan: default.tpcds.catalog_sales (read rows: 14313) + └── Scan: default.tpcds.catalog_sales (#3) (read rows: 14313) # Q38 query I @@ -3846,31 +3843,31 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ └── Scan: default.tpcds.catalog_sales (#3) (read rows: 14313) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ └── Scan: default.tpcds.customer (#5) (read rows: 1000) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#2) (read rows: 1000) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.web_sales (#6) (read rows: 7212) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#8) (read rows: 1000) # Q39 query I @@ -3938,31 +3935,31 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.warehouse (read rows: 1) +│ │ └── Scan: default.tpcds.warehouse (#6) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#5) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe -│ └── Scan: 
default.tpcds.inventory (read rows: 23490) +│ └── Scan: default.tpcds.inventory (#4) (read rows: 23490) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.warehouse (read rows: 1) + │ └── Scan: default.tpcds.warehouse (#2) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#1) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) └── Probe - └── Scan: default.tpcds.inventory (read rows: 23490) + └── Scan: default.tpcds.inventory (#0) (read rows: 23490) # Q40 query I @@ -3997,19 +3994,19 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#3) (read rows: 180) │ └── Probe │ └── HashJoin: LEFT OUTER │ ├── Build -│ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) +│ │ └── Scan: default.tpcds.catalog_returns (#1) (read rows: 1358) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) └── Probe - └── Scan: default.tpcds.warehouse (read rows: 1) + └── Scan: default.tpcds.warehouse (#2) (read rows: 1) # Q41 query I @@ -4081,15 +4078,15 @@ WHERE i_manufact_id BETWEEN 738 AND 738+40 ORDER BY i_product_name LIMIT 100; ---- -HashJoin: LEFT SINGLE +HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#0) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#1) (read rows: 180) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#0) (read rows: 180) # Q42 query I @@ -4116,13 +4113,13 @@ LIMIT 100 ; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#2) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#0) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) # Q43 query I @@ -4179,13 +4176,13 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.store (read rows: 0) +│ └── Scan: default.tpcds.store (#2) (read rows: 0) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#0) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) # Q44 query I @@ -4246,19 +4243,19 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── RangeJoin: INNER │ │ │ ├── Left -│ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.store_sales (#2) (read rows: 0) │ │ │ └── Right -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.store_sales (#3) (read rows: 0) │ │ └── Probe │ │ └── RangeJoin: INNER │ │ ├── Left -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 0) │ │ └── Right -│ │ └── Scan: 
default.tpcds.store_sales (read rows: 0) +│ │ └── Scan: default.tpcds.store_sales (#1) (read rows: 0) │ └── Probe -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#4) (read rows: 180) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#5) (read rows: 180) # Q45 query I @@ -4307,7 +4304,7 @@ LIMIT 100; ---- HashJoin: RIGHT MARK ├── Build -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#5) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build @@ -4317,15 +4314,15 @@ HashJoin: RIGHT MARK │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) │ │ └── Probe - │ │ └── Scan: default.tpcds.customer (read rows: 1000) + │ │ └── Scan: default.tpcds.customer (#1) (read rows: 1000) │ └── Probe - │ └── Scan: default.tpcds.customer_address (read rows: 500) + │ └── Scan: default.tpcds.customer_address (#2) (read rows: 500) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#4) (read rows: 180) # Q46 query I @@ -4379,29 +4376,29 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.customer_address (read rows: 500) +│ └── Scan: default.tpcds.customer_address (#6) (read rows: 500) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.customer_address (read rows: 500) + │ │ └── Scan: default.tpcds.customer_address (#4) (read rows: 500) │ └── Probe │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.store (read rows: 1) + │ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ └── Scan: default.tpcds.household_demographics (#3) (read rows: 7200) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#5) (read rows: 1000) # Q47 query I @@ -4484,45 +4481,45 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── Scan: default.tpcds.item (#0) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#3) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.store (#7) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── 
Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#5) (read rows: 28810) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#8) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.store (read rows: 1) + │ └── Scan: default.tpcds.store (#11) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#10) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#9) (read rows: 28810) # Q48 query I @@ -4575,15 +4572,15 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ └── Scan: default.tpcds.customer_address (#3) (read rows: 500) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ └── Scan: default.tpcds.customer_demographics (#2) (read rows: 19208) └── Probe - └── Scan: default.tpcds.store (read rows: 1) + └── Scan: default.tpcds.store (#1) (read rows: 1) # Q49 query I @@ -4696,33 +4693,33 @@ UnionAll │ ├── Left │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── HashJoin: RIGHT OUTER +│ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ └── Scan: default.tpcds.web_returns (#1) (read rows: 679) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_returns (read rows: 679) +│ │ │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Right │ └── HashJoin: INNER │ ├── Build -│ │ └── HashJoin: RIGHT OUTER +│ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ └── Scan: default.tpcds.catalog_returns (#4) (read rows: 1358) │ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) +│ │ └── Scan: default.tpcds.catalog_sales (#3) (read rows: 14313) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) └── Right └── HashJoin: INNER ├── Build - │ └── HashJoin: RIGHT OUTER - │ ├── Build - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) - │ └── Probe - │ └── Scan: default.tpcds.store_returns (read rows: 2810) + │ └── Scan: default.tpcds.store_returns (#7) (read rows: 2810) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── HashJoin: INNER + ├── Build + │ └── Scan: default.tpcds.store_sales (#6) (read rows: 28810) + └── Probe + └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) # Q50 query I @@ -4799,19 +4796,19 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read 
rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) # Q51 query I @@ -4873,15 +4870,15 @@ HashJoin: FULL OUTER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#2) (read rows: 28810) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) └── Probe - └── Scan: default.tpcds.web_sales (read rows: 7212) + └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) # Q52 query I @@ -4908,13 +4905,13 @@ LIMIT 100 ; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#2) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#0) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) # Q53 query I @@ -4969,17 +4966,17 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.store (read rows: 1) +│ └── Scan: default.tpcds.store (#3) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#0) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) # Q54 query I @@ -5044,11 +5041,11 @@ LIMIT 100; ---- RangeJoin: INNER ├── Left -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#10) (read rows: 73049) └── Right └── RangeJoin: INNER ├── Left - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#9) (read rows: 73049) └── Right └── HashJoin: INNER ├── Build @@ -5058,31 +5055,31 @@ RangeJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 1) + │ │ │ │ └── Scan: default.tpcds.store (#7) (read rows: 1) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) + │ │ │ └── Scan: default.tpcds.customer_address (#6) (read rows: 500) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) + │ │ │ └── Scan: default.tpcds.customer (#4) (read rows: 1000) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ │ └── Scan: default.tpcds.item (#2) (read rows: 180) │ │ └── Probe │ │ └── UnionAll │ │ ├── Left - │ │ │ └── Scan: 
default.tpcds.catalog_sales (read rows: 14313) + │ │ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ │ └── Right - │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ └── Scan: default.tpcds.web_sales (#1) (read rows: 7212) │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#5) (read rows: 28810) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) # Q55 query I @@ -5106,13 +5103,13 @@ LIMIT 100 ; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#2) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#0) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) # Q56 query I @@ -5203,15 +5200,15 @@ UnionAll │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── Scan: default.tpcds.item (#3) (read rows: 180) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ └── Right │ └── HashJoin: RIGHT SEMI │ ├── Build @@ -5221,15 +5218,15 @@ UnionAll │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#7) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ └── Scan: default.tpcds.catalog_sales (#5) (read rows: 14313) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#8) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#9) (read rows: 180) └── Right └── HashJoin: RIGHT SEMI ├── Build @@ -5239,15 +5236,15 @@ UnionAll │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.customer_address (#12) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ │ └── Scan: default.tpcds.web_sales (#10) (read rows: 7212) │ │ └── Probe - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#11) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#13) (read rows: 180) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#14) (read rows: 180) # Q57 query I @@ -5323,45 +5320,45 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── 
Scan: default.tpcds.item (#0) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.call_center (read rows: 1) +│ │ │ └── Scan: default.tpcds.call_center (#3) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ └── Scan: default.tpcds.catalog_sales (#1) (read rows: 14313) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.call_center (read rows: 1) +│ │ └── Scan: default.tpcds.call_center (#7) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: default.tpcds.catalog_sales (#5) (read rows: 14313) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#8) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.call_center (read rows: 1) + │ └── Scan: default.tpcds.call_center (#11) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#10) (read rows: 73049) └── Probe - └── Scan: default.tpcds.catalog_sales (read rows: 14313) + └── Scan: default.tpcds.catalog_sales (#9) (read rows: 14313) # Q58 query I @@ -5439,61 +5436,61 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── HashJoin: RIGHT SEMI +│ └── HashJoin: INNER │ ├── Build -│ │ └── HashJoin: INNER +│ │ └── HashJoin: RIGHT SEMI │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ └── HashJoin: INNER +│ │ │ │ ├── Build +│ │ │ │ │ └── Scan: default.tpcds.item (#11) (read rows: 180) +│ │ │ │ └── Probe +│ │ │ │ └── Scan: default.tpcds.web_sales (#10) (read rows: 7212) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ └── Scan: default.tpcds.date_dim (#12) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── Scan: default.tpcds.date_dim (#14) (read rows: 73049) +│ │ └── Probe +│ │ └── Scan: default.tpcds.date_dim (#13) (read rows: 73049) │ └── Probe -│ └── HashJoin: INNER +│ └── HashJoin: RIGHT SEMI │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpcds.item (#6) (read rows: 180) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.catalog_sales (#5) (read rows: 14313) +│ │ └── Probe +│ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── Scan: default.tpcds.date_dim (#9) (read rows: 73049) +│ └── Probe +│ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) └── Probe - └── HashJoin: INNER + └── HashJoin: RIGHT SEMI ├── Build - │ └── HashJoin: RIGHT SEMI + │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── HashJoin: INNER - │ │ │ ├── Build - │ │ │ │ 
└── Scan: default.tpcds.item (read rows: 180) - │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) + │ │ │ └── Scan: default.tpcds.item (#1) (read rows: 180) │ │ └── Probe - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) └── Probe - └── HashJoin: RIGHT SEMI + └── HashJoin: INNER ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpcds.item (read rows: 180) - │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) - │ └── Probe - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) └── Probe - └── HashJoin: INNER - ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) - └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) # Q59 query I @@ -5588,31 +5585,31 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.store (#6) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#4) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.store (read rows: 1) + │ └── Scan: default.tpcds.store (#2) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) # Q60 query I @@ -5697,15 +5694,15 @@ UnionAll │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── Scan: default.tpcds.item (#3) (read rows: 180) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ └── Right │ └── HashJoin: RIGHT SEMI │ ├── Build @@ -5715,15 +5712,15 @@ UnionAll │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#7) (read rows: 0) │ │ │ │ 
└── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ └── Scan: default.tpcds.catalog_sales (#5) (read rows: 14313) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#8) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.item (#9) (read rows: 180) └── Right └── HashJoin: RIGHT SEMI ├── Build @@ -5733,15 +5730,15 @@ UnionAll │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.customer_address (#12) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ │ └── Scan: default.tpcds.web_sales (#10) (read rows: 7212) │ │ └── Probe - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#11) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#13) (read rows: 180) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#14) (read rows: 180) # Q61 query I @@ -5803,19 +5800,19 @@ HashJoin: CROSS │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#11) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ └── Scan: default.tpcds.customer (#10) (read rows: 1000) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.store (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.store (#8) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#7) (read rows: 28810) │ │ └── Probe -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#12) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#9) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build @@ -5827,21 +5824,21 @@ HashJoin: CROSS │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build - │ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) + │ │ │ │ │ └── Scan: default.tpcds.customer_address (#5) (read rows: 0) │ │ │ │ └── Probe - │ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) + │ │ │ │ └── Scan: default.tpcds.customer (#4) (read rows: 1000) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.store (#1) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ └── Scan: default.tpcds.item (#6) (read rows: 180) │ └── Probe - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) └── Probe - └── Scan: default.tpcds.promotion (read rows: 3) + └── Scan: default.tpcds.promotion (#2) (read rows: 3) # Q62 query I @@ -5894,21 +5891,21 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.web_site (read rows: 1) +│ └── Scan: default.tpcds.web_site 
(#3) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.warehouse (read rows: 1) + │ │ └── Scan: default.tpcds.warehouse (#1) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) └── Probe - └── Scan: default.tpcds.ship_mode (read rows: 20) + └── Scan: default.tpcds.ship_mode (#2) (read rows: 20) # Q63 query I @@ -5963,17 +5960,17 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.store (read rows: 1) +│ └── Scan: default.tpcds.store (#3) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#0) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) # Q64 query I @@ -6119,65 +6116,65 @@ HashJoin: INNER │ │ │ │ │ │ ├── Build │ │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.promotion (read rows: 3) -│ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store (read rows: 1) -│ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.item (read rows: 180) -│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) -│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) -│ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER -│ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) -│ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) -│ │ │ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) -│ │ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) -│ │ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) -│ │ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) -│ │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ │ 
│ │ │ │ │ └── Scan: default.tpcds.promotion (#30) (read rows: 3) │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store (#26) (read rows: 1) +│ │ │ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.item (#37) (read rows: 180) +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_returns (#22) (read rows: 1358) +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (#21) (read rows: 14313) +│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_returns (#20) (read rows: 2810) +│ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (#19) (read rows: 28810) +│ │ │ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#23) (read rows: 73049) +│ │ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer (#27) (read rows: 1000) +│ │ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#24) (read rows: 73049) +│ │ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#25) (read rows: 73049) +│ │ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (#28) (read rows: 19208) +│ │ │ │ │ │ │ └── Probe +│ │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (#29) (read rows: 19208) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ │ │ │ │ │ └── Scan: default.tpcds.household_demographics (#31) (read rows: 7200) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ │ │ └── Scan: default.tpcds.household_demographics (#32) (read rows: 7200) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ │ └── Scan: default.tpcds.customer_address (#33) (read rows: 500) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.income_band (read rows: 20) +│ │ │ └── Scan: default.tpcds.customer_address (#34) (read rows: 500) │ │ └── Probe -│ │ └── Scan: default.tpcds.income_band (read rows: 20) +│ │ └── Scan: default.tpcds.income_band (#35) (read rows: 20) │ └── Probe -│ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ └── Scan: default.tpcds.income_band (#36) (read rows: 20) └── Probe └── HashJoin: INNER ├── Build @@ -6193,65 +6190,65 @@ HashJoin: INNER │ │ │ │ │ ├── Build │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ 
├── Build - │ │ │ │ │ │ │ │ └── Scan: default.tpcds.promotion (read rows: 3) - │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store (read rows: 1) - │ │ │ │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.item (read rows: 180) - │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) - │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ │ │ │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER - │ │ │ │ │ │ │ │ │ │ │ │ ├── Build - │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) - │ │ │ │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) - │ │ │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) - │ │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) - │ │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) + │ │ │ │ │ │ │ └── Scan: default.tpcds.promotion (#11) (read rows: 3) │ │ │ │ │ │ └── Probe - │ │ │ │ │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store (#7) (read rows: 1) + │ │ │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.item (#18) (read rows: 180) + │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── HashJoin: INNER + │ │ │ │ │ │ │ │ │ │ │ │ │ │ ├── Build + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_returns (#3) (read rows: 1358) + │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (#2) (read rows: 14313) + │ │ 
│ │ │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) + │ │ │ │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) + │ │ │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) + │ │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.customer (#8) (read rows: 1000) + │ │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) + │ │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) + │ │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (#9) (read rows: 19208) + │ │ │ │ │ │ └── Probe + │ │ │ │ │ │ └── Scan: default.tpcds.customer_demographics (#10) (read rows: 19208) │ │ │ │ │ └── Probe - │ │ │ │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ │ │ │ └── Scan: default.tpcds.household_demographics (#12) (read rows: 7200) │ │ │ │ └── Probe - │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) + │ │ │ │ └── Scan: default.tpcds.household_demographics (#13) (read rows: 7200) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) + │ │ │ └── Scan: default.tpcds.customer_address (#14) (read rows: 500) │ │ └── Probe - │ │ └── Scan: default.tpcds.income_band (read rows: 20) + │ │ └── Scan: default.tpcds.customer_address (#15) (read rows: 500) │ └── Probe - │ └── Scan: default.tpcds.income_band (read rows: 20) + │ └── Scan: default.tpcds.income_band (#16) (read rows: 20) └── Probe - └── Scan: default.tpcds.store_returns (read rows: 2810) + └── Scan: default.tpcds.income_band (#17) (read rows: 20) # Q65 query I @@ -6296,25 +6293,25 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.store (read rows: 1) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) +│ │ └── Probe +│ │ └── Scan: default.tpcds.store_sales (#2) (read rows: 28810) +│ └── Probe +│ └── Scan: default.tpcds.store (#0) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) │ └── Probe - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#4) (read rows: 28810) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#1) (read rows: 180) # Q66 @@ -6542,39 +6539,39 @@ UnionAll ├── Left │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.warehouse (read rows: 1) +│ │ └── Scan: default.tpcds.warehouse (#1) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.ship_mode (read rows: 20) +│ │ │ └── Scan: default.tpcds.ship_mode (#4) (read rows: 20) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: 
default.tpcds.web_sales (#0) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.time_dim (read rows: 86400) +│ └── Scan: default.tpcds.time_dim (#3) (read rows: 86400) └── Right └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.warehouse (read rows: 1) + │ └── Scan: default.tpcds.warehouse (#6) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.ship_mode (read rows: 20) + │ │ └── Scan: default.tpcds.ship_mode (#9) (read rows: 20) │ └── Probe │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) + │ └── Scan: default.tpcds.catalog_sales (#5) (read rows: 14313) └── Probe - └── Scan: default.tpcds.time_dim (read rows: 86400) + └── Scan: default.tpcds.time_dim (#8) (read rows: 86400) # Q67 @@ -6629,15 +6626,15 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#3) (read rows: 180) # Q68 query I @@ -6689,33 +6686,33 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.customer_address (read rows: 500) +│ └── Scan: default.tpcds.customer_address (#6) (read rows: 500) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.customer_address (read rows: 500) + │ │ └── Scan: default.tpcds.customer_address (#4) (read rows: 500) │ └── Probe │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.store (read rows: 1) + │ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ └── Scan: default.tpcds.household_demographics (#3) (read rows: 7200) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#5) (read rows: 1000) # Q69 -query I +query explain join SELECT cd_gender, cd_marital_status, @@ -6769,39 +6766,39 @@ ORDER BY cd_gender, cd_credit_rating LIMIT 100; ---- -HashJoin: RIGHT MARK +HashJoin: LEFT MARK ├── Build -│ └── HashJoin: INNER +│ └── HashJoin: LEFT MARK │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── HashJoin: LEFT MARK +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── HashJoin: INNER +│ │ │ │ ├── Build +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#1) (read rows: 500) +│ │ │ │ └── Probe +│ │ │ │ └── Scan: default.tpcds.customer (#0) (read rows: 1000) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.customer_demographics (#2) (read rows: 19208) +│ │ └── Probe +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── Scan: default.tpcds.date_dim (#4) 
(read rows: 73049) +│ │ └── Probe +│ │ └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) +│ └── Probe +│ └── Scan: default.tpcds.web_sales (#5) (read rows: 7212) └── Probe - └── HashJoin: RIGHT MARK + └── HashJoin: INNER ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) └── Probe - └── HashJoin: RIGHT MARK - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) - └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.customer_address (read rows: 500) - │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 1000) - └── Probe - └── Scan: default.tpcds.customer_demographics (read rows: 19208) + └── Scan: default.tpcds.catalog_sales (#7) (read rows: 14313) # Q70 @@ -6846,7 +6843,7 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build @@ -6854,17 +6851,17 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.store (read rows: 1) + │ │ │ └── Scan: default.tpcds.store (#4) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.store (read rows: 1) + │ └── Scan: default.tpcds.store (#2) (read rows: 1) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) # Q71 query I @@ -6920,7 +6917,7 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#0) (read rows: 180) │ └── Probe │ └── UnionAll │ ├── Left @@ -6928,23 +6925,23 @@ HashJoin: INNER │ │ ├── Left │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ └── Scan: default.tpcds.web_sales (#1) (read rows: 7212) │ │ └── Right │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ └── Scan: default.tpcds.catalog_sales (#3) (read rows: 14313) │ └── Right │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#5) (read rows: 28810) └── Probe - └── Scan: default.tpcds.time_dim (read rows: 86400) + └── Scan: default.tpcds.time_dim (#7) (read rows: 86400) # Q72 @@ 
-6993,11 +6990,11 @@ HashJoin: RIGHT OUTER ├── Build │ └── HashJoin: LEFT OUTER │ ├── Build -│ │ └── Scan: default.tpcds.promotion (read rows: 3) +│ │ └── Scan: default.tpcds.promotion (#9) (read rows: 3) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.warehouse (read rows: 1) +│ │ └── Scan: default.tpcds.warehouse (#2) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build @@ -7013,23 +7010,23 @@ HashJoin: RIGHT OUTER │ │ │ │ │ │ ├── Build │ │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ │ │ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ │ │ │ │ │ └── Scan: default.tpcds.household_demographics (#5) (read rows: 7200) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ │ │ │ └── Scan: default.tpcds.customer_demographics (#4) (read rows: 19208) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ └── Scan: default.tpcds.item (#3) (read rows: 180) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.inventory (read rows: 23490) +│ └── Scan: default.tpcds.inventory (#1) (read rows: 23490) └── Probe - └── Scan: default.tpcds.catalog_returns (read rows: 1358) + └── Scan: default.tpcds.catalog_returns (#10) (read rows: 1358) # Q73 @@ -7081,17 +7078,17 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ └── Scan: default.tpcds.household_demographics (#3) (read rows: 7200) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#4) (read rows: 1000) # Q74 @@ -7174,21 +7171,21 @@ HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.date_dim (#14) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.store_sales (#13) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.customer (#12) (read rows: 0) │ │ │ └── Right │ │ │ └── HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#17) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ └── Scan: 
default.tpcds.web_sales (#16) (read rows: 7212) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ └── Scan: default.tpcds.customer (#15) (read rows: 1000) │ │ └── Probe │ │ └── UnionAll │ │ ├── Left @@ -7196,21 +7193,21 @@ HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ └── Scan: default.tpcds.customer (#0) (read rows: 1000) │ │ └── Right │ │ └── HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.web_sales (#4) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ └── Scan: default.tpcds.customer (#3) (read rows: 0) │ └── Probe │ └── UnionAll │ ├── Left @@ -7218,21 +7215,21 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.date_dim (#20) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.store_sales (#19) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer (read rows: 0) +│ │ └── Scan: default.tpcds.customer (#18) (read rows: 0) │ └── Right │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#23) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.web_sales (#22) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#21) (read rows: 1000) └── Probe └── UnionAll ├── Left @@ -7240,21 +7237,21 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#7) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 1000) + │ └── Scan: default.tpcds.customer (#6) (read rows: 1000) └── Right └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 0) + │ │ └── Scan: default.tpcds.date_dim (#11) (read rows: 0) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 0) + │ └── Scan: default.tpcds.web_sales (#10) (read rows: 0) └── Probe - └── Scan: default.tpcds.customer (read rows: 0) + └── Scan: default.tpcds.customer (#9) (read rows: 0) # Q75 @@ -7346,43 +7343,43 @@ HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ │ └── Scan: default.tpcds.item (#13) (read rows: 180) │ │ │ │ └── Probe │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#14) 
(read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ │ └── Scan: default.tpcds.catalog_sales (#12) (read rows: 14313) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) +│ │ │ └── Scan: default.tpcds.catalog_returns (#15) (read rows: 1358) │ │ └── Right │ │ └── HashJoin: RIGHT OUTER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ └── Scan: default.tpcds.item (#17) (read rows: 180) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#18) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#16) (read rows: 28810) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ └── Scan: default.tpcds.store_returns (#19) (read rows: 2810) │ └── Right │ └── HashJoin: RIGHT OUTER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── Scan: default.tpcds.item (#21) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#22) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.web_sales (#20) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.web_returns (read rows: 679) +│ └── Scan: default.tpcds.web_returns (#23) (read rows: 679) └── Probe └── UnionAll ├── Left @@ -7392,43 +7389,43 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ │ │ └── Scan: default.tpcds.item (#1) (read rows: 180) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) + │ │ │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) │ │ └── Probe - │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) + │ │ └── Scan: default.tpcds.catalog_returns (#3) (read rows: 1358) │ └── Right │ └── HashJoin: RIGHT OUTER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ │ └── Scan: default.tpcds.item (#5) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#4) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.store_returns (read rows: 2810) + │ └── Scan: default.tpcds.store_returns (#7) (read rows: 2810) └── Right └── HashJoin: RIGHT OUTER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ └── Scan: default.tpcds.item (#9) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#10) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.web_sales (#8) (read rows: 7212) └── Probe - └── Scan: 
default.tpcds.web_returns (read rows: 679) + └── Scan: default.tpcds.web_returns (#11) (read rows: 679) # Q76 query I @@ -7497,31 +7494,31 @@ UnionAll │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ └── Scan: default.tpcds.item (#1) (read rows: 180) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 0) +│ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Right │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.web_sales (read rows: 0) +│ │ └── Scan: default.tpcds.web_sales (#3) (read rows: 0) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) └── Right └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ └── Scan: default.tpcds.item (#7) (read rows: 180) │ └── Probe - │ └── Scan: default.tpcds.catalog_sales (read rows: 0) + │ └── Scan: default.tpcds.catalog_sales (#6) (read rows: 0) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) # Q77 query I @@ -7631,35 +7628,35 @@ UnionAll │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ │ │ └── Scan: default.tpcds.store_returns (#3) (read rows: 2810) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#5) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Right │ └── HashJoin: CROSS │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#9) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) +│ │ └── Scan: default.tpcds.catalog_returns (#8) (read rows: 1358) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: default.tpcds.catalog_sales (#6) (read rows: 14313) └── Right └── HashJoin: LEFT OUTER ├── Build @@ -7667,21 +7664,21 @@ UnionAll │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#14) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.web_returns (read rows: 679) + │ │ └── Scan: default.tpcds.web_returns (#13) (read rows: 679) │ └── Probe - │ └── Scan: 
default.tpcds.web_page (read rows: 1) + │ └── Scan: default.tpcds.web_page (#15) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#11) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.web_sales (#10) (read rows: 7212) └── Probe - └── Scan: default.tpcds.web_page (read rows: 1) + └── Scan: default.tpcds.web_page (#12) (read rows: 1) # Q78 query I @@ -7767,35 +7764,35 @@ HashJoin: LEFT OUTER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── HashJoin: LEFT OUTER -│ │ ├── Build -│ │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) -│ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── HashJoin: LEFT OUTER +│ ├── Build +│ │ └── Scan: default.tpcds.catalog_returns (#7) (read rows: 1358) +│ └── Probe +│ └── Scan: default.tpcds.catalog_sales (#6) (read rows: 14313) └── Probe └── HashJoin: LEFT OUTER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── HashJoin: LEFT OUTER - │ │ ├── Build - │ │ │ └── Scan: default.tpcds.web_returns (read rows: 679) - │ │ └── Probe - │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ └── Scan: default.tpcds.date_dim (#5) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── HashJoin: LEFT OUTER + │ ├── Build + │ │ └── Scan: default.tpcds.web_returns (#4) (read rows: 679) + │ └── Probe + │ └── Scan: default.tpcds.web_sales (#3) (read rows: 7212) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) └── Probe └── HashJoin: LEFT OUTER ├── Build - │ └── Scan: default.tpcds.store_returns (read rows: 2810) + │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) # Q79 query I @@ -7844,17 +7841,17 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#2) (read rows: 1) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ └── Scan: default.tpcds.household_demographics (#3) (read rows: 7200) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#4) (read rows: 1000) # Q80 query I @@ -7956,21 +7953,21 @@ UnionAll │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ │ │ └── Probe │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ │ └── Scan: default.tpcds.item (#4) (read rows: 180) │ │ │ │ └── Probe │ │ │ │ └── HashJoin: LEFT OUTER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ │ │ │ └── Scan: 
default.tpcds.store_returns (#1) (read rows: 2810) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store (read rows: 1) +│ │ │ └── Scan: default.tpcds.store (#3) (read rows: 1) │ │ └── Probe -│ │ └── Scan: default.tpcds.promotion (read rows: 3) +│ │ └── Scan: default.tpcds.promotion (#5) (read rows: 3) │ └── Right │ └── HashJoin: INNER │ ├── Build @@ -7978,21 +7975,21 @@ UnionAll │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) │ │ │ └── Probe │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ └── Scan: default.tpcds.item (#10) (read rows: 180) │ │ │ └── Probe │ │ │ └── HashJoin: LEFT OUTER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) +│ │ │ │ └── Scan: default.tpcds.catalog_returns (#7) (read rows: 1358) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ │ │ └── Scan: default.tpcds.catalog_sales (#6) (read rows: 14313) │ │ └── Probe -│ │ └── Scan: default.tpcds.catalog_page (read rows: 11718) +│ │ └── Scan: default.tpcds.catalog_page (#9) (read rows: 11718) │ └── Probe -│ └── Scan: default.tpcds.promotion (read rows: 3) +│ └── Scan: default.tpcds.promotion (#11) (read rows: 3) └── Right └── HashJoin: INNER ├── Build @@ -8000,21 +7997,21 @@ UnionAll │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#14) (read rows: 73049) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ │ └── Scan: default.tpcds.item (#16) (read rows: 180) │ │ └── Probe │ │ └── HashJoin: LEFT OUTER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.web_returns (read rows: 679) + │ │ │ └── Scan: default.tpcds.web_returns (#13) (read rows: 679) │ │ └── Probe - │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ │ └── Scan: default.tpcds.web_sales (#12) (read rows: 7212) │ └── Probe - │ └── Scan: default.tpcds.web_site (read rows: 1) + │ └── Scan: default.tpcds.web_site (#15) (read rows: 1) └── Probe - └── Scan: default.tpcds.promotion (read rows: 3) + └── Scan: default.tpcds.promotion (#17) (read rows: 3) # Q81 query I @@ -8075,35 +8072,35 @@ ORDER BY c_customer_id, ctr_total_return LIMIT 100; ---- -HashJoin: RIGHT SINGLE +HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── HashJoin: INNER +│ │ │ │ ├── Build +│ │ │ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) +│ │ │ │ └── Probe +│ │ │ │ └── Scan: default.tpcds.catalog_returns (#5) (read rows: 1358) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.customer_address (#7) (read rows: 500) +│ │ └── Probe +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.catalog_returns (#0) (read rows: 1358) +│ │ └── Probe +│ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 500) │ └── Probe -│ └── HashJoin: INNER -│ ├── Build -│ │ └── HashJoin: INNER -│ │ ├── Build -│ │ │ └── HashJoin: INNER -│ 
│ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) -│ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) -│ │ └── Probe -│ │ └── Scan: default.tpcds.customer_address (read rows: 500) -│ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#4) (read rows: 1000) └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) - │ └── Probe - │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) - └── Probe - └── Scan: default.tpcds.customer_address (read rows: 500) + └── Scan: default.tpcds.customer_address (#3) (read rows: 500) # Q82 query I @@ -8135,18 +8132,18 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ └── Scan: default.tpcds.item (#0) (read rows: 180) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.inventory (read rows: 23490) +│ └── Scan: default.tpcds.inventory (#1) (read rows: 23490) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#3) (read rows: 28810) # Q83 -query I +query explain join WITH sr_items AS (SELECT i_item_id item_id, @@ -8227,55 +8224,55 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: LEFT SEMI │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#14) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#13) (read rows: 73049) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ │ └── Scan: default.tpcds.item (#11) (read rows: 180) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_returns (read rows: 679) +│ │ │ └── Scan: default.tpcds.web_returns (#10) (read rows: 679) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#12) (read rows: 73049) │ └── Probe │ └── HashJoin: LEFT SEMI │ ├── Build │ │ └── HashJoin: LEFT SEMI │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#9) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#8) (read rows: 73049) │ └── Probe │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.item (read rows: 180) +│ │ │ └── Scan: default.tpcds.item (#6) (read rows: 180) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_returns (read rows: 2810) +│ │ └── Scan: default.tpcds.catalog_returns (#5) (read rows: 1358) │ └── Probe -│ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) └── Probe └── HashJoin: LEFT SEMI ├── Build │ └── HashJoin: LEFT SEMI │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.item (read rows: 180) + │ │ 
└── Scan: default.tpcds.item (#1) (read rows: 180) │ └── Probe - │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) + │ └── Scan: default.tpcds.store_returns (#0) (read rows: 2810) └── Probe - └── Scan: default.tpcds.date_dim (read rows: 73049) + └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) # Q84 query I @@ -8309,17 +8306,17 @@ HashJoin: INNER │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#1) (read rows: 500) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ └── Scan: default.tpcds.customer (#0) (read rows: 1000) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ │ └── Scan: default.tpcds.household_demographics (#3) (read rows: 7200) │ │ └── Probe -│ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ │ └── Scan: default.tpcds.income_band (#4) (read rows: 20) │ └── Probe -│ └── Scan: default.tpcds.income_band (read rows: 20) +│ └── Scan: default.tpcds.customer_demographics (#2) (read rows: 19208) └── Probe - └── Scan: default.tpcds.store_returns (read rows: 2810) + └── Scan: default.tpcds.store_returns (#5) (read rows: 2810) # Q85 query I @@ -8396,21 +8393,21 @@ HashJoin: INNER │ │ │ │ │ ├── Build │ │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ │ ├── Build -│ │ │ │ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ │ │ │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) │ │ │ │ │ │ └── Probe -│ │ │ │ │ │ └── Scan: default.tpcds.web_returns (read rows: 679) +│ │ │ │ │ │ └── Scan: default.tpcds.web_returns (#1) (read rows: 679) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ │ │ │ └── Scan: default.tpcds.customer_address (#5) (read rows: 500) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ │ └── Scan: default.tpcds.date_dim (#6) (read rows: 73049) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.web_page (read rows: 1) +│ │ │ └── Scan: default.tpcds.web_page (#2) (read rows: 1) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ └── Scan: default.tpcds.customer_demographics (#3) (read rows: 19208) │ └── Probe -│ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ └── Scan: default.tpcds.customer_demographics (#4) (read rows: 19208) └── Probe - └── Scan: default.tpcds.reason (read rows: 1) + └── Scan: default.tpcds.reason (#7) (read rows: 1) # Q86 query I @@ -8442,11 +8439,11 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#2) (read rows: 180) # Q87 query I @@ -8488,11 +8485,11 @@ HashJoin: LEFT ANTI │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#7) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.web_sales (#6) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.customer (read rows: 1000) +│ └── Scan: default.tpcds.customer (#8) (read rows: 1000) └── Probe └── HashJoin: 
LEFT ANTI ├── Build @@ -8500,21 +8497,21 @@ HashJoin: LEFT ANTI │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ │ └── Probe - │ │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) + │ │ └── Scan: default.tpcds.catalog_sales (#3) (read rows: 14313) │ └── Probe - │ └── Scan: default.tpcds.customer (read rows: 1000) + │ └── Scan: default.tpcds.customer (#5) (read rows: 1000) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) └── Probe - └── Scan: default.tpcds.customer (read rows: 1000) + └── Scan: default.tpcds.customer (#2) (read rows: 1000) # Q88 query I @@ -8666,13 +8663,13 @@ HashJoin: CROSS │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpcds.store (read rows: 0) +│ │ │ │ └── Scan: default.tpcds.store (#31) (read rows: 0) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ │ └── Scan: default.tpcds.store_sales (#28) (read rows: 28810) │ │ └── Probe -│ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ │ └── Scan: default.tpcds.household_demographics (#29) (read rows: 7200) │ └── Probe -│ └── Scan: default.tpcds.time_dim (read rows: 86400) +│ └── Scan: default.tpcds.time_dim (#30) (read rows: 86400) └── Probe └── HashJoin: CROSS ├── Build @@ -8682,13 +8679,13 @@ HashJoin: CROSS │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.store (#27) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#24) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ └── Scan: default.tpcds.household_demographics (#25) (read rows: 7200) │ └── Probe - │ └── Scan: default.tpcds.time_dim (read rows: 86400) + │ └── Scan: default.tpcds.time_dim (#26) (read rows: 86400) └── Probe └── HashJoin: CROSS ├── Build @@ -8698,13 +8695,13 @@ HashJoin: CROSS │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.store (#23) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#20) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ └── Scan: default.tpcds.household_demographics (#21) (read rows: 7200) │ └── Probe - │ └── Scan: default.tpcds.time_dim (read rows: 86400) + │ └── Scan: default.tpcds.time_dim (#22) (read rows: 86400) └── Probe └── HashJoin: CROSS ├── Build @@ -8714,13 +8711,13 @@ HashJoin: CROSS │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.store (#19) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#16) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ └── Scan: default.tpcds.household_demographics (#17) (read 
rows: 7200) │ └── Probe - │ └── Scan: default.tpcds.time_dim (read rows: 86400) + │ └── Scan: default.tpcds.time_dim (#18) (read rows: 86400) └── Probe └── HashJoin: CROSS ├── Build @@ -8730,13 +8727,13 @@ HashJoin: CROSS │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.store (#15) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#12) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ └── Scan: default.tpcds.household_demographics (#13) (read rows: 7200) │ └── Probe - │ └── Scan: default.tpcds.time_dim (read rows: 86400) + │ └── Scan: default.tpcds.time_dim (#14) (read rows: 86400) └── Probe └── HashJoin: CROSS ├── Build @@ -8746,13 +8743,13 @@ HashJoin: CROSS │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.store (#11) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#8) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ └── Scan: default.tpcds.household_demographics (#9) (read rows: 7200) │ └── Probe - │ └── Scan: default.tpcds.time_dim (read rows: 86400) + │ └── Scan: default.tpcds.time_dim (#10) (read rows: 86400) └── Probe └── HashJoin: CROSS ├── Build @@ -8762,13 +8759,13 @@ HashJoin: CROSS │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ │ └── Scan: default.tpcds.store (#7) (read rows: 0) │ │ │ └── Probe - │ │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ │ └── Scan: default.tpcds.store_sales (#4) (read rows: 28810) │ │ └── Probe - │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ └── Scan: default.tpcds.household_demographics (#5) (read rows: 7200) │ └── Probe - │ └── Scan: default.tpcds.time_dim (read rows: 86400) + │ └── Scan: default.tpcds.time_dim (#6) (read rows: 86400) └── Probe └── HashJoin: INNER ├── Build @@ -8776,13 +8773,13 @@ HashJoin: CROSS │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpcds.store (read rows: 0) + │ │ │ └── Scan: default.tpcds.store (#3) (read rows: 0) │ │ └── Probe - │ │ └── Scan: default.tpcds.store_sales (read rows: 28810) + │ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe - │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ └── Scan: default.tpcds.household_demographics (#1) (read rows: 7200) └── Probe - └── Scan: default.tpcds.time_dim (read rows: 86400) + └── Scan: default.tpcds.time_dim (#2) (read rows: 86400) # Q89 query I @@ -8809,17 +8806,17 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.store (read rows: 1) +│ └── Scan: default.tpcds.store (#3) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.item (read rows: 180) + │ └── Scan: default.tpcds.item (#0) (read rows: 180) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#1) (read rows: 28810) # Q90 query I @@ -8857,29 +8854,29 @@ HashJoin: CROSS │ ├── Build │ │ 
└── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.web_page (read rows: 0) +│ │ │ └── Scan: default.tpcds.web_page (#7) (read rows: 0) │ │ └── Probe │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ │ │ └── Scan: default.tpcds.household_demographics (#5) (read rows: 7200) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.web_sales (#4) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.time_dim (read rows: 86400) +│ └── Scan: default.tpcds.time_dim (#6) (read rows: 86400) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.web_page (read rows: 0) + │ │ └── Scan: default.tpcds.web_page (#3) (read rows: 0) │ └── Probe │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.household_demographics (read rows: 7200) + │ │ └── Scan: default.tpcds.household_demographics (#1) (read rows: 7200) │ └── Probe - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) └── Probe - └── Scan: default.tpcds.time_dim (read rows: 86400) + └── Scan: default.tpcds.time_dim (#2) (read rows: 86400) # Q91 query I @@ -8927,19 +8924,19 @@ HashJoin: INNER │ │ │ │ ├── Build │ │ │ │ │ └── HashJoin: INNER │ │ │ │ │ ├── Build -│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (read rows: 0) +│ │ │ │ │ │ └── Scan: default.tpcds.customer_address (#4) (read rows: 0) │ │ │ │ │ └── Probe -│ │ │ │ │ └── Scan: default.tpcds.customer (read rows: 1000) +│ │ │ │ │ └── Scan: default.tpcds.customer (#3) (read rows: 1000) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.catalog_returns (read rows: 1358) +│ │ │ │ └── Scan: default.tpcds.catalog_returns (#1) (read rows: 1358) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer_demographics (read rows: 19208) +│ │ └── Scan: default.tpcds.customer_demographics (#5) (read rows: 19208) │ └── Probe -│ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ └── Scan: default.tpcds.household_demographics (#6) (read rows: 7200) └── Probe - └── Scan: default.tpcds.call_center (read rows: 1) + └── Scan: default.tpcds.call_center (#0) (read rows: 1) # Q92 query I @@ -8962,23 +8959,23 @@ WHERE i_manufact_id = 350 ORDER BY sum(ws_ext_discount_amt) LIMIT 100; ---- -HashJoin: RIGHT SINGLE +HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── HashJoin: INNER +│ │ │ ├── Build +│ │ │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) +│ │ │ └── Probe +│ │ │ └── Scan: default.tpcds.web_sales (#3) (read rows: 7212) │ │ └── Probe -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.item (#1) (read rows: 180) │ └── Probe -│ └── Scan: default.tpcds.item (read rows: 180) +│ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) └── Probe - └── HashJoin: INNER - ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) - └── Probe - └── Scan: default.tpcds.web_sales (read rows: 7212) + └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) # Q93 query I @@ -9005,13 +9002,13 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.reason (read rows: 0) +│ └── Scan: default.tpcds.reason (#2) (read rows: 0) └── Probe └── HashJoin: 
LEFT OUTER ├── Build - │ └── Scan: default.tpcds.store_returns (read rows: 2810) + │ └── Scan: default.tpcds.store_returns (#1) (read rows: 2810) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) # Q94 query I @@ -9051,17 +9048,17 @@ HashJoin: RIGHT ANTI │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.web_site (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.web_site (#3) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 500) │ └── Probe -│ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ └── Scan: default.tpcds.web_sales (#4) (read rows: 7212) └── Probe - └── Scan: default.tpcds.web_returns (read rows: 679) + └── Scan: default.tpcds.web_returns (#5) (read rows: 679) # Q95 query I @@ -9108,29 +9105,29 @@ HashJoin: RIGHT SEMI │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpcds.web_site (read rows: 0) +│ │ │ │ │ └── Scan: default.tpcds.web_site (#3) (read rows: 0) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ │ │ └── Scan: default.tpcds.web_sales (#0) (read rows: 7212) │ │ │ └── Probe -│ │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) │ │ └── Probe -│ │ └── Scan: default.tpcds.customer_address (read rows: 500) +│ │ └── Scan: default.tpcds.customer_address (#2) (read rows: 500) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ │ └── Scan: default.tpcds.web_sales (#5) (read rows: 7212) │ └── Probe -│ └── Scan: default.tpcds.web_sales (read rows: 7212) +│ └── Scan: default.tpcds.web_sales (#4) (read rows: 7212) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.web_returns (read rows: 679) + │ └── Scan: default.tpcds.web_returns (#6) (read rows: 679) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.web_sales (read rows: 7212) + │ └── Scan: default.tpcds.web_sales (#8) (read rows: 7212) └── Probe - └── Scan: default.tpcds.web_sales (read rows: 7212) + └── Scan: default.tpcds.web_sales (#7) (read rows: 7212) # Q96 query I @@ -9156,13 +9153,13 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpcds.store (read rows: 0) +│ │ │ └── Scan: default.tpcds.store (#3) (read rows: 0) │ │ └── Probe -│ │ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ │ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) │ └── Probe -│ └── Scan: default.tpcds.household_demographics (read rows: 7200) +│ └── Scan: default.tpcds.household_demographics (#1) (read rows: 7200) └── Probe - └── Scan: default.tpcds.time_dim (read rows: 86400) + └── Scan: default.tpcds.time_dim (#2) (read rows: 86400) # Q97 query I @@ -9206,15 +9203,15 @@ HashJoin: FULL OUTER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#3) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.catalog_sales (read rows: 14313) +│ └── Scan: 
default.tpcds.catalog_sales (#2) (read rows: 14313) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ └── Scan: default.tpcds.date_dim (#1) (read rows: 73049) └── Probe - └── Scan: default.tpcds.store_sales (read rows: 28810) + └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) # Q98 query I @@ -9250,11 +9247,11 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpcds.date_dim (read rows: 73049) +│ │ └── Scan: default.tpcds.date_dim (#2) (read rows: 73049) │ └── Probe -│ └── Scan: default.tpcds.store_sales (read rows: 28810) +│ └── Scan: default.tpcds.store_sales (#0) (read rows: 28810) └── Probe - └── Scan: default.tpcds.item (read rows: 180) + └── Scan: default.tpcds.item (#1) (read rows: 180) # Q99 query I @@ -9306,22 +9303,18 @@ LIMIT 100; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpcds.call_center (read rows: 1) +│ └── Scan: default.tpcds.call_center (#3) (read rows: 1) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.warehouse (read rows: 1) + │ │ └── Scan: default.tpcds.warehouse (#1) (read rows: 1) │ └── Probe │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpcds.date_dim (read rows: 73049) + │ │ └── Scan: default.tpcds.date_dim (#4) (read rows: 73049) │ └── Probe - │ └── Scan: default.tpcds.catalog_sales (read rows: 14313) + │ └── Scan: default.tpcds.catalog_sales (#0) (read rows: 14313) └── Probe - └── Scan: default.tpcds.ship_mode (read rows: 20) - - -statement ok -set enable_runtime_filter = 0; + └── Scan: default.tpcds.ship_mode (#2) (read rows: 20) \ No newline at end of file diff --git a/tests/sqllogictests/suites/tpch/join.test b/tests/sqllogictests/suites/tpch/join.test index d82496001369..6beb1d25c877 100644 --- a/tests/sqllogictests/suites/tpch/join.test +++ b/tests/sqllogictests/suites/tpch/join.test @@ -4,9 +4,6 @@ set sandbox_tenant = 'test_tenant'; statement ok use tpch_test; -statement ok -set enable_runtime_filter = 1; - query I select c_custkey, count(o_orderkey) as c_count @@ -332,5 +329,28 @@ select l_orderkey from (select * from lineitem order by l_orderkey limit 5000) a 3 3 -statement ok -set enable_runtime_filter = 0; \ No newline at end of file +# LEFT OUTER / LEFT SINGEL / FULL +query I +select l_orderkey, o_orderdate, o_shippriority from lineitem left join orders on l_orderkey = o_orderkey and o_orderdate < to_date('1995-03-15') order by o_orderdate limit 5; +---- +571586 1992-01-01 0 +190656 1992-01-01 0 +359170 1992-01-01 0 +414725 1992-01-01 0 +45697 1992-01-01 0 + +# LEFT ANTI +query I +select o_custkey from orders where not exists (select * from customer where substring(c_phone from 1 for 2) in ('13', '31', '23', '29', '30', '18', '17') and o_custkey = c_custkey) order by o_custkey limit 10; +---- +1 +1 +1 +1 +1 +1 +1 +1 +1 +4 + diff --git a/tests/sqllogictests/suites/tpch/queries.test b/tests/sqllogictests/suites/tpch/queries.test index ff5b0e1923d4..f73b0ec40d14 100644 --- a/tests/sqllogictests/suites/tpch/queries.test +++ b/tests/sqllogictests/suites/tpch/queries.test @@ -1,9 +1,6 @@ statement ok set sandbox_tenant = 'test_tenant'; -statement ok -set enable_runtime_filter = 1; - statement ok use tpch_test; @@ -1175,7 +1172,7 @@ order by l_returnflag, l_linestatus; ---- -Scan: default.tpch_test.lineitem (read rows: 600572) +Scan: default.tpch_test.lineitem (#0) (read rows: 600572) # Q2 query I @@ -1223,7 +1220,7 @@ order by s_name, p_partkey; ---- -HashJoin: RIGHT SINGLE +HashJoin: 
INNER ├── Build │ └── HashJoin: INNER │ ├── Build @@ -1231,17 +1228,17 @@ HashJoin: RIGHT SINGLE │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build -│ │ │ │ └── Scan: default.tpch_test.region (read rows: 5) +│ │ │ │ └── Scan: default.tpch_test.region (#4) (read rows: 5) │ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.nation (read rows: 25) +│ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) │ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (read rows: 1000) +│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.part (read rows: 20000) +│ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) │ └── Probe -│ └── Scan: default.tpch_test.partsupp (read rows: 80000) +│ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) └── Probe └── HashJoin: INNER ├── Build @@ -1249,13 +1246,13 @@ HashJoin: RIGHT SINGLE │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (read rows: 5) + │ │ │ └── Scan: default.tpch_test.region (#8) (read rows: 5) │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (read rows: 25) + │ │ └── Scan: default.tpch_test.nation (#7) (read rows: 25) │ └── Probe - │ └── Scan: default.tpch_test.supplier (read rows: 1000) + │ └── Scan: default.tpch_test.supplier (#6) (read rows: 1000) └── Probe - └── Scan: default.tpch_test.partsupp (read rows: 80000) + └── Scan: default.tpch_test.partsupp (#5) (read rows: 80000) # Q3 query I @@ -1287,11 +1284,11 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.customer (read rows: 15000) +│ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) │ └── Probe -│ └── Scan: default.tpch_test.orders (read rows: 150000) +│ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) # Q4 query I @@ -1319,9 +1316,9 @@ order by ---- HashJoin: RIGHT SEMI ├── Build -│ └── Scan: default.tpch_test.orders (read rows: 150000) +│ └── Scan: default.tpch_test.orders (#0) (read rows: 150000) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) # Q5 query I @@ -1352,25 +1349,25 @@ order by ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.customer (read rows: 15000) +│ └── Scan: default.tpch_test.supplier (#3) (read rows: 1000) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpch_test.orders (read rows: 150000) + │ └── HashJoin: INNER + │ ├── Build + │ │ └── HashJoin: INNER + │ │ ├── Build + │ │ │ └── HashJoin: INNER + │ │ │ ├── Build + │ │ │ │ └── Scan: default.tpch_test.region (#5) (read rows: 5) + │ │ │ └── Probe + │ │ │ └── Scan: default.tpch_test.nation (#4) (read rows: 25) + │ │ └── Probe + │ │ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) + │ └── Probe + │ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── HashJoin: INNER - │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (read rows: 5) - │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (read rows: 25) - │ └── Probe - │ └── Scan: default.tpch_test.supplier (read rows: 1000) - └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) # Q6 query I @@ -1384,7 
+1381,7 @@ where and l_discount between 0.05 and 0.07 and l_quantity < 24; ---- -Scan: default.tpch_test.lineitem (read rows: 600572) +Scan: default.tpch_test.lineitem (#0) (read rows: 600572) # Q7 query I @@ -1432,9 +1429,9 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.nation (read rows: 25) +│ │ └── Scan: default.tpch_test.nation (#4) (read rows: 25) │ └── Probe -│ └── Scan: default.tpch_test.supplier (read rows: 1000) +│ └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) └── Probe └── HashJoin: INNER ├── Build @@ -1442,13 +1439,13 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.nation (read rows: 25) + │ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) │ │ └── Probe - │ │ └── Scan: default.tpch_test.customer (read rows: 15000) + │ │ └── Scan: default.tpch_test.customer (#3) (read rows: 15000) │ └── Probe - │ └── Scan: default.tpch_test.orders (read rows: 150000) + │ └── Scan: default.tpch_test.orders (#2) (read rows: 150000) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) # Q8 query I @@ -1492,7 +1489,7 @@ order by ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.nation (read rows: 25) +│ └── Scan: default.tpch_test.nation (#6) (read rows: 25) └── Probe └── HashJoin: INNER ├── Build @@ -1500,9 +1497,9 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build - │ │ │ └── Scan: default.tpch_test.region (read rows: 5) + │ │ │ └── Scan: default.tpch_test.region (#7) (read rows: 5) │ │ └── Probe - │ │ └── Scan: default.tpch_test.nation (read rows: 25) + │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) │ └── Probe │ └── HashJoin: INNER │ ├── Build @@ -1510,15 +1507,15 @@ HashJoin: INNER │ │ ├── Build │ │ │ └── HashJoin: INNER │ │ │ ├── Build - │ │ │ │ └── Scan: default.tpch_test.part (read rows: 20000) + │ │ │ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) │ │ │ └── Probe - │ │ │ └── Scan: default.tpch_test.lineitem (read rows: 600572) + │ │ │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) │ │ └── Probe - │ │ └── Scan: default.tpch_test.orders (read rows: 150000) + │ │ └── Scan: default.tpch_test.orders (#3) (read rows: 150000) │ └── Probe - │ └── Scan: default.tpch_test.customer (read rows: 15000) + │ └── Scan: default.tpch_test.customer (#4) (read rows: 15000) └── Probe - └── Scan: default.tpch_test.supplier (read rows: 1000) + └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) # Q9 query I @@ -1557,25 +1554,25 @@ limit 5; ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.partsupp (read rows: 80000) +│ └── HashJoin: INNER +│ ├── Build +│ │ └── HashJoin: INNER +│ │ ├── Build +│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) +│ │ └── Probe +│ │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) +│ └── Probe +│ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000) └── Probe └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpch_test.nation (read rows: 25) + │ │ └── Scan: default.tpch_test.part (#0) (read rows: 20000) │ └── Probe - │ └── Scan: default.tpch_test.supplier (read rows: 1000) + │ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) └── Probe - └── HashJoin: INNER - ├── Build - │ └── HashJoin: INNER - │ ├── Build - │ │ └── Scan: default.tpch_test.part (read rows: 20000) - │ └── Probe - │ └── Scan: 
default.tpch_test.lineitem (read rows: 600572) - └── Probe - └── Scan: default.tpch_test.orders (read rows: 150000) + └── Scan: default.tpch_test.orders (#4) (read rows: 150000) # Q10 query I @@ -1615,15 +1612,15 @@ HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.nation (read rows: 25) +│ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) │ └── Probe │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.orders (read rows: 150000) +│ │ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) │ └── Probe -│ └── Scan: default.tpch_test.customer (read rows: 15000) +│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) # Q11 query I @@ -1661,21 +1658,21 @@ RangeJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpch_test.nation (read rows: 25) +│ │ │ └── Scan: default.tpch_test.nation (#5) (read rows: 25) │ │ └── Probe -│ │ └── Scan: default.tpch_test.supplier (read rows: 1000) +│ │ └── Scan: default.tpch_test.supplier (#4) (read rows: 1000) │ └── Probe -│ └── Scan: default.tpch_test.partsupp (read rows: 80000) +│ └── Scan: default.tpch_test.partsupp (#3) (read rows: 80000) └── Right └── HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build - │ │ └── Scan: default.tpch_test.nation (read rows: 25) + │ │ └── Scan: default.tpch_test.nation (#2) (read rows: 25) │ └── Probe - │ └── Scan: default.tpch_test.supplier (read rows: 1000) + │ └── Scan: default.tpch_test.supplier (#1) (read rows: 1000) └── Probe - └── Scan: default.tpch_test.partsupp (read rows: 80000) + └── Scan: default.tpch_test.partsupp (#0) (read rows: 80000) # Q12 query I @@ -1710,9 +1707,9 @@ order by ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) └── Probe - └── Scan: default.tpch_test.orders (read rows: 150000) + └── Scan: default.tpch_test.orders (#0) (read rows: 150000) # Q13 query I @@ -1742,9 +1739,9 @@ order by ---- HashJoin: RIGHT OUTER ├── Build -│ └── Scan: default.tpch_test.customer (read rows: 15000) +│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) └── Probe - └── Scan: default.tpch_test.orders (read rows: 150000) + └── Scan: default.tpch_test.orders (#1) (read rows: 150000) # Q14 query I @@ -1764,9 +1761,9 @@ where ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) └── Probe - └── Scan: default.tpch_test.part (read rows: 20000) + └── Scan: default.tpch_test.part (#1) (read rows: 20000) # Q15 query T @@ -1803,13 +1800,13 @@ order by ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpch_test.lineitem (read rows: 600572) + │ └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) └── Probe - └── Scan: default.tpch_test.supplier (read rows: 1000) + └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) # Q15 query T @@ -1846,7 +1843,7 @@ order by ---- MaterializedCte: 0 ├── Left -│ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ └── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) └── Right └── HashJoin: INNER ├── Build @@ -1860,7 +1857,7 @@ 
MaterializedCte: 0 │ ├── CTE index: 0, sub index: 1 │ └── estimated rows: 81.00 └── Probe - └── Scan: default.tpch_test.supplier (read rows: 1000) + └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) # Q16 query I @@ -1898,13 +1895,13 @@ limit 20; ---- HashJoin: RIGHT MARK ├── Build -│ └── Scan: default.tpch_test.supplier (read rows: 1000) +│ └── Scan: default.tpch_test.supplier (#2) (read rows: 1000) └── Probe └── HashJoin: INNER ├── Build - │ └── Scan: default.tpch_test.part (read rows: 20000) + │ └── Scan: default.tpch_test.part (#1) (read rows: 20000) └── Probe - └── Scan: default.tpch_test.partsupp (read rows: 80000) + └── Scan: default.tpch_test.partsupp (#0) (read rows: 80000) #Q17 query I @@ -1926,15 +1923,15 @@ where l_partkey = p_partkey ); ---- -HashJoin: RIGHT SINGLE +HashJoin: INNER ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.part (read rows: 20000) +│ │ └── Scan: default.tpch_test.part (#1) (read rows: 20000) │ └── Probe -│ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) #Q18 query I @@ -1977,13 +1974,13 @@ HashJoin: INNER │ ├── Build │ │ └── HashJoin: INNER │ │ ├── Build -│ │ │ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ │ │ └── Scan: default.tpch_test.lineitem (#3) (read rows: 600572) │ │ └── Probe -│ │ └── Scan: default.tpch_test.orders (read rows: 150000) +│ │ └── Scan: default.tpch_test.orders (#1) (read rows: 150000) │ └── Probe -│ └── Scan: default.tpch_test.customer (read rows: 15000) +│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#2) (read rows: 600572) # Q19 query I @@ -2059,9 +2056,9 @@ where ---- HashJoin: INNER ├── Build -│ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ └── Scan: default.tpch_test.lineitem (#0) (read rows: 600572) └── Probe - └── Scan: default.tpch_test.part (read rows: 20000) + └── Scan: default.tpch_test.part (#1) (read rows: 20000) # Q20 query I @@ -2107,19 +2104,19 @@ HashJoin: RIGHT SEMI ├── Build │ └── HashJoin: INNER │ ├── Build -│ │ └── Scan: default.tpch_test.nation (read rows: 25) +│ │ └── Scan: default.tpch_test.nation (#1) (read rows: 25) │ └── Probe -│ └── Scan: default.tpch_test.supplier (read rows: 1000) +│ └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) └── Probe - └── HashJoin: RIGHT SINGLE + └── HashJoin: INNER ├── Build │ └── HashJoin: LEFT SEMI │ ├── Build - │ │ └── Scan: default.tpch_test.part (read rows: 20000) + │ │ └── Scan: default.tpch_test.part (#3) (read rows: 20000) │ └── Probe - │ └── Scan: default.tpch_test.partsupp (read rows: 80000) + │ └── Scan: default.tpch_test.partsupp (#2) (read rows: 80000) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#4) (read rows: 600572) # Q21 query I @@ -2173,17 +2170,17 @@ HashJoin: RIGHT ANTI │ │ │ ├── Build │ │ │ │ └── HashJoin: INNER │ │ │ │ ├── Build -│ │ │ │ │ └── Scan: default.tpch_test.nation (read rows: 25) +│ │ │ │ │ └── Scan: default.tpch_test.nation (#3) (read rows: 25) │ │ │ │ └── Probe -│ │ │ │ └── Scan: default.tpch_test.supplier (read rows: 1000) +│ │ │ │ └── Scan: default.tpch_test.supplier (#0) (read rows: 1000) │ │ │ └── Probe -│ │ │ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ │ │ 
└── Scan: default.tpch_test.lineitem (#1) (read rows: 600572) │ │ └── Probe -│ │ └── Scan: default.tpch_test.orders (read rows: 150000) +│ │ └── Scan: default.tpch_test.orders (#2) (read rows: 150000) │ └── Probe -│ └── Scan: default.tpch_test.lineitem (read rows: 600572) +│ └── Scan: default.tpch_test.lineitem (#4) (read rows: 600572) └── Probe - └── Scan: default.tpch_test.lineitem (read rows: 600572) + └── Scan: default.tpch_test.lineitem (#5) (read rows: 600572) # Q22 query I @@ -2229,12 +2226,8 @@ HashJoin: RIGHT ANTI ├── Build │ └── RangeJoin: INNER │ ├── Left -│ │ └── Scan: default.tpch_test.customer (read rows: 15000) +│ │ └── Scan: default.tpch_test.customer (#1) (read rows: 15000) │ └── Right -│ └── Scan: default.tpch_test.customer (read rows: 15000) +│ └── Scan: default.tpch_test.customer (#0) (read rows: 15000) └── Probe - └── Scan: default.tpch_test.orders (read rows: 150000) - - -statement ok -set enable_runtime_filter = 0; + └── Scan: default.tpch_test.orders (#2) (read rows: 150000) diff --git a/tests/suites/0_stateless/00_dummy/00_0002_dummy_select_py.py b/tests/suites/0_stateless/00_dummy/00_0002_dummy_select_py.py index 7c96bb83c511..e6057f5beb17 100755 --- a/tests/suites/0_stateless/00_dummy/00_0002_dummy_select_py.py +++ b/tests/suites/0_stateless/00_dummy/00_0002_dummy_select_py.py @@ -5,7 +5,7 @@ import signal import boto3 -from moto import mock_s3 +from moto import mock_aws CURDIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(CURDIR, "../../../helpers")) @@ -40,7 +40,7 @@ def save(self): s3.put_object(Bucket="mybucket", Key=self.name, Body=self.value) -@mock_s3 +@mock_aws def test_my_model_save(): conn = boto3.resource("s3", region_name="us-east-1") # We need to create the bucket since this is all in Moto's 'virtual' AWS account
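
Note on the last hunk: the stateless test is migrated from moto's per-service `mock_s3` decorator to the unified `mock_aws` decorator, which moto 5.x uses for all services. A minimal sketch of the new usage, assuming moto 5.x and boto3 are installed (bucket and key names are illustrative, not taken from the patch):

    import boto3
    from moto import mock_aws

    @mock_aws  # one decorator now covers every mocked AWS service
    def test_put_and_get_object():
        s3 = boto3.resource("s3", region_name="us-east-1")
        # The bucket must be created first: moto starts from an empty virtual account.
        s3.create_bucket(Bucket="mybucket")
        s3.Object("mybucket", "greeting").put(Body=b"hello")
        body = s3.Object("mybucket", "greeting").get()["Body"].read()
        assert body == b"hello"

This mirrors the pattern already used in 00_0002_dummy_select_py.py; only the decorator and import change.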
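For readers of the plan diffs above: every `HashJoin` node lists a Build child and a Probe child; the build side is hashed first and the probe side is then streamed against that hash table, which is why the smaller input (e.g. `nation`, `region`) usually sits under Build. A minimal, database-agnostic sketch of that pattern, not Databend's implementation (row dicts and key names are illustrative):

    from collections import defaultdict

    def hash_join(build_rows, probe_rows, build_key, probe_key):
        # Build phase: hash the (usually smaller) build side on its join key.
        table = defaultdict(list)
        for row in build_rows:
            table[row[build_key]].append(row)
        # Probe phase: stream the probe side and emit inner-join matches.
        for row in probe_rows:
            for match in table.get(row[probe_key], []):
                yield {**match, **row}

    # Example shaped like "Build: nation, Probe: supplier" in the TPC-H plans.
    nations = [{"n_nationkey": 1, "n_name": "FRANCE"}]
    suppliers = [{"s_suppkey": 10, "s_nationkey": 1}]
    print(list(hash_join(nations, suppliers, "n_nationkey", "s_nationkey")))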