From e6cf9d1599d12129fe0b5752dabe4dc661ba5940 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:19:15 +0800 Subject: [PATCH 01/33] refactor: only set global settings need to check super privilege (#17255) query setting, session setting, set variable will not check privileges --- .../interpreters/access/privilege_access.rs | 16 ++++++++++++++- .../18_rbac/18_0007_privilege_access.result | 6 ++++++ .../18_rbac/18_0007_privilege_access.sh | 20 +++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs index 4c5df38fb8068..6c15c2369f58c 100644 --- a/src/query/service/src/interpreters/access/privilege_access.rs +++ b/src/query/service/src/interpreters/access/privilege_access.rs @@ -1151,7 +1151,21 @@ impl AccessChecker for PrivilegeAccess { self.validate_access(&GrantObject::Global, UserPrivilegeType::Grant,false, false) .await?; } - Plan::Set(_) | Plan::Unset(_) | Plan::Kill(_) | Plan::SetPriority(_) | Plan::System(_) => { + Plan::Set(plan) => { + use databend_common_ast::ast::SetType; + if let SetType::SettingsGlobal = plan.set_type { + self.validate_access(&GrantObject::Global, UserPrivilegeType::Super, false, false) + .await?; + } + } + Plan::Unset(plan) => { + use databend_common_ast::ast::SetType; + if let SetType::SettingsGlobal = plan.unset_type { + self.validate_access(&GrantObject::Global, UserPrivilegeType::Super, false, false) + .await?; + } + } + Plan::Kill(_) | Plan::SetPriority(_) | Plan::System(_) => { self.validate_access(&GrantObject::Global, UserPrivilegeType::Super, false, false) .await?; } diff --git a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result index fd9a369f184f9..1bb4bd20095b0 100644 --- a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result +++ b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result @@ -142,3 +142,9 @@ OWNERSHIP default.default.t2 USER b GRANT OWNERSHIP ON 'default'.'default'.'t2' 1 2 3 +=== set privilege check === +100 +100 +1 +1 +=== set privilege check succ === diff --git a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh index ebbc772282056..0c663d9db0df7 100755 --- a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh +++ b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh @@ -302,3 +302,23 @@ echo "drop table if exists t1" | $BENDSQL_CLIENT_CONNECT echo "drop table if exists t2" | $BENDSQL_CLIENT_CONNECT echo "drop stage if exists s3;" | $BENDSQL_CLIENT_CONNECT echo "drop database if exists db01" | $BENDSQL_CLIENT_CONNECT + +echo "=== set privilege check ===" +echo "drop user if exists c" | $BENDSQL_CLIENT_CONNECT +echo "create user c identified by '123'" | $BENDSQL_CLIENT_CONNECT +export USER_C_CONNECT="bendsql --user=c --password=123 --host=${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT}" +echo "set session max_threads=1000" | $BENDSQL_CLIENT_CONNECT +echo "unset session max_threads" | $BENDSQL_CLIENT_CONNECT +echo "settings (ddl_column_type_nullable=0) select 100" | $BENDSQL_CLIENT_CONNECT +echo "SET variable a = 'a';" | $BENDSQL_CLIENT_CONNECT +echo "set global max_threads=1000" | $BENDSQL_CLIENT_CONNECT +echo "unset global max_threads" | $BENDSQL_CLIENT_CONNECT + +echo "set session max_threads=1000" | $USER_C_CONNECT +echo 
"unset session max_threads" | $USER_C_CONNECT +echo "settings (ddl_column_type_nullable=0) select 100" | $USER_C_CONNECT +echo "SET variable a = 'a';" | $USER_C_CONNECT +echo "set global max_threads=1000;" | $USER_C_CONNECT 2>&1 | grep "Super" | wc -l +echo "unset global max_threads;" | $USER_C_CONNECT 2>&1 | grep "Super" | wc -l +echo "drop user if exists c" | $BENDSQL_CLIENT_CONNECT +echo "=== set privilege check succ ===" From 61e962e531c51166b1f3dcd426e612e01a224183 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 13 Jan 2025 20:17:22 +0800 Subject: [PATCH 02/33] fix(query): hash table scatter will always send agg meta (#17245) * fix(query): hash table scatter will always send agg meta * fix(query): hash table scatter will always send agg meta * fix(query): z * fix(query): z * update * update * update * z * z * z * z * z * z * z --- src/query/expression/src/aggregate/payload.rs | 11 +++--- .../expression/src/aggregate/payload_flush.rs | 4 +- .../aggregator/aggregate_exchange_injector.rs | 26 +++++-------- .../new_transform_partition_bucket.rs | 37 ++++++++++++------- .../transforms/aggregator/serde/serde_meta.rs | 22 +++++------ .../serde/transform_aggregate_serializer.rs | 32 ++++++++++++++-- .../serde/transform_deserializer.rs | 32 ++++++++++------ ...transform_exchange_aggregate_serializer.rs | 21 +++++------ .../aggregator/transform_aggregate_final.rs | 1 + .../transforms/range_join/ie_join_state.rs | 4 ++ .../processors/transforms/transform_srf.rs | 3 ++ 11 files changed, 115 insertions(+), 78 deletions(-) diff --git a/src/query/expression/src/aggregate/payload.rs b/src/query/expression/src/aggregate/payload.rs index 562b2a899f9bf..f08e61280f5cc 100644 --- a/src/query/expression/src/aggregate/payload.rs +++ b/src/query/expression/src/aggregate/payload.rs @@ -402,15 +402,14 @@ impl Payload { true } - pub fn empty_block(&self) -> DataBlock { - let columns = self - .aggrs - .iter() - .map(|f| ColumnBuilder::with_capacity(&f.return_type().unwrap(), 0).build()) + pub fn empty_block(&self, fake_rows: Option) -> DataBlock { + let fake_rows = fake_rows.unwrap_or(0); + let columns = (0..self.aggrs.len()) + .map(|_| ColumnBuilder::repeat_default(&DataType::Binary, fake_rows).build()) .chain( self.group_types .iter() - .map(|t| ColumnBuilder::with_capacity(t, 0).build()), + .map(|t| ColumnBuilder::repeat_default(t, fake_rows).build()), ) .collect_vec(); DataBlock::new_from_columns(columns) diff --git a/src/query/expression/src/aggregate/payload_flush.rs b/src/query/expression/src/aggregate/payload_flush.rs index 632ec78f69990..e4b4735394c17 100644 --- a/src/query/expression/src/aggregate/payload_flush.rs +++ b/src/query/expression/src/aggregate/payload_flush.rs @@ -121,7 +121,7 @@ impl Payload { } if blocks.is_empty() { - return Ok(self.empty_block()); + return Ok(self.empty_block(None)); } DataBlock::concat(&blocks) } @@ -173,7 +173,7 @@ impl Payload { } if blocks.is_empty() { - return Ok(self.empty_block()); + return Ok(self.empty_block(None)); } DataBlock::concat(&blocks) diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs index 482920b53571e..dacfcc0826b08 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs @@ -173,26 +173,20 @@ impl FlightScatter for 
HashTableHashScatter { AggregateMeta::Partitioned { .. } => unreachable!(), AggregateMeta::AggregateSpilling(payload) => { for p in scatter_partitioned_payload(payload, self.buckets)? { - blocks.push(match p.len() == 0 { - true => DataBlock::empty(), - false => DataBlock::empty_with_meta( - AggregateMeta::create_agg_spilling(p), - ), - }); + blocks.push(DataBlock::empty_with_meta( + AggregateMeta::create_agg_spilling(p), + )); } } AggregateMeta::AggregatePayload(p) => { for payload in scatter_payload(p.payload, self.buckets)? { - blocks.push(match payload.len() == 0 { - true => DataBlock::empty(), - false => { - DataBlock::empty_with_meta(AggregateMeta::create_agg_payload( - p.bucket, - payload, - p.max_partition_count, - )) - } - }); + blocks.push(DataBlock::empty_with_meta( + AggregateMeta::create_agg_payload( + p.bucket, + payload, + p.max_partition_count, + ), + )); } } }; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs index 0e35ebaed73b2..39db4c52ca273 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs @@ -212,6 +212,7 @@ impl NewTransformPartitionBucket { #[allow(unused_assignments)] fn add_bucket(&mut self, mut data_block: DataBlock) -> Result<(isize, usize)> { let (mut bucket, mut partition_count) = (0, 0); + let mut is_empty_block = false; if let Some(block_meta) = data_block.get_meta() { if let Some(block_meta) = AggregateMeta::downcast_ref_from(block_meta) { (bucket, partition_count) = match block_meta { @@ -250,7 +251,11 @@ impl NewTransformPartitionBucket { if let Some(AggregateMeta::Spilled(buckets_payload)) = AggregateMeta::downcast_from(meta) { - let partition_count = buckets_payload[0].max_partition_count; + let partition_count = if !buckets_payload.is_empty() { + buckets_payload[0].max_partition_count + } else { + MAX_PARTITION_COUNT + }; self.max_partition_count = self.max_partition_count.max(partition_count); @@ -274,12 +279,14 @@ impl NewTransformPartitionBucket { unreachable!() } AggregateMeta::Serialized(payload) => { + is_empty_block = payload.data_block.is_empty(); self.max_partition_count = self.max_partition_count.max(payload.max_partition_count); (payload.bucket, payload.max_partition_count) } AggregateMeta::AggregatePayload(payload) => { + is_empty_block = payload.payload.len() == 0; self.max_partition_count = self.max_partition_count.max(payload.max_partition_count); @@ -298,23 +305,25 @@ impl NewTransformPartitionBucket { )); } - if self.all_inputs_init { - if partition_count != self.max_partition_count { - return Err(ErrorCode::Internal( + if !is_empty_block { + if self.all_inputs_init { + if partition_count != self.max_partition_count { + return Err(ErrorCode::Internal( "Internal, the partition count does not equal the max partition count on TransformPartitionBucket. 
", )); - } - match self.buckets_blocks.entry(bucket) { - Entry::Vacant(v) => { - v.insert(vec![data_block]); - } - Entry::Occupied(mut v) => { - v.get_mut().push(data_block); } - }; - } else { - self.unpartitioned_blocks.push(data_block); + match self.buckets_blocks.entry(bucket) { + Entry::Vacant(v) => { + v.insert(vec![data_block]); + } + Entry::Occupied(mut v) => { + v.get_mut().push(data_block); + } + }; + } else { + self.unpartitioned_blocks.push(data_block); + } } Ok((bucket, partition_count)) diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs index 731ec4e1b1049..b83cf2c97c90e 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs @@ -31,21 +31,15 @@ pub struct AggregateSerdeMeta { pub columns_layout: Vec, // use for new agg hashtable pub max_partition_count: usize, + pub is_empty: bool, } impl AggregateSerdeMeta { - pub fn create(bucket: isize) -> BlockMetaInfoPtr { - Box::new(AggregateSerdeMeta { - typ: BUCKET_TYPE, - bucket, - location: None, - data_range: None, - columns_layout: vec![], - max_partition_count: 0, - }) - } - - pub fn create_agg_payload(bucket: isize, max_partition_count: usize) -> BlockMetaInfoPtr { + pub fn create_agg_payload( + bucket: isize, + max_partition_count: usize, + is_empty: bool, + ) -> BlockMetaInfoPtr { Box::new(AggregateSerdeMeta { typ: BUCKET_TYPE, bucket, @@ -53,6 +47,7 @@ impl AggregateSerdeMeta { data_range: None, columns_layout: vec![], max_partition_count, + is_empty, }) } @@ -61,6 +56,7 @@ impl AggregateSerdeMeta { location: String, data_range: Range, columns_layout: Vec, + is_empty: bool, ) -> BlockMetaInfoPtr { Box::new(AggregateSerdeMeta { typ: SPILLED_TYPE, @@ -69,6 +65,7 @@ impl AggregateSerdeMeta { location: Some(location), data_range: Some(data_range), max_partition_count: 0, + is_empty, }) } @@ -86,6 +83,7 @@ impl AggregateSerdeMeta { location: Some(location), data_range: Some(data_range), max_partition_count, + is_empty: false, }) } } diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs index bd3bdb24d6339..096485fa98fcc 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs @@ -183,6 +183,7 @@ pub struct SerializeAggregateStream { pub payload: Pin>, flush_state: PayloadFlushState, end_iter: bool, + nums: usize, } unsafe impl Send for SerializeAggregateStream {} @@ -198,6 +199,7 @@ impl SerializeAggregateStream { flush_state: PayloadFlushState::default(), _params: params.clone(), end_iter: false, + nums: 0, } } } @@ -225,10 +227,32 @@ impl SerializeAggregateStream { } match block { - Some(block) => Ok(Some(block.add_meta(Some( - AggregateSerdeMeta::create_agg_payload(p.bucket, p.max_partition_count), - ))?)), - None => Ok(None), + Some(block) => { + self.nums += 1; + Ok(Some(block.add_meta(Some( + AggregateSerdeMeta::create_agg_payload( + p.bucket, + p.max_partition_count, + false, + ), + ))?)) + } + None => { + // always return at least one block + if self.nums == 0 { + self.nums += 1; + let block = p.payload.empty_block(Some(1)); 
+ Ok(Some(block.add_meta(Some( + AggregateSerdeMeta::create_agg_payload( + p.bucket, + p.max_partition_count, + true, + ), + ))?)) + } else { + Ok(None) + } + } } } } diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs index c73799432d17d..ed82428bbc7d2 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs @@ -89,18 +89,26 @@ impl TransformDeserializer { } Some(meta) => { return match meta.typ == BUCKET_TYPE { - true => Ok(DataBlock::empty_with_meta( - AggregateMeta::create_serialized( - meta.bucket, - deserialize_block( - dict, - fragment_data, - &self.schema, - self.arrow_schema.clone(), - )?, - meta.max_partition_count, - ), - )), + true => { + let mut block = deserialize_block( + dict, + fragment_data, + &self.schema, + self.arrow_schema.clone(), + )?; + + if meta.is_empty { + block = block.slice(0..0); + } + + Ok(DataBlock::empty_with_meta( + AggregateMeta::create_serialized( + meta.bucket, + block, + meta.max_partition_count, + ), + )) + } false => { let data_schema = Arc::new(exchange_defines::spilled_schema()); let arrow_schema = Arc::new(exchange_defines::spilled_arrow_schema()); diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs index 65e9e5cdc513a..a034b86038acb 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs @@ -149,6 +149,8 @@ impl BlockMetaTransform for TransformExchangeAggregateSeria } Some(AggregateMeta::AggregatePayload(p)) => { + let (bucket, max_partition_count) = (p.bucket, p.max_partition_count); + if index == self.local_pos { serialized_blocks.push(FlightSerialized::DataBlock( block.add_meta(Some(Box::new(AggregateMeta::AggregatePayload(p))))?, @@ -156,24 +158,19 @@ impl BlockMetaTransform for TransformExchangeAggregateSeria continue; } - let bucket = compute_block_number(p.bucket, p.max_partition_count)?; + let block_number = compute_block_number(bucket, max_partition_count)?; let stream = SerializeAggregateStream::create( &self.params, SerializePayload::AggregatePayload(p), ); let mut stream_blocks = stream.into_iter().collect::>>()?; - - if stream_blocks.is_empty() { - serialized_blocks.push(FlightSerialized::DataBlock(DataBlock::empty())); - } else { - let mut c = DataBlock::concat(&stream_blocks)?; - if let Some(meta) = stream_blocks[0].take_meta() { - c.replace_meta(meta); - } - - let c = serialize_block(bucket, c, &self.options)?; - serialized_blocks.push(FlightSerialized::DataBlock(c)); + debug_assert!(!stream_blocks.is_empty()); + let mut c = DataBlock::concat(&stream_blocks)?; + if let Some(meta) = stream_blocks[0].take_meta() { + c.replace_meta(meta); } + let c = serialize_block(block_number, c, &self.options)?; + serialized_blocks.push(FlightSerialized::DataBlock(c)); } }; } diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs 
b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs index 2a719a23c23d8..beded12043100 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs @@ -59,6 +59,7 @@ impl TransformFinalAggregate { AggregateMeta::Serialized(payload) => match agg_hashtable.as_mut() { Some(ht) => { debug_assert!(bucket == payload.bucket); + let payload = payload.convert_to_partitioned_payload( self.params.group_data_types.clone(), self.params.aggregate_functions.clone(), diff --git a/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs b/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs index 5c44b2be9cbf6..24991a001a112 100644 --- a/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs @@ -125,6 +125,10 @@ impl IEJoinState { fn intersection(&self, left_block: &DataBlock, right_block: &DataBlock) -> bool { let left_len = left_block.num_rows(); let right_len = right_block.num_rows(); + if left_len == 0 || right_len == 0 { + return false; + } + let left_l1_column = left_block.columns()[0] .value .convert_to_full_column(&self.l1_data_type, left_len); diff --git a/src/query/service/src/pipelines/processors/transforms/transform_srf.rs b/src/query/service/src/pipelines/processors/transforms/transform_srf.rs index 63e6b94b3bab5..ab71d492ed2cf 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_srf.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_srf.rs @@ -113,6 +113,9 @@ impl BlockingTransform for TransformSRF { } let input = self.input.take().unwrap(); + if input.is_empty() { + return Ok(None); + } let mut result_size = 0; let mut used = 0; From 15c2435597f9013e46ed7533d8b18d8d63488d51 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 14 Jan 2025 08:28:45 +0800 Subject: [PATCH 03/33] chore(sqllogictests): Check container and stop first to avoid conflict running (#17261) * chore(sqllogictests): Stop container to avoid conflict running * add sleep * check timeout --- tests/sqllogictests/src/util.rs | 38 +++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/tests/sqllogictests/src/util.rs b/tests/sqllogictests/src/util.rs index c985d6151f4a5..949ea98055d71 100644 --- a/tests/sqllogictests/src/util.rs +++ b/tests/sqllogictests/src/util.rs @@ -19,18 +19,21 @@ use std::path::PathBuf; use std::time::Duration; use std::time::Instant; +use bollard::container::RemoveContainerOptions; use bollard::Docker; use clap::Parser; use redis::Commands; use serde::Deserialize; use serde::Serialize; use serde_json::Value; +use testcontainers::core::error::WaitContainerError; use testcontainers::core::IntoContainerPort; use testcontainers::core::WaitFor; use testcontainers::runners::AsyncRunner; use testcontainers::ContainerAsync; use testcontainers::GenericImage; use testcontainers::ImageExt; +use testcontainers::TestcontainersError; use testcontainers_modules::mysql::Mysql; use testcontainers_modules::redis::Redis; use testcontainers_modules::redis::REDIS_PORT; @@ -256,9 +259,7 @@ pub async fn run_ttc_container( let start = Instant::now(); println!("Start container {container_name}"); - // Stop the container - let _ = docker.stop_container(&container_name, None).await; - let _ = 
docker.remove_container(&container_name, None).await; + stop_container(docker, &container_name).await; let mut i = 1; loop { @@ -290,6 +291,10 @@ pub async fn run_ttc_container( "Start container {} using {} secs failed: {}", container_name, duration, err ); + if let TestcontainersError::WaitContainer(WaitContainerError::StartupTimeout) = err + { + stop_container(docker, &container_name).await; + } if i == CONTAINER_RETRY_TIMES || duration >= CONTAINER_TIMEOUT_SECONDS { break; } else { @@ -327,9 +332,7 @@ async fn run_redis_server(docker: &Docker) -> Result> { let container_name = "redis".to_string(); println!("Start container {container_name}"); - // Stop the container - let _ = docker.stop_container(&container_name, None).await; - let _ = docker.remove_container(&container_name, None).await; + stop_container(docker, &container_name).await; let mut i = 1; loop { @@ -365,6 +368,10 @@ async fn run_redis_server(docker: &Docker) -> Result> { "Start container {} using {} secs failed: {}", container_name, duration, err ); + if let TestcontainersError::WaitContainer(WaitContainerError::StartupTimeout) = err + { + stop_container(docker, &container_name).await; + } if i == CONTAINER_RETRY_TIMES || duration >= CONTAINER_TIMEOUT_SECONDS { break; } else { @@ -381,9 +388,7 @@ async fn run_mysql_server(docker: &Docker) -> Result> { let container_name = "mysql".to_string(); println!("Start container {container_name}"); - // Stop the container - let _ = docker.stop_container(&container_name, None).await; - let _ = docker.remove_container(&container_name, None).await; + stop_container(docker, &container_name).await; // Add a table for test. // CREATE TABLE test.user( @@ -436,6 +441,10 @@ async fn run_mysql_server(docker: &Docker) -> Result> { "Start container {} using {} secs failed: {}", container_name, duration, err ); + if let TestcontainersError::WaitContainer(WaitContainerError::StartupTimeout) = err + { + stop_container(docker, &container_name).await; + } if i == CONTAINER_RETRY_TIMES || duration >= CONTAINER_TIMEOUT_SECONDS { break; } else { @@ -446,3 +455,14 @@ async fn run_mysql_server(docker: &Docker) -> Result> { } Err(format!("Start {container_name} failed").into()) } + +// Stop the running container to avoid conflict +async fn stop_container(docker: &Docker, container_name: &str) { + let _ = docker.stop_container(container_name, None).await; + let options = Some(RemoveContainerOptions { + force: true, + ..Default::default() + }); + let _ = docker.remove_container(container_name, options).await; + println!("Stop container {container_name}"); +} From 009a5ebac5829b51e8499f441f67438fa4e9f656 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 14 Jan 2025 10:32:00 +0800 Subject: [PATCH 04/33] chore: Bump opendal to pinned tag version (#17259) Signed-off-by: Xuanwo --- Cargo.lock | 44 +++++++++++++++++++++++--------------------- Cargo.toml | 4 ++-- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10b186b7710c4..9ea8284d9192e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2931,7 +2931,7 @@ dependencies = [ "databend-storages-common-table-meta", "limits-rs", "log", - "opendal 0.51.0", + "opendal 0.51.1", "serde", "serde_json", "serfig", @@ -3209,7 +3209,7 @@ dependencies = [ "libc", "object", "once_cell", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "paste", "prost", @@ -3559,7 +3559,7 @@ dependencies = [ "maplit", "num-derive", "num-traits", - "opendal 0.51.0", + "opendal 0.51.1", "paste", "prost", "serde", @@ -3797,7 +3797,7 @@ dependencies = [ "lz4", 
"match-template", "num", - "opendal 0.51.0", + "opendal 0.51.1", "rand", "ringbuffer", "roaring", @@ -4008,7 +4008,7 @@ dependencies = [ "log", "num-derive", "num-traits", - "opendal 0.51.0", + "opendal 0.51.1", "parking_lot 0.12.3", "prqlc", "rand", @@ -4044,7 +4044,7 @@ dependencies = [ "futures", "http 1.1.0", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "prometheus-client", "regex", @@ -4153,7 +4153,7 @@ dependencies = [ "itertools 0.13.0", "jsonb", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parking_lot 0.12.3", "parquet", "rand", @@ -4201,7 +4201,7 @@ dependencies = [ "futures", "hive_metastore", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "recursive", "serde", @@ -4311,7 +4311,7 @@ dependencies = [ "databend-storages-common-table-meta", "futures-util", "log", - "opendal 0.51.0", + "opendal 0.51.1", "orc-rust", "serde", "serde_json", @@ -4347,7 +4347,7 @@ dependencies = [ "ethnum", "futures", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "rand", "serde", @@ -4392,7 +4392,7 @@ dependencies = [ "databend-common-storages-parquet", "databend-storages-common-blocks", "databend-storages-common-table-meta", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "serde", "serde_json", @@ -4433,7 +4433,7 @@ dependencies = [ "enum-as-inner", "futures", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "serde", "serde_json", @@ -4498,7 +4498,7 @@ dependencies = [ "jsonb", "log", "once_cell", - "opendal 0.51.0", + "opendal 0.51.1", "parking_lot 0.12.3", "regex", "serde", @@ -4735,7 +4735,7 @@ dependencies = [ "jsonb", "jwt-simple", "log", - "opendal 0.51.0", + "opendal 0.51.1", "tantivy", "tempfile", ] @@ -5113,7 +5113,7 @@ dependencies = [ "mysql_async", "naive-cityhash", "num_cpus", - "opendal 0.51.0", + "opendal 0.51.1", "opensrv-mysql", "opentelemetry", "opentelemetry_sdk", @@ -5297,7 +5297,7 @@ dependencies = [ "fastrace", "futures", "log", - "opendal 0.51.0", + "opendal 0.51.1", ] [[package]] @@ -10226,8 +10226,9 @@ dependencies = [ [[package]] name = "object_store_opendal" -version = "0.48.3" -source = "git+https://github.com/apache/opendal?rev=f7f9990#f7f9990f95146400c3aef8afc11804e4a6e3afe3" +version = "0.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d47750d91b6d9fa245b3dff15d582dff8462d826cf6c2d5f2e91035d9a317f9" dependencies = [ "async-trait", "bytes", @@ -10235,7 +10236,7 @@ dependencies = [ "futures", "futures-util", "object_store", - "opendal 0.51.0", + "opendal 0.51.1", "pin-project", "tokio", ] @@ -10307,8 +10308,9 @@ dependencies = [ [[package]] name = "opendal" -version = "0.51.0" -source = "git+https://github.com/apache/opendal?rev=f7f9990#f7f9990f95146400c3aef8afc11804e4a6e3afe3" +version = "0.51.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9dcfa7a3615e3c60eb662ed6b46b6f244cf2658098f593c0c0915430b3a268" dependencies = [ "anyhow", "async-backtrace", diff --git a/Cargo.toml b/Cargo.toml index 1259a7be28ef4..07383209a6470 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -369,10 +369,10 @@ num-derive = "0.3.3" num-traits = "0.2.19" num_cpus = "1.13.1" object = "0.36.5" -object_store_opendal = { git = "https://github.com/apache/opendal", package = "object_store_opendal", rev = "f7f9990" } +object_store_opendal = { version = "0.49.0" } once_cell = "1.15.0" openai_api_rust = "0.1" -opendal = { version = "0.51", git = "https://github.com/apache/opendal", rev = "f7f9990", features = [ +opendal = { version = "0.51.1", features = [ "layers-fastrace", 
"layers-prometheus-client", "layers-async-backtrace", From 5b372c1193c18c1e653463cf65007d16f5625134 Mon Sep 17 00:00:00 2001 From: Damon07 <1490818749@qq.com> Date: Tue, 14 Jan 2025 11:02:20 +0800 Subject: [PATCH 05/33] fix(query): fix lazy columns missed in constant table scan (#17258) * fix(query): fix lazy columns missed in constant table scan Signed-off-by: damon * clear lazy columns when rewriting false filter to be empty scan Signed-off-by: damon --------- Signed-off-by: damon --- src/query/sql/src/planner/metadata.rs | 4 ++++ .../optimizer/rule/rewrite/rule_eliminate_filter.rs | 6 ++++++ tests/sqllogictests/suites/query/issues/issue_17158.test | 8 ++++++++ 3 files changed, 18 insertions(+) create mode 100644 tests/sqllogictests/suites/query/issues/issue_17158.test diff --git a/src/query/sql/src/planner/metadata.rs b/src/query/sql/src/planner/metadata.rs index bd8507d0604c3..d3a84062b70cf 100644 --- a/src/query/sql/src/planner/metadata.rs +++ b/src/query/sql/src/planner/metadata.rs @@ -170,6 +170,10 @@ impl Metadata { self.lazy_columns.extend(indices); } + pub fn clear_lazy_columns(&mut self) { + self.lazy_columns.clear(); + } + pub fn add_non_lazy_columns(&mut self, indices: HashSet) { debug_assert!(indices.iter().all(|i| *i < self.columns.len())); self.non_lazy_columns.extend(indices); diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs index 211fb3e01e2a5..8400b5dc6512e 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs @@ -77,6 +77,12 @@ impl Rule for RuleEliminateFilter { .derive_relational_prop(&RelExpr::with_s_expr(s_expr))? .output_columns .clone(); + + { + let mut metadata = self.metadata.write(); + metadata.clear_lazy_columns(); + } + let metadata = self.metadata.read(); let mut fields = Vec::with_capacity(output_columns.len()); diff --git a/tests/sqllogictests/suites/query/issues/issue_17158.test b/tests/sqllogictests/suites/query/issues/issue_17158.test new file mode 100644 index 0000000000000..cee1165ae2947 --- /dev/null +++ b/tests/sqllogictests/suites/query/issues/issue_17158.test @@ -0,0 +1,8 @@ +statement ok +create table tt2 (c0 bool, c1 int); + +statement ok +insert into tt2 values(true, 1),(false, 2),(true, 3); + +statement ok +select null, c0, 30, c1 from tt2 where false order by c0 LIMIT 3 OFFSET 0; \ No newline at end of file From 82611b17883857bc1b600bc37ee1b8ed67a176e2 Mon Sep 17 00:00:00 2001 From: Sky Fan <3374614481@qq.com> Date: Tue, 14 Jan 2025 11:03:21 +0800 Subject: [PATCH 06/33] fix: vacuum index not work if index is dropped by create or replace (#17263) * fix: vacuum index not work if index is dropped by create or replace * fix test * remove Any * add comment --- src/meta/api/src/name_id_value_api.rs | 12 ++- src/meta/api/src/schema_api_impl.rs | 85 +++++++++++++------ .../management/src/procedure/procedure_mgr.rs | 19 +++-- ...1_004_vacuum_drop_aggregating_index.result | 14 +++ .../01_004_vacuum_drop_aggregating_index.sh | 34 ++++++++ 5 files changed, 130 insertions(+), 34 deletions(-) diff --git a/src/meta/api/src/name_id_value_api.rs b/src/meta/api/src/name_id_value_api.rs index 46117cb9cf290..2d51a7dc194ff 100644 --- a/src/meta/api/src/name_id_value_api.rs +++ b/src/meta/api/src/name_id_value_api.rs @@ -71,17 +71,22 @@ where /// Such operations do not have any condition constraints. 
/// For example, a `name -> id` mapping can have a reverse `id -> name` mapping. /// + /// `mark_delete_records` is used to generate additional key-values for implementing `mark_delete` operation. + /// For example, when an index is dropped by `override_exist`, `__fd_marked_deleted_index// -> marked_deleted_index_meta` will be added. + /// /// If there is already a `name_ident` exists, return the existing id in a `Ok(Err(exist))`. /// Otherwise, create `name -> id -> value` and returns the created id in a `Ok(Ok(created))`. - async fn create_id_value( + async fn create_id_value( &self, name_ident: &K, value: &IdRsc::ValueType, override_exist: bool, associated_records: A, + mark_delete_records: M, ) -> Result, SeqV>>, MetaTxnError> where A: Fn(DataId) -> Vec<(String, Vec)> + Send, + M: Fn(DataId, &IdRsc::ValueType) -> Result)>, MetaError> + Send, { debug!(name_ident :? =name_ident; "NameIdValueApi: {}", func_name!()); @@ -114,6 +119,11 @@ where for (k, _v) in kvs { txn.if_then.push(TxnOp::delete(k)); } + + let kvs = mark_delete_records(seq_id.data, &seq_meta.data)?; + for (k, v) in kvs { + txn.if_then.push(TxnOp::put(k, v)); + } } else { return Ok(Err(seq_id)); } diff --git a/src/meta/api/src/schema_api_impl.rs b/src/meta/api/src/schema_api_impl.rs index 532d4a642af3f..4a1506d410875 100644 --- a/src/meta/api/src/schema_api_impl.rs +++ b/src/meta/api/src/schema_api_impl.rs @@ -753,12 +753,21 @@ impl + ?Sized> SchemaApi for KV { let name_ident_raw = serialize_struct(&IndexNameIdentRaw::from(name_ident))?; let create_res = self - .create_id_value(name_ident, meta, overriding, |id| { - vec![( - IndexIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), - name_ident_raw.clone(), - )] - }) + .create_id_value( + name_ident, + meta, + overriding, + |id| { + vec![( + IndexIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), + name_ident_raw.clone(), + )] + }, + |index_id, value| { + mark_index_as_deleted(name_ident.tenant(), value.table_id, *index_id) + .map(|(k, v)| vec![(k, v)]) + }, + ) .await?; match create_res { @@ -803,20 +812,9 @@ impl + ?Sized> SchemaApi for KV { IndexIdToNameIdent::new_generic(name_ident.tenant(), seq_id.data).to_string_key(), )); - // add __fd_marked_deleted_index// -> marked_deleted_index_meta - let marked_deleted_index_id_ident = MarkedDeletedIndexIdIdent::new_generic( - name_ident.tenant(), - MarkedDeletedIndexId::new(seq_meta.data.table_id, *seq_id.data), - ); - let marked_deleted_index_meta = MarkedDeletedIndexMeta { - dropped_on: Utc::now(), - index_type: MarkedDeletedIndexType::AGGREGATING, - }; - - txn.if_then.push(TxnOp::put( - marked_deleted_index_id_ident.to_string_key(), - serialize_struct(&marked_deleted_index_meta)?, - )); + let (key, value) = + mark_index_as_deleted(name_ident.tenant(), seq_meta.data.table_id, *seq_id.data)?; + txn.if_then.push(TxnOp::put(key, value)); let (succ, _responses) = send_txn(self, txn).await?; debug!(key :? =name_ident, id :? 
=&id_ident,succ = succ; "{}", func_name!()); @@ -2901,12 +2899,18 @@ impl + ?Sized> SchemaApi for KV { let name_ident_raw = serialize_struct(&CatalogNameIdentRaw::from(name_ident))?; let res = self - .create_id_value(name_ident, meta, false, |id| { - vec![( - CatalogIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), - name_ident_raw.clone(), - )] - }) + .create_id_value( + name_ident, + meta, + false, + |id| { + vec![( + CatalogIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), + name_ident_raw.clone(), + )] + }, + |_, _| Ok(vec![]), + ) .await?; Ok(res) @@ -3020,7 +3024,13 @@ impl + ?Sized> SchemaApi for KV { let name_ident = &req.dictionary_ident; let create_res = self - .create_id_value(name_ident, &req.dictionary_meta, false, |_| vec![]) + .create_id_value( + name_ident, + &req.dictionary_meta, + false, + |_| vec![], + |_, _| Ok(vec![]), + ) .await?; match create_res { @@ -4177,3 +4187,24 @@ fn typ() -> &'static str { .next() .unwrap_or("UnknownType") } + +/// add __fd_marked_deleted_index// -> marked_deleted_index_meta +pub fn mark_index_as_deleted( + tenant: &Tenant, + table_id: u64, + index_id: u64, +) -> Result<(String, Vec), MetaError> { + let marked_deleted_index_id_ident = MarkedDeletedIndexIdIdent::new_generic( + tenant, + MarkedDeletedIndexId::new(table_id, index_id), + ); + let marked_deleted_index_meta = MarkedDeletedIndexMeta { + dropped_on: Utc::now(), + index_type: MarkedDeletedIndexType::AGGREGATING, + }; + + Ok(( + marked_deleted_index_id_ident.to_string_key(), + serialize_struct(&marked_deleted_index_meta)?, + )) +} diff --git a/src/query/management/src/procedure/procedure_mgr.rs b/src/query/management/src/procedure/procedure_mgr.rs index 21a88e301522c..be765d04b3477 100644 --- a/src/query/management/src/procedure/procedure_mgr.rs +++ b/src/query/management/src/procedure/procedure_mgr.rs @@ -66,12 +66,19 @@ impl ProcedureMgr { let create_res = self .kv_api - .create_id_value(name_ident, meta, overriding, |id| { - vec![( - ProcedureIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), - name_ident_raw.clone(), - )] - }) + .create_id_value( + name_ident, + meta, + overriding, + |id| { + vec![( + ProcedureIdToNameIdent::new_generic(name_ident.tenant(), id) + .to_string_key(), + name_ident_raw.clone(), + )] + }, + |_, _| Ok(vec![]), + ) .await?; match create_res { diff --git a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result index 2c156409652a2..c2dbef8e4e291 100644 --- a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result +++ b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result @@ -37,3 +37,17 @@ before vacuum, should be 1 index dir after vacuum, should be 0 index dir 0 0 +>>>> create or replace database test_vacuum_drop_aggregating_index +>>>> create or replace table test_vacuum_drop_aggregating_index.agg(a int, b int,c int) 'fs:///tmp/test_vacuum_drop_aggregating_index/' +>>>> insert into test_vacuum_drop_aggregating_index.agg values (1,1,4), (1,2,1), (1,2,4) +3 +>>>> CREATE OR REPLACE AGGREGATING INDEX index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg; +>>>> insert into test_vacuum_drop_aggregating_index.agg values (2,2,5) +1 +>>>> REFRESH AGGREGATING INDEX index; +before vacuum, should be 1 index dir +1 +>>>> create or replace aggregating index index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg; +after vacuum, should be 0 index dir +0 
+>>>> drop aggregating index index diff --git a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh index b6b92e6ce8f50..b957280cf7ad1 100755 --- a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh +++ b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh @@ -86,3 +86,37 @@ echo "after vacuum, should be 0 index dir" find /tmp/test_vacuum_drop_aggregating_index/"$PREFIX_1"/_i_a/ -type f | wc -l find /tmp/test_vacuum_drop_aggregating_index/"$PREFIX_2"/_i_a/ -type f | wc -l + + +### create or replace index + +stmt "create or replace database test_vacuum_drop_aggregating_index" + +mkdir -p /tmp/test_vacuum_drop_aggregating_index/ + +stmt "create or replace table test_vacuum_drop_aggregating_index.agg(a int, b int,c int) 'fs:///tmp/test_vacuum_drop_aggregating_index/'" + + +stmt "insert into test_vacuum_drop_aggregating_index.agg values (1,1,4), (1,2,1), (1,2,4)" + +stmt "CREATE OR REPLACE AGGREGATING INDEX index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg;" + +stmt "insert into test_vacuum_drop_aggregating_index.agg values (2,2,5)" + +stmt "REFRESH AGGREGATING INDEX index;" + +SNAPSHOT_LOCATION=$(echo "select snapshot_location from fuse_snapshot('test_vacuum_drop_aggregating_index','agg') limit 1" | $BENDSQL_CLIENT_CONNECT) +PREFIX=$(echo "$SNAPSHOT_LOCATION" | cut -d'/' -f1-2) + +echo "before vacuum, should be 1 index dir" + +ls /tmp/test_vacuum_drop_aggregating_index/"$PREFIX"/_i_a/ | wc -l + +stmt "create or replace aggregating index index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg;" + +stmt "set data_retention_time_in_days=0; select * from fuse_vacuum_drop_aggregating_index('test_vacuum_drop_aggregating_index','agg')" > /dev/null + +echo "after vacuum, should be 0 index dir" +find /tmp/test_vacuum_drop_aggregating_index/"$PREFIX"/_i_a/ -type f | wc -l + +stmt "drop aggregating index index" From 420f4b4d56cb8e85c6de919e118ac48432c3a676 Mon Sep 17 00:00:00 2001 From: Winter Zhang Date: Tue, 14 Jan 2025 13:52:20 +0800 Subject: [PATCH 07/33] feat(cluster): support warehouse level show processlist and kill query (#17249) * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse --- src/query/catalog/src/plan/partition.rs | 7 ++- src/query/catalog/src/table.rs | 13 +++- src/query/catalog/src/table_context.rs | 2 + src/query/catalog/tests/it/partitions.rs | 2 +- .../management/src/warehouse/warehouse_api.rs | 2 + .../management/src/warehouse/warehouse_mgr.rs | 59 ++++++++++++++++-- src/query/service/src/clusters/cluster.rs | 47 ++++++++++---- .../src/interpreters/interpreter_kill.rs | 23 +++---- .../interpreters/interpreter_set_priority.rs | 22 +++---- .../interpreters/interpreter_system_action.rs | 19 +++--- .../interpreter_table_truncate.rs | 18 +++--- src/query/service/src/schedulers/scheduler.rs | 4 ++ .../src/servers/admin/v1/tenant_tables.rs | 3 +- src/query/service/src/sessions/query_ctx.rs | 11 +++- .../service/src/sessions/query_ctx_shared.rs | 33 +++++++++- .../src/table_functions/async_crash_me.rs | 4 -- .../table_functions/cloud/task_dependents.rs | 4 -- 
.../cloud/task_dependents_enable.rs | 4 -- .../src/table_functions/cloud/task_history.rs | 4 -- .../table_functions/numbers/numbers_table.rs | 8 ++- .../others/execute_background_job.rs | 4 -- .../table_functions/others/license_info.rs | 4 -- .../others/suggested_background_tasks.rs | 4 -- .../table_functions/others/tenant_quota.rs | 4 -- .../service/src/table_functions/others/udf.rs | 4 -- .../src/table_functions/sync_crash_me.rs | 4 -- .../tests/it/sql/exec/get_table_bind_test.rs | 8 +++ .../it/storages/fuse/operations/commit.rs | 8 +++ src/query/sql/src/executor/physical_plan.rs | 7 +++ .../sql/src/planner/optimizer/optimizer.rs | 15 +++++ src/query/sql/src/planner/optimizer/util.rs | 33 +++++++++- src/query/storages/delta/src/table.rs | 5 +- src/query/storages/fuse/src/fuse_table.rs | 5 +- .../function_template/simple_arg_func.rs | 10 +-- .../function_template/simple_func_template.rs | 27 +++++--- .../src/table_functions/set_cache_capacity.rs | 6 +- .../storages/hive/hive/src/hive_table.rs | 5 +- src/query/storages/iceberg/src/table.rs | 5 +- src/query/storages/memory/src/memory_table.rs | 7 ++- src/query/storages/null/src/null_table.rs | 5 +- src/query/storages/orc/src/table.rs | 5 +- .../src/parquet_rs/parquet_table/table.rs | 5 +- .../result_cache/src/table_function/table.rs | 4 -- src/query/storages/stage/src/stage_table.rs | 5 +- src/query/storages/stream/src/stream_table.rs | 5 +- .../storages/system/src/backtrace_table.rs | 3 +- src/query/storages/system/src/caches_table.rs | 3 +- src/query/storages/system/src/log_queue.rs | 11 ++-- .../system/src/malloc_stats_totals_table.rs | 3 +- .../storages/system/src/metrics_table.rs | 3 +- .../storages/system/src/processes_table.rs | 3 +- .../storages/system/src/queries_profiling.rs | 3 +- src/query/storages/system/src/table.rs | 61 +++++++++++++------ .../storages/system/src/temp_files_table.rs | 6 +- 54 files changed, 392 insertions(+), 187 deletions(-) diff --git a/src/query/catalog/src/plan/partition.rs b/src/query/catalog/src/plan/partition.rs index 026f80d4c32f2..b2ba71f4e5898 100644 --- a/src/query/catalog/src/plan/partition.rs +++ b/src/query/catalog/src/plan/partition.rs @@ -102,7 +102,9 @@ pub enum PartitionsShuffleKind { // Bind the Partition to executor by partition.rand() order. Rand, // Bind the Partition to executor by broadcast - Broadcast, + BroadcastCluster, + // Bind the Partition to warehouse executor by broadcast + BroadcastWarehouse, } #[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq)] @@ -190,7 +192,8 @@ impl Partitions { parts.shuffle(&mut rng); parts } - PartitionsShuffleKind::Broadcast => { + // the executors will be all nodes in the warehouse if a query is BroadcastWarehouse. 
+ PartitionsShuffleKind::BroadcastCluster | PartitionsShuffleKind::BroadcastWarehouse => { return Ok(executors_sorted .into_iter() .map(|executor| { diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 829a4c9e79800..7d6bdfa471d63 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -94,8 +94,8 @@ pub trait Table: Sync + Send { self.get_table_info().ident.table_id } - fn is_local(&self) -> bool { - true + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Local } fn as_any(&self) -> &dyn Any; @@ -450,7 +450,7 @@ pub trait Table: Sync + Send { false } - fn broadcast_truncate_to_cluster(&self) -> bool { + fn broadcast_truncate_to_warehouse(&self) -> bool { false } @@ -678,3 +678,10 @@ pub struct ColumnRange { pub min: Bound, pub max: Bound, } + +#[derive(Debug)] +pub enum DistributionLevel { + Local, + Cluster, + Warehouse, +} diff --git a/src/query/catalog/src/table_context.rs b/src/query/catalog/src/table_context.rs index 1e957eb2ea68c..824d03fec4945 100644 --- a/src/query/catalog/src/table_context.rs +++ b/src/query/catalog/src/table_context.rs @@ -234,6 +234,8 @@ pub trait TableContext: Send + Sync { fn get_settings(&self) -> Arc; fn get_session_settings(&self) -> Arc; fn get_cluster(&self) -> Arc; + fn set_cluster(&self, cluster: Arc); + async fn get_warehouse_cluster(&self) -> Result>; fn get_processes_info(&self) -> Vec; fn get_queued_queries(&self) -> Vec; fn get_queries_profile(&self) -> HashMap>; diff --git a/src/query/catalog/tests/it/partitions.rs b/src/query/catalog/tests/it/partitions.rs index 2199c93858f94..e9cbe441abd48 100644 --- a/src/query/catalog/tests/it/partitions.rs +++ b/src/query/catalog/tests/it/partitions.rs @@ -239,7 +239,7 @@ fn test_partition_reshuffle() { // Broadcast. { - let partitions = gen_parts(PartitionsShuffleKind::Broadcast, 3); + let partitions = gen_parts(PartitionsShuffleKind::BroadcastCluster, 3); let shuffle = partitions.reshuffle(executors_2.clone()).unwrap(); writeln!( diff --git a/src/query/management/src/warehouse/warehouse_api.rs b/src/query/management/src/warehouse/warehouse_api.rs index ed4f92c4610c5..dbeecd9bd69bf 100644 --- a/src/query/management/src/warehouse/warehouse_api.rs +++ b/src/query/management/src/warehouse/warehouse_api.rs @@ -132,5 +132,7 @@ pub trait WarehouseApi: Sync + Send { async fn discover(&self, node_id: &str) -> Result>; + async fn discover_warehouse_nodes(&self, node_id: &str) -> Result>; + async fn get_node_info(&self, node_id: &str) -> Result>; } diff --git a/src/query/management/src/warehouse/warehouse_mgr.rs b/src/query/management/src/warehouse/warehouse_mgr.rs index b0b563d209094..c0aa324f2a0d0 100644 --- a/src/query/management/src/warehouse/warehouse_mgr.rs +++ b/src/query/management/src/warehouse/warehouse_mgr.rs @@ -1485,13 +1485,34 @@ impl WarehouseApi for WarehouseMgr { return Err(ErrorCode::InvalidWarehouse("Warehouse name is empty.")); } - let warehouse_snapshot = self.warehouse_snapshot(&warehouse).await?; + let nodes_prefix = format!( + "{}/{}/", + self.cluster_node_key_prefix, + escape_for_key(&warehouse)? 
+ ); - Ok(warehouse_snapshot - .snapshot_nodes - .into_iter() - .map(|x| x.node_info) - .collect()) + let values = self.metastore.prefix_list_kv(&nodes_prefix).await?; + + let mut nodes_info = Vec::with_capacity(values.len()); + for (node_key, value) in values { + let mut node_info = serde_json::from_slice::(&value.data)?; + + let suffix = &node_key[nodes_prefix.len()..]; + + let Some((cluster, node)) = suffix.split_once('/') else { + return Err(ErrorCode::InvalidWarehouse(format!( + "Node key is invalid {:?}", + node_key + ))); + }; + + node_info.id = unescape_for_key(node)?; + node_info.cluster_id = unescape_for_key(cluster)?; + node_info.warehouse_id = warehouse.to_string(); + nodes_info.push(node_info); + } + + Ok(nodes_info) } async fn add_warehouse_cluster( @@ -2155,6 +2176,32 @@ impl WarehouseApi for WarehouseMgr { .collect::>()) } + async fn discover_warehouse_nodes(&self, node_id: &str) -> Result> { + let node_key = format!("{}/{}", self.node_key_prefix, escape_for_key(node_id)?); + + let Some(seq) = self.metastore.get_kv(&node_key).await? else { + return Err(ErrorCode::NotFoundClusterNode(format!( + "Node {} is offline, Please restart this node.", + node_id + ))); + }; + + let node = serde_json::from_slice::(&seq.data)?; + + if !node.assigned_warehouse() { + return Ok(vec![node]); + } + + let expect_version = DATABEND_COMMIT_VERSION.to_string(); + + Ok(self + .list_warehouse_nodes(node.warehouse_id.clone()) + .await? + .into_iter() + .filter(|x| x.binary_version == expect_version) + .collect::>()) + } + async fn get_node_info(&self, node_id: &str) -> Result> { let node_key = format!("{}/{}", self.node_key_prefix, escape_for_key(node_id)?); match self.metastore.get_kv(&node_key).await? { diff --git a/src/query/service/src/clusters/cluster.rs b/src/query/service/src/clusters/cluster.rs index 5ba2fa91125c6..3239aa0484fc9 100644 --- a/src/query/service/src/clusters/cluster.rs +++ b/src/query/service/src/clusters/cluster.rs @@ -273,17 +273,11 @@ impl ClusterDiscovery { Ok((lift_time, Arc::new(cluster_manager))) } - #[async_backtrace::framed] - pub async fn discover(&self, config: &InnerConfig) -> Result> { - let nodes = match config.query.cluster_id.is_empty() { - true => self.warehouse_manager.discover(&config.query.node_id).await, - false => { - self.warehouse_manager - .list_warehouse_cluster_nodes(&self.cluster_id, &self.cluster_id) - .await - } - }; - + async fn create_cluster_with_try_connect( + &self, + config: &InnerConfig, + nodes: Result>, + ) -> Result> { match nodes { Err(cause) => { metric_incr_cluster_error_count( @@ -336,6 +330,37 @@ impl ClusterDiscovery { } } + pub async fn discover_warehouse_nodes(&self, config: &InnerConfig) -> Result> { + let nodes = match config.query.cluster_id.is_empty() { + true => { + self.warehouse_manager + .discover_warehouse_nodes(&config.query.node_id) + .await + } + false => { + self.warehouse_manager + .list_warehouse_nodes(self.cluster_id.clone()) + .await + } + }; + + self.create_cluster_with_try_connect(config, nodes).await + } + + #[async_backtrace::framed] + pub async fn discover(&self, config: &InnerConfig) -> Result> { + let nodes = match config.query.cluster_id.is_empty() { + true => self.warehouse_manager.discover(&config.query.node_id).await, + false => { + self.warehouse_manager + .list_warehouse_cluster_nodes(&self.cluster_id, &self.cluster_id) + .await + } + }; + + self.create_cluster_with_try_connect(config, nodes).await + } + pub async fn find_node_by_warehouse( self: Arc, warehouse: &str, diff --git 
a/src/query/service/src/interpreters/interpreter_kill.rs b/src/query/service/src/interpreters/interpreter_kill.rs index 72309f0c2ecf8..e3bd3dcf2ba1e 100644 --- a/src/query/service/src/interpreters/interpreter_kill.rs +++ b/src/query/service/src/interpreters/interpreter_kill.rs @@ -31,7 +31,7 @@ use crate::sessions::QueryContext; pub struct KillInterpreter { ctx: Arc, plan: KillPlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl KillInterpreter { @@ -39,7 +39,7 @@ impl KillInterpreter { Ok(KillInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -47,29 +47,30 @@ impl KillInterpreter { Ok(KillInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } #[async_backtrace::framed] - async fn kill_cluster_query(&self) -> Result { - let cluster = self.ctx.get_cluster(); + async fn kill_warehouse_query(&self) -> Result { let settings = self.ctx.get_settings(); + let warehouse = self.ctx.get_warehouse_cluster().await?; + let flight_params = FlightParams { timeout: settings.get_flight_client_timeout()?, retry_times: settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - let mut message = HashMap::with_capacity(cluster.nodes.len()); + let mut message = HashMap::with_capacity(warehouse.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } - let res = cluster + let res = warehouse .do_action::<_, bool>(KILL_QUERY, message, flight_params) .await?; @@ -85,8 +86,8 @@ impl KillInterpreter { #[async_backtrace::framed] async fn execute_kill(&self, session_id: &String) -> Result { match self.ctx.get_session_by_id(session_id) { - None => match self.proxy_to_cluster { - true => self.kill_cluster_query().await, + None => match self.proxy_to_warehouse { + true => self.kill_warehouse_query().await, false => Err(ErrorCode::UnknownSession(format!( "Not found session id {}", session_id diff --git a/src/query/service/src/interpreters/interpreter_set_priority.rs b/src/query/service/src/interpreters/interpreter_set_priority.rs index 1bf830b24be01..eabff93f3d5fc 100644 --- a/src/query/service/src/interpreters/interpreter_set_priority.rs +++ b/src/query/service/src/interpreters/interpreter_set_priority.rs @@ -30,7 +30,7 @@ use crate::sessions::QueryContext; pub struct SetPriorityInterpreter { ctx: Arc, plan: SetPriorityPlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl SetPriorityInterpreter { @@ -38,7 +38,7 @@ impl SetPriorityInterpreter { Ok(SetPriorityInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -46,17 +46,17 @@ impl SetPriorityInterpreter { Ok(SetPriorityInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } #[async_backtrace::framed] - async fn set_cluster_priority(&self) -> Result { - let cluster = self.ctx.get_cluster(); + async fn set_warehouse_priority(&self) -> Result { + let warehouse = self.ctx.get_warehouse_cluster().await?; - let mut message = HashMap::with_capacity(cluster.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + let mut message = HashMap::with_capacity(warehouse.nodes.len()); + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } @@ -67,7 +67,7 @@ impl SetPriorityInterpreter { retry_times: 
settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - let res = cluster + let res = warehouse .do_action::<_, bool>(SET_PRIORITY, message, flight_params) .await?; @@ -96,8 +96,8 @@ impl Interpreter for SetPriorityInterpreter { async fn execute2(&self) -> Result { let id = &self.plan.id; match self.ctx.get_session_by_id(id) { - None => match self.proxy_to_cluster { - true => self.set_cluster_priority().await, + None => match self.proxy_to_warehouse { + true => self.set_warehouse_priority().await, false => Err(ErrorCode::UnknownSession(format!( "Not found session id {}", id diff --git a/src/query/service/src/interpreters/interpreter_system_action.rs b/src/query/service/src/interpreters/interpreter_system_action.rs index ca7e3ffb50c7d..e0b2b192a3a70 100644 --- a/src/query/service/src/interpreters/interpreter_system_action.rs +++ b/src/query/service/src/interpreters/interpreter_system_action.rs @@ -31,7 +31,7 @@ use crate::sessions::QueryContext; pub struct SystemActionInterpreter { ctx: Arc, plan: SystemPlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl SystemActionInterpreter { @@ -39,7 +39,7 @@ impl SystemActionInterpreter { Ok(SystemActionInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -47,7 +47,7 @@ impl SystemActionInterpreter { Ok(SystemActionInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } } @@ -65,11 +65,12 @@ impl Interpreter for SystemActionInterpreter { #[async_backtrace::framed] #[fastrace::trace] async fn execute2(&self) -> Result { - if self.proxy_to_cluster { - let cluster = self.ctx.get_cluster(); - let mut message = HashMap::with_capacity(cluster.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + if self.proxy_to_warehouse { + let warehouse = self.ctx.get_warehouse_cluster().await?; + + let mut message = HashMap::with_capacity(warehouse.nodes.len()); + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } @@ -80,7 +81,7 @@ impl Interpreter for SystemActionInterpreter { retry_times: settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - cluster + warehouse .do_action::<_, ()>(SYSTEM_ACTION, message, flight_params) .await?; } diff --git a/src/query/service/src/interpreters/interpreter_table_truncate.rs b/src/query/service/src/interpreters/interpreter_table_truncate.rs index 70afa08e38660..1c704dcc3247e 100644 --- a/src/query/service/src/interpreters/interpreter_table_truncate.rs +++ b/src/query/service/src/interpreters/interpreter_table_truncate.rs @@ -32,7 +32,7 @@ pub struct TruncateTableInterpreter { ctx: Arc, plan: TruncateTablePlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl TruncateTableInterpreter { @@ -40,7 +40,7 @@ impl TruncateTableInterpreter { Ok(TruncateTableInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -48,7 +48,7 @@ impl TruncateTableInterpreter { Ok(TruncateTableInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } } @@ -85,12 +85,12 @@ impl Interpreter for TruncateTableInterpreter { // check mutability table.check_mutable()?; - if self.proxy_to_cluster && table.broadcast_truncate_to_cluster() { - let cluster = self.ctx.get_cluster(); + if self.proxy_to_warehouse && table.broadcast_truncate_to_warehouse() { + let warehouse = self.ctx.get_warehouse_cluster().await?; - 
let mut message = HashMap::with_capacity(cluster.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + let mut message = HashMap::with_capacity(warehouse.nodes.len()); + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } @@ -101,7 +101,7 @@ impl Interpreter for TruncateTableInterpreter { retry_times: settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - cluster + warehouse .do_action::<_, ()>(TRUNCATE_TABLE, message, flight_params) .await?; } diff --git a/src/query/service/src/schedulers/scheduler.rs b/src/query/service/src/schedulers/scheduler.rs index 726f380237aad..5bee2a3567800 100644 --- a/src/query/service/src/schedulers/scheduler.rs +++ b/src/query/service/src/schedulers/scheduler.rs @@ -65,6 +65,10 @@ pub async fn build_query_pipeline_without_render_result_set( let build_res = if !plan.is_distributed_plan() { build_local_pipeline(ctx, plan).await } else { + if plan.is_warehouse_distributed_plan() { + ctx.set_cluster(ctx.get_warehouse_cluster().await?); + } + build_distributed_pipeline(ctx, plan).await }?; Ok(build_res) diff --git a/src/query/service/src/servers/admin/v1/tenant_tables.rs b/src/query/service/src/servers/admin/v1/tenant_tables.rs index 098609e887057..7be7b059b713c 100644 --- a/src/query/service/src/servers/admin/v1/tenant_tables.rs +++ b/src/query/service/src/servers/admin/v1/tenant_tables.rs @@ -16,6 +16,7 @@ use chrono::DateTime; use chrono::Utc; use databend_common_ast::parser::Dialect; use databend_common_catalog::catalog::CatalogManager; +use databend_common_catalog::table::DistributionLevel; use databend_common_config::GlobalConfig; use databend_common_exception::Result; use databend_common_meta_app::tenant::Tenant; @@ -111,7 +112,7 @@ async fn load_tenant_tables(tenant: &Tenant) -> Result { engine: table.engine().to_string(), created_on: table.get_table_info().meta.created_on, updated_on: table.get_table_info().meta.updated_on, - is_local: table.is_local(), + is_local: matches!(table.distribution_level(), DistributionLevel::Local), is_external: table.get_table_info().meta.storage_params.is_some(), rows: stats.number_of_rows, data_bytes: stats.data_bytes, diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index 9c55fba8f5b37..b06d45b0f0841 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -976,6 +976,10 @@ impl TableContext for QueryContext { self.shared.get_cluster() } + fn set_cluster(&self, cluster: Arc) { + self.shared.set_cluster(cluster) + } + // Get all the processes list info. 
fn get_processes_info(&self) -> Vec { SessionManager::instance().processes_info() @@ -1140,10 +1144,9 @@ impl TableContext for QueryContext { if actual_batch_limit != max_batch_size { return Err(ErrorCode::StorageUnsupported( format!( - "Within the same transaction, the batch size for a stream must remain consistent {:?} {:?}", + "Within the same transaction, the batch size for a stream must remain consistent {:?} {:?}", actual_batch_limit, max_batch_size ) - )); } } else if max_batch_size.is_some() { @@ -1714,6 +1717,10 @@ impl TableContext for QueryContext { } Ok(streams_meta) } + + async fn get_warehouse_cluster(&self) -> Result> { + self.shared.get_warehouse_clusters().await + } } impl TrySpawn for QueryContext { diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index c31fe083316b2..af2324b508239 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -36,6 +36,7 @@ use databend_common_catalog::runtime_filter_info::RuntimeFilterReady; use databend_common_catalog::statistics::data_cache_statistics::DataCacheMetrics; use databend_common_catalog::table_context::ContextError; use databend_common_catalog::table_context::StageAttachment; +use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_meta_app::principal::OnErrorMode; @@ -59,6 +60,7 @@ use parking_lot::RwLock; use uuid::Uuid; use crate::clusters::Cluster; +use crate::clusters::ClusterDiscovery; use crate::pipelines::executor::PipelineExecutor; use crate::sessions::query_affect::QueryAffect; use crate::sessions::Session; @@ -91,7 +93,8 @@ pub struct QueryContextShared { pub(in crate::sessions) session: Arc, pub(in crate::sessions) runtime: Arc>>>, pub(in crate::sessions) init_query_id: Arc>, - pub(in crate::sessions) cluster_cache: Arc, + pub(in crate::sessions) cluster_cache: Arc>>, + pub(in crate::sessions) warehouse_cache: Arc>>>, pub(in crate::sessions) running_query: Arc>>, pub(in crate::sessions) running_query_kind: Arc>>, pub(in crate::sessions) running_query_text_hash: Arc>>, @@ -158,7 +161,7 @@ impl QueryContextShared { query_settings: Settings::create(session.get_current_tenant()), catalog_manager: CatalogManager::instance(), session, - cluster_cache, + cluster_cache: Arc::new(RwLock::new(cluster_cache)), data_operator: DataOperator::instance(), init_query_id: Arc::new(RwLock::new(Uuid::new_v4().to_string())), total_scan_values: Arc::new(Progress::create()), @@ -206,6 +209,7 @@ impl QueryContextShared { cluster_spill_progress: Default::default(), spilled_files: Default::default(), + warehouse_cache: Arc::new(RwLock::new(None)), })) } @@ -263,8 +267,31 @@ impl QueryContextShared { // TODO: Wait for the query to be processed (write out the last error) } + pub fn set_cluster(&self, cluster: Arc) { + let mut cluster_cache = self.cluster_cache.write(); + *cluster_cache = cluster; + } + pub fn get_cluster(&self) -> Arc { - self.cluster_cache.clone() + self.cluster_cache.read().clone() + } + + pub async fn get_warehouse_clusters(&self) -> Result> { + if let Some(warehouse) = self.warehouse_cache.read().as_ref() { + return Ok(warehouse.clone()); + } + + let config = GlobalConfig::instance(); + let discovery = ClusterDiscovery::instance(); + let warehouse = discovery.discover_warehouse_nodes(&config).await?; + + let mut write_guard = self.warehouse_cache.write(); + + if write_guard.is_none() { + *write_guard = 
Some(warehouse.clone()); + } + + Ok(write_guard.as_ref().cloned().expect("expect cluster.")) } pub fn get_current_catalog(&self) -> String { diff --git a/src/query/service/src/table_functions/async_crash_me.rs b/src/query/service/src/table_functions/async_crash_me.rs index 5ad3d3f1e3dcc..14c54c37538c9 100644 --- a/src/query/service/src/table_functions/async_crash_me.rs +++ b/src/query/service/src/table_functions/async_crash_me.rs @@ -90,10 +90,6 @@ impl AsyncCrashMeTable { #[async_trait::async_trait] impl Table for AsyncCrashMeTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/cloud/task_dependents.rs b/src/query/service/src/table_functions/cloud/task_dependents.rs index 997ffc91f4cdf..b01285495871c 100644 --- a/src/query/service/src/table_functions/cloud/task_dependents.rs +++ b/src/query/service/src/table_functions/cloud/task_dependents.rs @@ -110,10 +110,6 @@ impl TaskDependentsTable { #[async_trait::async_trait] impl Table for TaskDependentsTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/cloud/task_dependents_enable.rs b/src/query/service/src/table_functions/cloud/task_dependents_enable.rs index 734006a5498e2..85413edb1f05b 100644 --- a/src/query/service/src/table_functions/cloud/task_dependents_enable.rs +++ b/src/query/service/src/table_functions/cloud/task_dependents_enable.rs @@ -83,10 +83,6 @@ impl TaskDependentsEnableTable { #[async_trait::async_trait] impl Table for TaskDependentsEnableTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/cloud/task_history.rs b/src/query/service/src/table_functions/cloud/task_history.rs index a84d331d17baf..a2ce415103283 100644 --- a/src/query/service/src/table_functions/cloud/task_history.rs +++ b/src/query/service/src/table_functions/cloud/task_history.rs @@ -87,10 +87,6 @@ impl TaskHistoryTable { #[async_trait::async_trait] impl Table for TaskHistoryTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/numbers/numbers_table.rs b/src/query/service/src/table_functions/numbers/numbers_table.rs index 09b017a961f73..50a57ef3cd745 100644 --- a/src/query/service/src/table_functions/numbers/numbers_table.rs +++ b/src/query/service/src/table_functions/numbers/numbers_table.rs @@ -22,6 +22,7 @@ use databend_common_catalog::plan::PartInfoPtr; use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_args::TableArgs; use databend_common_exception::Result; @@ -111,8 +112,11 @@ impl NumbersTable { #[async_trait::async_trait] impl Table for NumbersTable { - fn is_local(&self) -> bool { - self.name() == "numbers_local" + fn distribution_level(&self) -> DistributionLevel { + match self.name() { + "numbers_local" => DistributionLevel::Local, + _ => DistributionLevel::Cluster, + } } fn as_any(&self) -> &dyn Any { diff --git a/src/query/service/src/table_functions/others/execute_background_job.rs b/src/query/service/src/table_functions/others/execute_background_job.rs index c1681625b76f5..7bfdaa2e42653 100644 --- 
a/src/query/service/src/table_functions/others/execute_background_job.rs +++ b/src/query/service/src/table_functions/others/execute_background_job.rs @@ -78,10 +78,6 @@ impl ExecuteBackgroundJobTable { #[async_trait::async_trait] impl Table for ExecuteBackgroundJobTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/license_info.rs b/src/query/service/src/table_functions/others/license_info.rs index b4a986ad11ecc..6423a6d245888 100644 --- a/src/query/service/src/table_functions/others/license_info.rs +++ b/src/query/service/src/table_functions/others/license_info.rs @@ -99,10 +99,6 @@ impl LicenseInfoTable { #[async_trait::async_trait] impl Table for LicenseInfoTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/suggested_background_tasks.rs b/src/query/service/src/table_functions/others/suggested_background_tasks.rs index 637b99b5fe591..039142a0cea5c 100644 --- a/src/query/service/src/table_functions/others/suggested_background_tasks.rs +++ b/src/query/service/src/table_functions/others/suggested_background_tasks.rs @@ -97,10 +97,6 @@ impl SuggestedBackgroundTasksTable { #[async_trait::async_trait] impl Table for SuggestedBackgroundTasksTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/tenant_quota.rs b/src/query/service/src/table_functions/others/tenant_quota.rs index 9e79ebb5b7b60..99f33e2e5fa30 100644 --- a/src/query/service/src/table_functions/others/tenant_quota.rs +++ b/src/query/service/src/table_functions/others/tenant_quota.rs @@ -109,10 +109,6 @@ impl TenantQuotaTable { #[async_trait::async_trait] impl Table for TenantQuotaTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/udf.rs b/src/query/service/src/table_functions/others/udf.rs index 96a8c15873bd6..622d7b83ab8d0 100644 --- a/src/query/service/src/table_functions/others/udf.rs +++ b/src/query/service/src/table_functions/others/udf.rs @@ -122,10 +122,6 @@ impl UdfEchoTable { #[async_trait::async_trait] impl Table for UdfEchoTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/sync_crash_me.rs b/src/query/service/src/table_functions/sync_crash_me.rs index 5cd36268abcbb..ae9e97cee4750 100644 --- a/src/query/service/src/table_functions/sync_crash_me.rs +++ b/src/query/service/src/table_functions/sync_crash_me.rs @@ -90,10 +90,6 @@ impl SyncCrashMeTable { #[async_trait::async_trait] impl Table for SyncCrashMeTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs index 228f0753b4378..f2b9f92f9a4d5 100644 --- a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs +++ b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs @@ -1006,6 +1006,14 @@ impl TableContext for CtxDelegation { async fn drop_m_cte_temp_table(&self) -> Result<()> { todo!() } + + fn set_cluster(&self, _: Arc) { + todo!() + } + + async fn get_warehouse_cluster(&self) -> Result> { + todo!() + } } #[tokio::test(flavor = "multi_thread")] diff --git a/src/query/service/tests/it/storages/fuse/operations/commit.rs 
b/src/query/service/tests/it/storages/fuse/operations/commit.rs index 884615af0a3dd..6a713e3d8804e 100644 --- a/src/query/service/tests/it/storages/fuse/operations/commit.rs +++ b/src/query/service/tests/it/storages/fuse/operations/commit.rs @@ -881,6 +881,14 @@ impl TableContext for CtxDelegation { async fn drop_m_cte_temp_table(&self) -> Result<()> { todo!() } + + fn set_cluster(&self, _: Arc) { + todo!() + } + + async fn get_warehouse_cluster(&self) -> Result> { + todo!() + } } #[derive(Clone, Debug)] diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 292438a290e06..bb7240f586abd 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use databend_common_catalog::plan::DataSourceInfo; use databend_common_catalog::plan::DataSourcePlan; +use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_exception::Result; use databend_common_expression::DataSchemaRef; use databend_common_functions::BUILTIN_FUNCTIONS; @@ -710,6 +711,12 @@ impl PhysicalPlan { ) } + pub fn is_warehouse_distributed_plan(&self) -> bool { + self.children() + .any(|child| child.is_warehouse_distributed_plan()) + || matches!(self, Self::TableScan(v) if v.source.parts.kind == PartitionsShuffleKind::BroadcastWarehouse) + } + pub fn get_desc(&self) -> Result { Ok(match self { PhysicalPlan::TableScan(v) => format!( diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 9529adb43ef1c..03257f9738030 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -41,6 +41,7 @@ use crate::optimizer::join::SingleToInnerOptimizer; use crate::optimizer::rule::TransformResult; use crate::optimizer::statistics::CollectStatisticsOptimizer; use crate::optimizer::util::contains_local_table_scan; +use crate::optimizer::util::contains_warehouse_table_scan; use crate::optimizer::RuleFactory; use crate::optimizer::RuleID; use crate::optimizer::SExpr; @@ -374,6 +375,13 @@ pub async fn optimize_query(opt_ctx: &mut OptimizerContext, mut s_expr: SExpr) - if contains_local_table_scan(&s_expr, &opt_ctx.metadata) { opt_ctx.enable_distributed_optimization = false; info!("Disable distributed optimization due to local table scan."); + } else if contains_warehouse_table_scan(&s_expr, &opt_ctx.metadata) { + let warehouse = opt_ctx.table_ctx.get_warehouse_cluster().await?; + + if !warehouse.is_empty() { + opt_ctx.enable_distributed_optimization = true; + info!("Enable distributed optimization due to warehouse table scan."); + } } // Decorrelate subqueries, after this step, there should be no subquery in the expression. @@ -461,6 +469,13 @@ async fn get_optimized_memo(opt_ctx: &mut OptimizerContext, mut s_expr: SExpr) - if contains_local_table_scan(&s_expr, &opt_ctx.metadata) { opt_ctx.enable_distributed_optimization = false; info!("Disable distributed optimization due to local table scan."); + } else if contains_warehouse_table_scan(&s_expr, &opt_ctx.metadata) { + let warehouse = opt_ctx.table_ctx.get_warehouse_cluster().await?; + + if !warehouse.is_empty() { + opt_ctx.enable_distributed_optimization = true; + info!("Enable distributed optimization due to warehouse table scan."); + } } // Decorrelate subqueries, after this step, there should be no subquery in the expression. 
diff --git a/src/query/sql/src/planner/optimizer/util.rs b/src/query/sql/src/planner/optimizer/util.rs index 8120893a8cec2..1e8641dc6da4f 100644 --- a/src/query/sql/src/planner/optimizer/util.rs +++ b/src/query/sql/src/planner/optimizer/util.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use databend_common_catalog::table::DistributionLevel; + use super::SExpr; use crate::plans::RelOperator; use crate::MetadataRef; @@ -22,9 +24,38 @@ pub fn contains_local_table_scan(s_expr: &SExpr, metadata: &MetadataRef) -> bool .children() .any(|s_expr| contains_local_table_scan(s_expr, metadata)) || if let RelOperator::Scan(get) = s_expr.plan() { - metadata.read().table(get.table_index).table().is_local() + matches!( + metadata + .read() + .table(get.table_index) + .table() + .distribution_level(), + DistributionLevel::Local + ) } else { false } || matches!(s_expr.plan(), RelOperator::RecursiveCteScan { .. }) } + +pub fn contains_warehouse_table_scan(s_expr: &SExpr, metadata: &MetadataRef) -> bool { + if s_expr + .children() + .any(|s_expr| contains_warehouse_table_scan(s_expr, metadata)) + { + return true; + } + + if let RelOperator::Scan(scan) = s_expr.plan() { + return matches!( + metadata + .read() + .table(scan.table_index) + .table() + .distribution_level(), + DistributionLevel::Warehouse, + ); + } + + false +} diff --git a/src/query/storages/delta/src/table.rs b/src/query/storages/delta/src/table.rs index 2bff4c9b76d22..64e0d36f67ced 100644 --- a/src/query/storages/delta/src/table.rs +++ b/src/query/storages/delta/src/table.rs @@ -26,6 +26,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_args::TableArgs; use databend_common_catalog::table_context::TableContext; @@ -367,8 +368,8 @@ impl Table for DeltaTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index ce04e7587e14d..bb252fde36204 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -39,6 +39,7 @@ use databend_common_catalog::table::Bound; use databend_common_catalog::table::ColumnRange; use databend_common_catalog::table::ColumnStatisticsProvider; use databend_common_catalog::table::CompactionLimits; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::NavigationDescriptor; use databend_common_catalog::table::TimeNavigation; use databend_common_catalog::table_context::AbortChecker; @@ -605,8 +606,8 @@ impl FuseTable { #[async_trait::async_trait] impl Table for FuseTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &dyn Any { diff --git a/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs b/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs index 310b0f5e31dd2..3dd86628d731b 100644 --- a/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs +++ 
b/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use databend_common_catalog::plan::DataSourcePlan; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table_args::TableArgs; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -29,9 +30,10 @@ pub trait SimpleArgFunc { type Args; fn schema() -> TableSchemaRef; - fn is_local_func() -> bool { - true + fn distribution_level() -> DistributionLevel { + DistributionLevel::Local } + async fn apply( ctx: &Arc, args: &Self::Args, @@ -63,8 +65,8 @@ where T::schema() } - fn is_local_func(&self) -> bool { - T::is_local_func() + fn distribution_level(&self) -> DistributionLevel { + T::distribution_level() } async fn apply( diff --git a/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs b/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs index 3ce61497255bc..49985d3ba24ef 100644 --- a/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs +++ b/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs @@ -21,6 +21,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_expression::TableSchemaRef; @@ -64,8 +65,8 @@ pub trait SimpleTableFunc: Send + Sync + 'static { "table_func_template".to_owned() } - fn is_local_func(&self) -> bool { - true + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Local } fn table_args(&self) -> Option; @@ -129,8 +130,8 @@ impl Table for TableFunctionTemplate { &self.table_info } - fn is_local(&self) -> bool { - self.inner.is_local_func() + fn distribution_level(&self) -> DistributionLevel { + self.inner.distribution_level() } #[async_backtrace::framed] @@ -140,18 +141,24 @@ impl Table for TableFunctionTemplate { _push_downs: Option, _dry_run: bool, ) -> Result<(PartStatistics, Partitions)> { - match self.inner.is_local_func() { - true => Ok(( + match self.inner.distribution_level() { + DistributionLevel::Local => Ok(( PartStatistics::default(), Partitions::create(PartitionsShuffleKind::Seq, vec![Arc::new(Box::new( PlaceHolder, ))]), )), - false => Ok(( + DistributionLevel::Cluster => Ok(( PartStatistics::default(), - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - PlaceHolder, - ))]), + Partitions::create(PartitionsShuffleKind::BroadcastCluster, vec![Arc::new( + Box::new(PlaceHolder), + )]), + )), + DistributionLevel::Warehouse => Ok(( + PartStatistics::default(), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(PlaceHolder), + )]), )), } } diff --git a/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs b/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs index 05e704c4e0455..9c1cfc6bc9e13 100644 --- a/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs +++ b/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use databend_common_catalog::plan::DataSourcePlan; +use databend_common_catalog::table::DistributionLevel; use 
databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_expression::types::StringType; @@ -62,9 +63,8 @@ impl SimpleTableFunc for SetCacheCapacity { ]) } - fn is_local_func(&self) -> bool { - // cache operation needs to be broadcast to all nodes - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Warehouse } async fn apply( diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs index b2de36a1e7bc4..dc24507edd764 100644 --- a/src/query/storages/hive/hive/src/hive_table.rs +++ b/src/query/storages/hive/hive/src/hive_table.rs @@ -25,6 +25,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::NavigationPoint; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; @@ -396,8 +397,8 @@ impl HiveTable { #[async_trait::async_trait] impl Table for HiveTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &(dyn std::any::Any + 'static) { diff --git a/src/query/storages/iceberg/src/table.rs b/src/query/storages/iceberg/src/table.rs index b41ccaf7aaacd..47fbacb5b79de 100644 --- a/src/query/storages/iceberg/src/table.rs +++ b/src/query/storages/iceberg/src/table.rs @@ -28,6 +28,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_args::TableArgs; @@ -292,8 +293,8 @@ impl Table for IcebergTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/memory/src/memory_table.rs b/src/query/storages/memory/src/memory_table.rs index 2ec25250b6c79..915e057b7a160 100644 --- a/src/query/storages/memory/src/memory_table.rs +++ b/src/query/storages/memory/src/memory_table.rs @@ -26,6 +26,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -139,8 +140,8 @@ impl Table for MemoryTable { /// MemoryTable could be distributed table, yet we only insert data in one node per query /// Because commit_insert did not support distributed transaction - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn support_column_projection(&self) -> bool { @@ -207,7 +208,7 @@ impl Table for MemoryTable { let parts = vec![MemoryPartInfo::create()]; return Ok(( statistics, - Partitions::create(PartitionsShuffleKind::Broadcast, parts), + Partitions::create(PartitionsShuffleKind::BroadcastCluster, parts), 
)); } diff --git a/src/query/storages/null/src/null_table.rs b/src/query/storages/null/src/null_table.rs index 5bb625429694f..c7c73210b091f 100644 --- a/src/query/storages/null/src/null_table.rs +++ b/src/query/storages/null/src/null_table.rs @@ -20,6 +20,7 @@ use databend_common_catalog::plan::DataSourcePlan; use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -62,8 +63,8 @@ impl Table for NullTable { } /// Null do not keep data, it's safe to make it non-local. - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } #[async_backtrace::framed] diff --git a/src/query/storages/orc/src/table.rs b/src/query/storages/orc/src/table.rs index d3da0a13008ab..8b38930df7d8f 100644 --- a/src/query/storages/orc/src/table.rs +++ b/src/query/storages/orc/src/table.rs @@ -24,6 +24,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::StageTableInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -122,8 +123,8 @@ impl Table for OrcTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index 286f39498ec18..9bbc66eb79860 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -29,6 +29,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::query_kind::QueryKind; use databend_common_catalog::table::ColumnStatisticsProvider; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::DummyColumnStatisticsProvider; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; @@ -188,8 +189,8 @@ impl Table for ParquetRSTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/result_cache/src/table_function/table.rs b/src/query/storages/result_cache/src/table_function/table.rs index 74dcb3a0373d1..cd4a55757fce0 100644 --- a/src/query/storages/result_cache/src/table_function/table.rs +++ b/src/query/storages/result_cache/src/table_function/table.rs @@ -95,10 +95,6 @@ impl ResultScan { #[async_trait::async_trait] impl Table for ResultScan { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/storages/stage/src/stage_table.rs b/src/query/storages/stage/src/stage_table.rs index 2675f4e0b1f8d..d24ed83cc9040 100644 --- a/src/query/storages/stage/src/stage_table.rs +++ b/src/query/storages/stage/src/stage_table.rs @@ -23,6 +23,7 @@ use 
databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::StageTableInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -163,8 +164,8 @@ impl Table for StageTable { } } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn read_data( diff --git a/src/query/storages/stream/src/stream_table.rs b/src/query/storages/stream/src/stream_table.rs index 6c7f052c71a7d..80b0f849912a2 100644 --- a/src/query/storages/stream/src/stream_table.rs +++ b/src/query/storages/stream/src/stream_table.rs @@ -24,6 +24,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::StreamColumn; use databend_common_catalog::table::ColumnStatisticsProvider; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -335,8 +336,8 @@ impl StreamTable { #[async_trait::async_trait] impl Table for StreamTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &dyn Any { diff --git a/src/query/storages/system/src/backtrace_table.rs b/src/query/storages/system/src/backtrace_table.rs index 0d995cc4bc345..3291434771e16 100644 --- a/src/query/storages/system/src/backtrace_table.rs +++ b/src/query/storages/system/src/backtrace_table.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use databend_common_base::get_all_tasks; use databend_common_base::runtime::Runtime; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -44,7 +45,7 @@ impl SyncSystemTable for BacktraceTable { const NAME: &'static str = "system.backtrace"; // Allow distributed query. - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/caches_table.rs b/src/query/storages/system/src/caches_table.rs index 04c411675dfa1..f2be151ef77f8 100644 --- a/src/query/storages/system/src/caches_table.rs +++ b/src/query/storages/system/src/caches_table.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_exception::Result; use databend_common_expression::types::NumberDataType; @@ -62,7 +63,7 @@ impl SyncSystemTable for CachesTable { const NAME: &'static str = "system.caches"; // Allow distributed query. 
- const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/log_queue.rs b/src/query/storages/system/src/log_queue.rs index 59cea8cecaac0..af1b1a4f0b9e4 100644 --- a/src/query/storages/system/src/log_queue.rs +++ b/src/query/storages/system/src/log_queue.rs @@ -23,6 +23,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -150,8 +151,8 @@ impl SystemLogTable { #[async_trait::async_trait] impl Table for SystemLogTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Warehouse } fn as_any(&self) -> &dyn Any { @@ -172,9 +173,9 @@ impl Table for SystemLogTable { Ok(( PartStatistics::default(), // Make the table in distributed. - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - SystemTablePart, - ))]), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(SystemTablePart), + )]), )) } diff --git a/src/query/storages/system/src/malloc_stats_totals_table.rs b/src/query/storages/system/src/malloc_stats_totals_table.rs index 4942edaaa69c5..9fa57054fb59d 100644 --- a/src/query/storages/system/src/malloc_stats_totals_table.rs +++ b/src/query/storages/system/src/malloc_stats_totals_table.rs @@ -15,6 +15,7 @@ use std::default::Default; use std::sync::Arc; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -53,7 +54,7 @@ pub struct MallocStatsTotalsTable { impl SyncSystemTable for MallocStatsTotalsTable { const NAME: &'static str = "system.malloc_stats_totals"; - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/metrics_table.rs b/src/query/storages/system/src/metrics_table.rs index b8811aecb297a..4921929002785 100644 --- a/src/query/storages/system/src/metrics_table.rs +++ b/src/query/storages/system/src/metrics_table.rs @@ -19,6 +19,7 @@ use databend_common_base::runtime::metrics::MetricSample; use databend_common_base::runtime::metrics::MetricValue; use databend_common_base::runtime::metrics::GLOBAL_METRICS_REGISTRY; use databend_common_base::runtime::GLOBAL_MEM_STAT; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -44,7 +45,7 @@ pub struct MetricsTable { impl SyncSystemTable for MetricsTable { const NAME: &'static str = "system.metrics"; // Allow distributed query. 
- const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; const BROADCAST_TRUNCATE: bool = true; fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/system/src/processes_table.rs b/src/query/storages/system/src/processes_table.rs index e864af9f29a7b..d34f90d242368 100644 --- a/src/query/storages/system/src/processes_table.rs +++ b/src/query/storages/system/src/processes_table.rs @@ -17,6 +17,7 @@ use std::time::Duration; use chrono::DateTime; use chrono::Utc; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -46,7 +47,7 @@ pub struct ProcessesTable { impl SyncSystemTable for ProcessesTable { const NAME: &'static str = "system.processes"; - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/queries_profiling.rs b/src/query/storages/system/src/queries_profiling.rs index f2f638aefa4bf..ab1914be835e5 100644 --- a/src/query/storages/system/src/queries_profiling.rs +++ b/src/query/storages/system/src/queries_profiling.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::sync::Arc; use databend_common_base::runtime::profile::ProfileStatisticsName; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -47,7 +48,7 @@ pub struct QueriesProfilingTable { impl SyncSystemTable for QueriesProfilingTable { const NAME: &'static str = "system.queries_profiling"; - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/table.rs b/src/query/storages/system/src/table.rs index 5ff2b253d96fa..f1c12c009e406 100644 --- a/src/query/storages/system/src/table.rs +++ b/src/query/storages/system/src/table.rs @@ -21,6 +21,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -57,7 +58,7 @@ impl PartInfo for SystemTablePart { pub trait SyncSystemTable: Send + Sync { const NAME: &'static str; - const DATA_IN_LOCAL: bool = true; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Local; const BROADCAST_TRUNCATE: bool = false; fn get_table_info(&self) -> &TableInfo; @@ -68,18 +69,24 @@ pub trait SyncSystemTable: Send + Sync { _ctx: Arc, _push_downs: Option, ) -> Result<(PartStatistics, Partitions)> { - match Self::DATA_IN_LOCAL { - true => Ok(( + match Self::DISTRIBUTION_LEVEL { + DistributionLevel::Local => Ok(( PartStatistics::default(), Partitions::create(PartitionsShuffleKind::Seq, vec![Arc::new(Box::new( SystemTablePart, ))]), )), - false => Ok(( + DistributionLevel::Cluster => Ok(( PartStatistics::default(), - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - SystemTablePart, - ))]), + 
Partitions::create(PartitionsShuffleKind::BroadcastCluster, vec![Arc::new( + Box::new(SystemTablePart), + )]), + )), + DistributionLevel::Warehouse => Ok(( + PartStatistics::default(), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(SystemTablePart), + )]), )), } } @@ -109,9 +116,14 @@ impl Table for SyncOneBlockSystemTable bool { + fn distribution_level(&self) -> DistributionLevel { // When querying a memory table, we send the partition to one node for execution. The other nodes send empty partitions. - false + // For system tables, they are always non-local, which ensures that system tables can be JOIN or UNION operation with any other table. + match TTable::DISTRIBUTION_LEVEL { + DistributionLevel::Local => DistributionLevel::Cluster, + DistributionLevel::Cluster => DistributionLevel::Cluster, + DistributionLevel::Warehouse => DistributionLevel::Warehouse, + } } fn get_table_info(&self) -> &TableInfo { @@ -155,7 +167,7 @@ impl Table for SyncOneBlockSystemTable bool { + fn broadcast_truncate_to_warehouse(&self) -> bool { TTable::BROADCAST_TRUNCATE } } @@ -198,7 +210,7 @@ impl SyncSource for SystemTableSyncSource &TableInfo; async fn get_full_data( @@ -213,18 +225,24 @@ pub trait AsyncSystemTable: Send + Sync { _ctx: Arc, _push_downs: Option, ) -> Result<(PartStatistics, Partitions)> { - match Self::DATA_IN_LOCAL { - true => Ok(( + match Self::DISTRIBUTION_LEVEL { + DistributionLevel::Local => Ok(( PartStatistics::default(), Partitions::create(PartitionsShuffleKind::Seq, vec![Arc::new(Box::new( SystemTablePart, ))]), )), - false => Ok(( + DistributionLevel::Cluster => Ok(( PartStatistics::default(), - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - SystemTablePart, - ))]), + Partitions::create(PartitionsShuffleKind::BroadcastCluster, vec![Arc::new( + Box::new(SystemTablePart), + )]), + )), + DistributionLevel::Warehouse => Ok(( + PartStatistics::default(), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(SystemTablePart), + )]), )), } } @@ -250,9 +268,14 @@ impl Table for AsyncOneBlockSystemTable bool { + fn distribution_level(&self) -> DistributionLevel { // When querying a memory table, we send the partition to one node for execution. The other nodes send empty partitions. - false + // For system tables, they are always non-local, which ensures that system tables can be JOIN or UNION operation with any other table. 
+ match TTable::DISTRIBUTION_LEVEL { + DistributionLevel::Local => DistributionLevel::Cluster, + DistributionLevel::Cluster => DistributionLevel::Cluster, + DistributionLevel::Warehouse => DistributionLevel::Warehouse, + } } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/system/src/temp_files_table.rs b/src/query/storages/system/src/temp_files_table.rs index f4dae9e7f4ead..c0ee1678d93f7 100644 --- a/src/query/storages/system/src/temp_files_table.rs +++ b/src/query/storages/system/src/temp_files_table.rs @@ -21,6 +21,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -64,9 +65,8 @@ pub struct TempFilesTable { #[async_trait::async_trait] impl Table for TempFilesTable { - fn is_local(&self) -> bool { - // Follow the practice of `SyncOneBlockSystemTable::is_local` - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &dyn Any { From 5faf4a6cfe171c1f1733cf3713fbfbe1ac801b69 Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 15:54:05 +0800 Subject: [PATCH 08/33] chore(ci): fix release bump version (#17277) --- .github/scripts/bump_version.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/scripts/bump_version.js b/.github/scripts/bump_version.js index 6cfd59c4033d8..619fbe197b3a4 100644 --- a/.github/scripts/bump_version.js +++ b/.github/scripts/bump_version.js @@ -7,15 +7,15 @@ module.exports = async ({ github, context, core }) => { const { TYPE, TAG } = process.env; - const RE_TAG_STABLE = /^v(\d+)\.(\d+)\.(\d+)$/g; - const RE_TAG_NIGHTLY = /^v(\d+)\.(\d+)\.(\d+)-nightly$/g; - const RE_TAG_PATCH = /^v(\d+)\.(\d+)\.(\d+)-p(\d+)$/g; + const RE_TAG_STABLE = /^v(\d+)\.(\d+)\.(\d+)$/; + const RE_TAG_NIGHTLY = /^v(\d+)\.(\d+)\.(\d+)-nightly$/; + const RE_TAG_PATCH = /^v(\d+)\.(\d+)\.(\d+)-p(\d+)$/; switch (TYPE) { case "": case "nightly": { core.setOutput("sha", context.sha); - core.info(`Nightly release triggered by ${TAG} (${context.sha})`); + core.info(`Nightly release triggered by (${context.sha})`); let previous = null; const releases = await github.rest.repos.listReleases({ @@ -23,8 +23,8 @@ module.exports = async ({ github, context, core }) => { repo: context.repo.repo, }); for (const release of releases.data) { - const result = RE_TAG_NIGHTLY.exec(release.tag_name); - if (result) { + const ret = RE_TAG_NIGHTLY.exec(release.tag_name); + if (ret) { previous = release.tag_name; break; } From c8b8a28dba0b6c9cb7918e95421bf7ce49ec177a Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 16:28:41 +0800 Subject: [PATCH 09/33] chore(ci): fix release patch version (#17278) --- .github/scripts/bump_version.js | 198 +++++++++++++++++++------------- 1 file changed, 119 insertions(+), 79 deletions(-) diff --git a/.github/scripts/bump_version.js b/.github/scripts/bump_version.js index 619fbe197b3a4..415eea0808ff3 100644 --- a/.github/scripts/bump_version.js +++ b/.github/scripts/bump_version.js @@ -11,23 +11,118 @@ module.exports = async ({ github, context, core }) => { const RE_TAG_NIGHTLY = /^v(\d+)\.(\d+)\.(\d+)-nightly$/; const RE_TAG_PATCH = /^v(\d+)\.(\d+)\.(\d+)-p(\d+)$/; - switch (TYPE) { - case "": - 
case "nightly": { - core.setOutput("sha", context.sha); - core.info(`Nightly release triggered by (${context.sha})`); + async function getPreviousNightlyRelease(github, context) { + const releases = await github.rest.repos.listReleases({ + owner: context.repo.owner, + repo: context.repo.repo, + }); + for (const release of releases.data) { + const ret = RE_TAG_NIGHTLY.exec(release.tag_name); + if (ret) { + return release.tag_name; + } + } + } + + function getNextNightlyRelease(previous) { + const nightly = RE_TAG_NIGHTLY.exec(previous); + if (nightly) { + const major = nightly[1]; + const minor = nightly[2]; + const patch = parseInt(nightly[3]) + 1; + return `v${major}.${minor}.${patch}-nightly`; + } + } - let previous = null; + async function getPreviousStableRelease(github, context) { + let page = 1; + while (true) { const releases = await github.rest.repos.listReleases({ owner: context.repo.owner, repo: context.repo.repo, + page, }); + if (releases.data.length === 0) { + break; + } + page++; for (const release of releases.data) { - const ret = RE_TAG_NIGHTLY.exec(release.tag_name); + const ret = RE_TAG_STABLE.exec(release.tag_name); if (ret) { - previous = release.tag_name; - break; + return release.tag_name; + } + } + } + } + + function getNextStableRelease() { + const nightly = RE_TAG_NIGHTLY.exec(TAG); + if (nightly) { + const major = nightly[1]; + const minor = nightly[2]; + const patch = nightly[3]; + return `v${major}.${minor}.${patch}`; + } + } + + async function getPreviousPatchRelease(github, context) { + let page = 1; + while (true) { + const releases = await github.rest.repos.listReleases({ + owner: context.repo.owner, + repo: context.repo.repo, + page, + }); + if (releases.data.length === 0) { + break; + } + page++; + for (const release of releases.data) { + if (!release.tag_name.startsWith(TAG)) { + continue; + } + if (release.tag_name === TAG) { + // no previous patch release, use the previous stable release + return release.tag_name; + } + const ret = RE_TAG_PATCH.exec(release.tag_name); + if (!ret) { + core.warning(`Ignore invalid patch release ${release.tag_name}`); + continue; } + return release.tag_name; + } + } + } + + function getNextPatchRelease(previous) { + const stable = RE_TAG_STABLE.exec(previous); + if (stable) { + const major = stable[1]; + const minor = stable[2]; + const patch = stable[3]; + return `v${major}.${minor}.${patch}-p1`; + } + const patch = RE_TAG_PATCH.exec(previous); + if (patch) { + const major = patch[1]; + const minor = patch[2]; + const patch = patch[3]; + const pv = parseInt(patch[4]); + return `v${major}.${minor}.${patch}-p${pv + 1}`; + } + } + + switch (TYPE) { + case "": + case "nightly": { + core.setOutput("sha", context.sha); + core.info(`Nightly release triggered by (${context.sha})`); + + const previous = await getPreviousNightlyRelease(github, context); + if (!previous) { + core.setFailed(`No previous nightly release found, ignoring`); + return; } core.setOutput("previous", previous); core.info(`Nightly release with previous release: ${previous}`); @@ -37,15 +132,11 @@ module.exports = async ({ github, context, core }) => { core.info(`Release create manually with tag ${TAG}`); return; } - const result = RE_TAG_NIGHTLY.exec(previous); - if (!result) { - core.setFailed(`The previous tag ${previous} is invalid.`); + const nextTag = getNextNightlyRelease(previous); + if (!nextTag) { + core.setFailed(`No next nightly release from ${previous}`); return; } - const major = result[1]; - const minor = result[2]; - const patch = 
(parseInt(result[3]) + 1).toString(); - const nextTag = `v${major}.${minor}.${patch}-nightly`; core.setOutput("tag", nextTag); core.info(`Release create new nightly ${nextTag}`); return; @@ -58,38 +149,15 @@ module.exports = async ({ github, context, core }) => { return; } core.info(`Stable release triggered by ${TAG} (${context.sha})`); - const result = RE_TAG_NIGHTLY.exec(TAG); - if (!result) { - core.setFailed(`The tag ${TAG} is invalid, ignoring`); + const nextTag = getNextStableRelease(); + if (!nextTag) { + core.setFailed(`No stable release from ${TAG}`); return; } - const major = result[1]; - const minor = result[2]; - const patch = result[3]; - const nextTag = `v${major}.${minor}.${patch}`; core.setOutput("tag", nextTag); core.info(`Stable release ${nextTag} from ${TAG}`); - let previous = null; - let page = 1; - while (true) { - const releases = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - page, - }); - if (releases.data.length === 0) { - break; - } - page++; - for (const release of releases.data) { - const ret = RE_TAG_STABLE.exec(release.tag_name); - if (ret) { - previous = release.tag_name; - break; - } - } - } + const previous = await getPreviousStableRelease(github, context); if (!previous) { core.setFailed(`No previous stable release found, ignoring`); return; @@ -120,49 +188,21 @@ module.exports = async ({ github, context, core }) => { `Patch release triggered by ${TAG} (${branch.data.commit.sha})` ); - let pv = 1; - let previous = null; - let page = 1; - while (true) { - const releases = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - page, - }); - if (releases.data.length === 0) { - break; - } - page++; - for (const release of releases.data) { - if (!release.tag_name.startsWith(TAG)) { - continue; - } - if (release.tag_name === TAG) { - previous = release.tag_name; - break; - } - const ret = RE_TAG_PATCH.exec(release.tag_name); - if (!ret) { - core.warning(`Ignore previous release ${release.tag_name}`); - continue; - } - pv = parseInt(result[4]) + 1; - previous = release.tag_name; - } - } + const previous = await getPreviousPatchRelease(github, context); if (!previous) { - core.setFailed(`No previous stable release found, ignoring`); + core.setFailed(`No previous patch release found, ignoring`); return; } core.setOutput("previous", previous); core.info(`Patch release with previous release: ${previous}`); - const major = result[1]; - const minor = result[2]; - const patch = result[3]; - const nextTag = `v${major}.${minor}.${patch}-p${pv}`; + const nextTag = getNextPatchRelease(previous); + if (!nextTag) { + core.setFailed(`No next patch release from ${previous}`); + return; + } core.setOutput("tag", nextTag); - core.info(`Patch release ${nextTag} from ${TAG}`); + core.info(`Patch release ${nextTag} from ${previous}`); return; } From d1a4975b4f7a471e202621545709490e68e5d54e Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 16:48:25 +0800 Subject: [PATCH 10/33] chore(ci): fix gen next patch release (#17281) --- .github/scripts/bump_version.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/scripts/bump_version.js b/.github/scripts/bump_version.js index 415eea0808ff3..a77b8817b92c7 100644 --- a/.github/scripts/bump_version.js +++ b/.github/scripts/bump_version.js @@ -103,13 +103,13 @@ module.exports = async ({ github, context, core }) => { const patch = stable[3]; return `v${major}.${minor}.${patch}-p1`; } - const patch = 
RE_TAG_PATCH.exec(previous); - if (patch) { - const major = patch[1]; - const minor = patch[2]; - const patch = patch[3]; - const pv = parseInt(patch[4]); - return `v${major}.${minor}.${patch}-p${pv + 1}`; + const version = RE_TAG_PATCH.exec(previous); + if (version) { + const major = version[1]; + const minor = version[2]; + const patch = version[3]; + const pv = parseInt(version[4]) + 1; + return `v${major}.${minor}.${patch}-p${pv}`; } } From 6bcfe0957dcf8ed5eaec92be826e2314daa7fdff Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 17:27:25 +0800 Subject: [PATCH 11/33] chore(ci): unit test with 16c64g (#17283) --- .github/workflows/reuse.linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reuse.linux.yml b/.github/workflows/reuse.linux.yml index dd5a2652a2fda..30bcd3d662fb9 100644 --- a/.github/workflows/reuse.linux.yml +++ b/.github/workflows/reuse.linux.yml @@ -95,7 +95,7 @@ jobs: # artifacts: query test_unit: - runs-on: [self-hosted, X64, Linux, 8c32g, "${{ inputs.runner_provider }}"] + runs-on: [self-hosted, X64, Linux, 16c64g, "${{ inputs.runner_provider }}"] steps: - uses: actions/checkout@v4 with: From 2743ff1e26bbaf7b3362887a1e86da4d2a0b905a Mon Sep 17 00:00:00 2001 From: Bohu Date: Tue, 14 Jan 2025 17:29:18 +0800 Subject: [PATCH 12/33] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d308b1026cb6a..cf10f1d3b80b6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

Databend: The Next-Gen Cloud [Data+AI] Analytics

-Zero Disk Architecture
+The open-source, on-premise alternative to Snowflake

From f71635266c0f3383612fb8b2cf68e62f097a0977 Mon Sep 17 00:00:00 2001 From: Bohu Date: Tue, 14 Jan 2025 17:41:11 +0800 Subject: [PATCH 13/33] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index cf10f1d3b80b6..bf1243ebfd776 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,10 @@ **Databend**, built in Rust, is an open-source cloud data warehouse that serves as a cost-effective [alternative to Snowflake](https://github.com/datafuselabs/databend/issues/13059). With its focus on fast query execution and data ingestion, it's designed for complex analysis of the world's largest datasets. +**Production-Proven Scale:** +- 🤝 **Enterprise Adoption**: Trusted by over **50 organizations** processing more than **100 million queries daily** +- 🗄️ **Massive Scale**: Successfully managing over **800 petabytes** of analytical data + ## ⚡ Performance
From 8a47dce8bf8681ff96781aa0391fa938071a4c53 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 14 Jan 2025 19:23:47 +0800 Subject: [PATCH 14/33] fix(query): window function support params (#17282) --- src/query/ast/src/parser/expr.rs | 30 ++++- src/query/ast/tests/it/parser.rs | 1 + src/query/ast/tests/it/testdata/expr.txt | 114 ++++++++++++++++++ .../query/window_function/window_basic.test | 29 +++++ 4 files changed, 169 insertions(+), 5 deletions(-) diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index ec3ffe28e109f..854923757dc22 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1083,18 +1083,36 @@ pub fn expr_element(i: Input) -> IResult> { }, }, ); + let function_call_with_params_window = map( + rule! { + #function_name + ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" + ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" + ~ #window_function + }, + |(name, _, params, _, _, opt_distinct, opt_args, _, window)| ExprElement::FunctionCall { + func: FunctionCall { + distinct: opt_distinct.is_some(), + name, + args: opt_args.unwrap_or_default(), + params, + window: Some(window), + lambda: None, + }, + }, + ); let function_call_with_params = map( rule! { #function_name - ~ ("(" ~ #comma_separated_list1(subexpr(0)) ~ ")")? + ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" }, - |(name, params, _, opt_distinct, opt_args, _)| ExprElement::FunctionCall { + |(name, _, params, _, _, opt_distinct, opt_args, _)| ExprElement::FunctionCall { func: FunctionCall { distinct: opt_distinct.is_some(), name, args: opt_args.unwrap_or_default(), - params: params.map(|(_, x, _)| x).unwrap_or_default(), + params, window: None, lambda: None, }, @@ -1376,7 +1394,6 @@ pub fn expr_element(i: Input) -> IResult> { | #interval_expr : "`INTERVAL `" | #extract : "`EXTRACT((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK) FROM ...)`" | #date_part : "`DATE_PART((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK), ...)`" - ), rule!( #substring : "`SUBSTRING(... [FROM ...] [FOR ...])`" @@ -1388,9 +1405,12 @@ pub fn expr_element(i: Input) -> IResult> { | #count_all_with_window : "`COUNT(*) OVER ...`" | #function_call_with_lambda : "`function(..., x -> ...)`" | #function_call_with_window : "`function(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" + | #function_call_with_params_window : "`function(...)(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" | #function_call_with_params : "`function(...)(...)`" | #function_call : "`function(...)`" - | #case : "`CASE ... END`" + ), + rule!( + #case : "`CASE ... 
END`" | #tuple : "`( [, ...])`" | #subquery : "`(SELECT ...)`" | #column_ref : "" diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index 1d8d789906695..052648e507d76 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -1251,6 +1251,7 @@ fn test_expr() { r#"COUNT() OVER (ORDER BY hire_date ROWS UNBOUNDED PRECEDING)"#, r#"COUNT() OVER (ORDER BY hire_date ROWS CURRENT ROW)"#, r#"COUNT() OVER (ORDER BY hire_date ROWS 3 PRECEDING)"#, + r#"QUANTILE_CONT(0.5)(salary) OVER (PARTITION BY department ORDER BY hire_date)"#, r#"ARRAY_APPLY([1,2,3], x -> x + 1)"#, r#"ARRAY_FILTER(col, y -> y % 2 = 0)"#, r#"(current_timestamp, current_timestamp(), now())"#, diff --git a/src/query/ast/tests/it/testdata/expr.txt b/src/query/ast/tests/it/testdata/expr.txt index eb279496999c7..af6d394434354 100644 --- a/src/query/ast/tests/it/testdata/expr.txt +++ b/src/query/ast/tests/it/testdata/expr.txt @@ -4229,6 +4229,120 @@ FunctionCall { } +---------- Input ---------- +QUANTILE_CONT(0.5)(salary) OVER (PARTITION BY department ORDER BY hire_date) +---------- Output --------- +QUANTILE_CONT(0.5)(salary) OVER (PARTITION BY department ORDER BY hire_date) +---------- AST ------------ +FunctionCall { + span: Some( + 0..76, + ), + func: FunctionCall { + distinct: false, + name: Identifier { + span: Some( + 0..13, + ), + name: "QUANTILE_CONT", + quote: None, + ident_type: None, + }, + args: [ + ColumnRef { + span: Some( + 19..25, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 19..25, + ), + name: "salary", + quote: None, + ident_type: None, + }, + ), + }, + }, + ], + params: [ + Literal { + span: Some( + 14..17, + ), + value: Decimal256 { + value: 5, + precision: 76, + scale: 1, + }, + }, + ], + window: Some( + WindowDesc { + ignore_nulls: None, + window: WindowSpec( + WindowSpec { + existing_window_name: None, + partition_by: [ + ColumnRef { + span: Some( + 46..56, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 46..56, + ), + name: "department", + quote: None, + ident_type: None, + }, + ), + }, + }, + ], + order_by: [ + OrderByExpr { + expr: ColumnRef { + span: Some( + 66..75, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 66..75, + ), + name: "hire_date", + quote: None, + ident_type: None, + }, + ), + }, + }, + asc: None, + nulls_first: None, + }, + ], + window_frame: None, + }, + ), + }, + ), + lambda: None, + }, +} + + ---------- Input ---------- ARRAY_APPLY([1,2,3], x -> x + 1) ---------- Output --------- diff --git a/tests/sqllogictests/suites/query/window_function/window_basic.test b/tests/sqllogictests/suites/query/window_function/window_basic.test index 75110235c9de6..694e675219bce 100644 --- a/tests/sqllogictests/suites/query/window_function/window_basic.test +++ b/tests/sqllogictests/suites/query/window_function/window_basic.test @@ -606,6 +606,34 @@ select grouping(salary), grouping(depname), sum(grouping(salary)) over (partitio 1 0 3 1 1 1 +query TII +SELECT depname, empno, quantile_cont(salary) OVER (PARTITION BY depname ORDER BY empno) FROM empsalary ORDER BY depname, empno +---- +develop 7 4200.0 +develop 8 5100.0 +develop 9 4500.0 +develop 10 4850.0 +develop 11 5200.0 +personnel 2 3900.0 +personnel 5 3700.0 +sales 1 5000.0 +sales 3 4900.0 +sales 4 4800.0 + +query TII +SELECT depname, empno, quantile_cont(0.8)(salary) OVER (PARTITION BY depname ORDER BY empno) FROM 
empsalary ORDER BY depname, empno +---- +develop 7 4200.0 +develop 8 5640.0 +develop 9 5400.0 +develop 10 5520.0 +develop 11 5360.0 +personnel 2 3900.0 +personnel 5 3820.0 +sales 1 5000.0 +sales 3 4960.0 +sales 4 4920.0 + # Window func in subquery query I SELECT * FROM (SELECT row_number() OVER (PARTITION BY depname ORDER BY salary) rn FROM empsalary ORDER BY depname, rn) order by 1; @@ -846,3 +874,4 @@ Product B 1200 NULL statement ok DROP DATABASE test_window_basic; + From 13d1dcdc319006f058757a7c5042aa6b19f6de69 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Tue, 14 Jan 2025 23:58:20 +0800 Subject: [PATCH 15/33] chore: try fix flaky ci (#17265) --- tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh b/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh index e571c4f9b1ec0..39cc7201197c0 100755 --- a/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh +++ b/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh @@ -21,7 +21,8 @@ pid=$! # kill query randomly sleep_time=$(expr $RANDOM % 5 + 5) sleep $sleep_time -kill $pid +disown %1 +kill -9 $pid > /dev/null 2>&1 # restart query echo "will restart query" From ce8a1c9ec7037c0f74849f804f609ad74843d5ae Mon Sep 17 00:00:00 2001 From: everpcpc Date: Wed, 15 Jan 2025 17:22:20 +0800 Subject: [PATCH 16/33] chore(ci): add bendsql binary to release (#17289) --- .github/actions/pack_binaries/action.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/actions/pack_binaries/action.yml b/.github/actions/pack_binaries/action.yml index 64d3283b129b9..bc54eedb02e41 100644 --- a/.github/actions/pack_binaries/action.yml +++ b/.github/actions/pack_binaries/action.yml @@ -23,6 +23,14 @@ runs: category: ${{ inputs.category }} path: distro/bin artifacts: metactl,meta,query,query.debug + - name: Download BendSQL + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: | + verison=$(gh release list --repo databendlabs/bendsql | head -n 1 | awk '{print $1}') + curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${verison}-${{ inputs.target }}.tar.gz + tar -xzvf /tmp/bendsql.tar.gz -C distro/bin - name: Pack Binaries id: pack_binaries shell: bash From 71f00b8d2b7af28b7d1e95f7b0b20ab48e63310b Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Wed, 15 Jan 2025 21:19:58 +0800 Subject: [PATCH 17/33] chore: databend-test print more stderr (#17288) --- tests/databend-test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/databend-test b/tests/databend-test index b4b5dc41688b6..f439858093e5d 100755 --- a/tests/databend-test +++ b/tests/databend-test @@ -326,7 +326,7 @@ def run_tests_array(all_tests_with_params): if os.path.isfile(stdout_file): status += ", result:\n\n" status += '\n'.join( - open(stdout_file).read().split('\n')[:100]) + open(stdout_file).read().split('\n')[:300]) status += '\n' elif stderr: @@ -336,7 +336,7 @@ def run_tests_array(all_tests_with_params): status += MSG_FAIL status += print_test_time(total_time) status += " - having stderror:\n{}\n".format( - '\n'.join(stderr.split('\n')[:100])) + '\n'.join(stderr.split('\n')[:300])) elif 'Exception' in stdout: failures += 1 failure_cases.append(case_file_full_path) @@ -344,7 +344,7 @@ def run_tests_array(all_tests_with_params): status += MSG_FAIL status += 
print_test_time(total_time) status += " - having exception:\n{}\n".format( - '\n'.join(stdout.split('\n')[:100])) + '\n'.join(stdout.split('\n')[:300])) elif not os.path.isfile(result_file): status += MSG_UNKNOWN status += print_test_time(total_time) From e9fc0a1867bcfe91208a0da70fac8c95e2a8dabf Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 16 Jan 2025 08:43:05 +0800 Subject: [PATCH 18/33] refactor(query): only call once finish callback (#17293) * fix(query): only call once finish callback * fix(query): only call once finish callback * fix(query): fix --- src/query/service/src/pipelines/pipeline_builder.rs | 12 +++++++----- src/query/service/src/sessions/query_ctx.rs | 9 +++++++++ src/query/service/src/sessions/query_ctx_shared.rs | 2 ++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 6508dcca83b6c..e796d2cb4d705 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -94,11 +94,13 @@ impl PipelineBuilder { } // unload spill metas - self.main_pipeline - .set_on_finished(always_callback(move |_info: &ExecutionInfo| { - self.ctx.unload_spill_meta(); - Ok(()) - })); + if !self.ctx.mark_unload_callbacked() { + self.main_pipeline + .set_on_finished(always_callback(move |_info: &ExecutionInfo| { + self.ctx.unload_spill_meta(); + Ok(()) + })); + } Ok(PipelineBuildResult { main_pipeline: self.main_pipeline, diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index b06d45b0f0841..cfb5ce834c46d 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -322,6 +322,9 @@ impl QueryContext { pub fn update_init_query_id(&self, id: String) { self.shared.spilled_files.write().clear(); + self.shared + .unload_callbacked + .store(false, Ordering::Release); self.shared.cluster_spill_progress.write().clear(); *self.shared.init_query_id.write() = id; } @@ -471,6 +474,12 @@ impl QueryContext { Ok(table) } + pub fn mark_unload_callbacked(&self) -> bool { + self.shared + .unload_callbacked + .fetch_or(true, Ordering::SeqCst) + } + pub fn unload_spill_meta(&self) { const SPILL_META_SUFFIX: &str = ".list"; let r = self.shared.spilled_files.read(); diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index af2324b508239..50341c7f1eea4 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -150,6 +150,7 @@ pub struct QueryContextShared { pub(in crate::sessions) cluster_spill_progress: Arc>>, pub(in crate::sessions) spilled_files: Arc>>, + pub(in crate::sessions) unload_callbacked: AtomicBool, } impl QueryContextShared { @@ -209,6 +210,7 @@ impl QueryContextShared { cluster_spill_progress: Default::default(), spilled_files: Default::default(), + unload_callbacked: AtomicBool::new(false), warehouse_cache: Arc::new(RwLock::new(None)), })) } From ab0d1a81d16d10be9c0b12e012f77180a25b7f43 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 16 Jan 2025 10:36:31 +0800 Subject: [PATCH 19/33] feat: orc support fill missing tuple field (#17247) * tmp orc speed up * missing field of tuple as null. * feat: deal will array of tuple which missing fields * fix: cluster key use wrong index. 
* cleanup * fill missing tuple field only for orc * add test for cluster by * fix clippy --- Cargo.lock | 2 +- Cargo.toml | 2 +- .../sql/src/planner/binder/copy_into_table.rs | 13 +- .../sql/src/planner/expression_parser.rs | 25 ++- .../stage/src/read/columnar/projection.rs | 193 ++++++++++++++++-- .../src/copy_into_table/processors/decoder.rs | 59 ++++++ .../orc/src/copy_into_table/projection.rs | 2 + .../src/parquet_rs/copy_into_table/reader.rs | 2 + .../05_ddl/05_0000_ddl_create_tables.test | 6 + 9 files changed, 273 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9ea8284d9192e..edd9af42f8e99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10532,7 +10532,7 @@ dependencies = [ [[package]] name = "orc-rust" version = "0.5.0" -source = "git+https://github.com/datafusion-contrib/orc-rust?rev=dfb1ede#dfb1ede8f875566372568346ad47b359e1a9c92a" +source = "git+https://github.com/youngsofun/orc-rust?rev=6c5ac57#6c5ac578d8790e3f79bde4764ccea7e5e76a0f0d" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 07383209a6470..1ecee31663e6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -639,7 +639,7 @@ deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "3038c145" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.7" } -orc-rust = { git = "https://github.com/datafusion-contrib/orc-rust", rev = "dfb1ede" } +orc-rust = { git = "https://github.com/youngsofun/orc-rust", rev = "6c5ac57" } recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "6af35a1" } sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1" } tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "7502370" } diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index ec1a80ea860a8..c59d9baede093 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -182,15 +182,12 @@ impl Binder { files: stmt.files.clone(), pattern, }; - let required_values_schema: DataSchemaRef = Arc::new( - match &stmt.dst_columns { - Some(cols) => self.schema_project(&table.schema(), cols)?, - None => self.schema_project(&table.schema(), &[])?, - } - .into(), - ); + let stage_schema = match &stmt.dst_columns { + Some(cols) => self.schema_project(&table.schema(), cols)?, + None => self.schema_project(&table.schema(), &[])?, + }; - let stage_schema = infer_table_schema(&required_values_schema)?; + let required_values_schema: DataSchemaRef = Arc::new(stage_schema.clone().into()); let default_values = if stage_info.file_format_params.need_field_default() { Some( diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index dce84a0d74425..ad8c6e93eeef1 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -395,7 +395,30 @@ pub fn parse_cluster_keys( table_meta: Arc, ast_exprs: Vec, ) -> Result> { - let exprs = parse_ast_exprs(ctx, table_meta, ast_exprs)?; + let schema = table_meta.schema(); + let (mut bind_context, metadata) = bind_table(table_meta)?; + let settings = ctx.get_settings(); + let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; + let mut type_checker = 
TypeChecker::try_create( + &mut bind_context, + ctx, + &name_resolution_ctx, + metadata, + &[], + false, + )?; + + let exprs: Vec = ast_exprs + .iter() + .map(|ast| { + let (scalar, _) = *type_checker.resolve(ast)?; + let expr = scalar + .as_expr()? + .project_column_ref(|col| schema.index_of(&col.column_name).unwrap()); + Ok(expr) + }) + .collect::>()?; + let mut res = Vec::with_capacity(exprs.len()); for expr in exprs { let inner_type = expr.data_type().remove_nullable(); diff --git a/src/query/storages/common/stage/src/read/columnar/projection.rs b/src/query/storages/common/stage/src/read/columnar/projection.rs index eb67312c50bd6..224ed263fcfdf 100644 --- a/src/query/storages/common/stage/src/read/columnar/projection.rs +++ b/src/query/storages/common/stage/src/read/columnar/projection.rs @@ -14,12 +14,18 @@ use databend_common_exception::ErrorCode; use databend_common_expression::type_check::check_cast; +use databend_common_expression::type_check::check_function; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberScalar; use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_expression::Scalar; +use databend_common_expression::TableDataType; +use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_meta_app::principal::NullAs; +use databend_common_meta_app::principal::StageFileFormatType; use crate::read::cast::load_can_auto_cast_to; @@ -30,10 +36,11 @@ use crate::read::cast::load_can_auto_cast_to; pub fn project_columnar( input_schema: &TableSchemaRef, output_schema: &TableSchemaRef, - null_as: &NullAs, + missing_as: &NullAs, default_values: &Option>, location: &str, case_sensitive: bool, + fmt: StageFileFormatType, ) -> databend_common_exception::Result<(Vec, Vec)> { let mut pushdown_columns = vec![]; let mut output_projection = vec![]; @@ -68,32 +75,96 @@ pub fn project_columnar( if from_field.data_type == to_field.data_type { expr - } else { - // note: tuple field name is dropped here, matched by pos here - if load_can_auto_cast_to( - &from_field.data_type().into(), + } else if load_can_auto_cast_to( + &from_field.data_type().into(), + &to_field.data_type().into(), + ) { + check_cast( + None, + false, + expr, &to_field.data_type().into(), + &BUILTIN_FUNCTIONS, + )? + } else if fmt == StageFileFormatType::Orc && !matches!(missing_as, NullAs::Error) { + // special cast for tuple type, fill in default values for the missing fields. 
+ match ( + from_field.data_type.remove_nullable(), + to_field.data_type.remove_nullable(), ) { - check_cast( - None, - false, + ( + TableDataType::Array(box TableDataType::Nullable( + box TableDataType::Tuple { + fields_name: from_fields_name, + fields_type: _from_fields_type, + }, + )), + TableDataType::Array(box TableDataType::Nullable( + box TableDataType::Tuple { + fields_name: to_fields_name, + fields_type: _to_fields_type, + }, + )), + ) => { + let mut v = vec![]; + for to in to_fields_name.iter() { + match from_fields_name.iter().position(|k| k == to) { + Some(p) => v.push(p as i32), + None => v.push(-1), + }; + } + let name = v + .iter() + .map(|v| v.to_string()) + .collect::>() + .join(","); + Expr::ColumnRef { + span: None, + id: pos, + data_type: from_field.data_type().into(), + display_name: format!("#!{name}",), + } + } + ( + TableDataType::Tuple { + fields_name: from_fields_name, + fields_type: from_fields_type, + }, + TableDataType::Tuple { + fields_name: to_fields_name, + fields_type: to_fields_type, + }, + ) => project_tuple( expr, - &to_field.data_type().into(), - &BUILTIN_FUNCTIONS, - )? - } else { - return Err(ErrorCode::BadDataValueType(format!( - "fail to load file {}: Cannot cast column {} from {:?} to {:?}", - location, - field_name, - from_field.data_type(), - to_field.data_type() - ))); + from_field, + to_field, + &from_fields_name, + &from_fields_type, + &to_fields_name, + &to_fields_type, + )?, + (_, _) => { + return Err(ErrorCode::BadDataValueType(format!( + "fail to load file {}: Cannot cast column {} from {:?} to {:?}", + location, + field_name, + from_field.data_type(), + to_field.data_type() + ))); + } } + } else { + return Err(ErrorCode::BadDataValueType(format!( + "fail to load file {}: Cannot cast column {} from {:?} to {:?}", + location, + field_name, + from_field.data_type(), + to_field.data_type() + ))); } } 0 => { - match null_as { + match missing_as { // default NullAs::Error => { return Err(ErrorCode::BadDataValueType(format!( @@ -139,3 +210,85 @@ pub fn project_columnar( } Ok((output_projection, pushdown_columns)) } + +fn project_tuple( + expr: Expr, + from_field: &TableField, + to_field: &TableField, + from_fields_name: &[String], + from_fields_type: &[TableDataType], + to_fields_name: &[String], + to_fields_type: &[TableDataType], +) -> databend_common_exception::Result { + let mut inner_columns = Vec::with_capacity(to_fields_name.len()); + + for (to_field_name, to_field_type) in to_fields_name.iter().zip(to_fields_type.iter()) { + let inner_column = match from_fields_name.iter().position(|k| k == to_field_name) { + Some(idx) => { + let from_field_type = from_fields_type.get(idx).unwrap(); + let tuple_idx = Scalar::Number(NumberScalar::Int64((idx + 1) as i64)); + let inner_column = check_function( + None, + "get", + &[tuple_idx], + &[expr.clone()], + &BUILTIN_FUNCTIONS, + )?; + if from_field_type != to_field_type { + check_cast( + None, + false, + inner_column, + &to_field_type.into(), + &BUILTIN_FUNCTIONS, + )? + } else { + inner_column + } + } + None => { + // if inner field not exists, fill default value. 
+ let data_type: DataType = to_field_type.into(); + let scalar = Scalar::default_value(&data_type); + Expr::Constant { + span: None, + scalar, + data_type, + } + } + }; + inner_columns.push(inner_column); + } + let tuple_column = check_function(None, "tuple", &[], &inner_columns, &BUILTIN_FUNCTIONS)?; + let tuple_column = if from_field.data_type != to_field.data_type { + let dest_ty: DataType = (&to_field.data_type).into(); + check_cast(None, false, tuple_column, &dest_ty, &BUILTIN_FUNCTIONS)? + } else { + tuple_column + }; + + if from_field.data_type.is_nullable() && to_field.data_type.is_nullable() { + // add if function to cast null value + let is_not_null = check_function( + None, + "is_not_null", + &[], + &[expr.clone()], + &BUILTIN_FUNCTIONS, + )?; + let null_scalar = Expr::Constant { + span: None, + scalar: Scalar::Null, + data_type: DataType::Null, + }; + check_function( + None, + "if", + &[], + &[is_not_null, tuple_column, null_scalar], + &BUILTIN_FUNCTIONS, + ) + } else { + Ok(tuple_column) + } +} diff --git a/src/query/storages/orc/src/copy_into_table/processors/decoder.rs b/src/query/storages/orc/src/copy_into_table/processors/decoder.rs index adf83c641e74c..b9e46e9113e01 100644 --- a/src/query/storages/orc/src/copy_into_table/processors/decoder.rs +++ b/src/query/storages/orc/src/copy_into_table/processors/decoder.rs @@ -22,13 +22,19 @@ use arrow_array::RecordBatch; use databend_common_catalog::query_kind::QueryKind; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::types::ArrayColumn; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NullableColumn; use databend_common_expression::BlockEntry; use databend_common_expression::BlockMetaInfoDowncast; +use databend_common_expression::Column; +use databend_common_expression::ColumnBuilder; use databend_common_expression::DataBlock; use databend_common_expression::DataSchemaRef; use databend_common_expression::Evaluator; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::Value; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; @@ -96,6 +102,59 @@ impl StripeDecoderForCopy { let evaluator = Evaluator::new(&block, &self.func_ctx, &BUILTIN_FUNCTIONS); let mut columns = Vec::with_capacity(projection.len()); for (field, expr) in self.output_schema.fields().iter().zip(projection.iter()) { + if let Expr::ColumnRef { + display_name, id, .. + } = expr + { + if let Some(display_name) = display_name.strip_prefix("#!") { + let typs = match field.data_type() { + DataType::Nullable(box DataType::Array(box DataType::Nullable( + box DataType::Tuple(v), + ))) => v, + _ => { + log::error!("expect array of tuple, got {:?}", field); + unreachable!("expect value: array of tuple") + } + }; + let positions = display_name + .split(',') + .map(|s| s.parse::().unwrap()) + .collect::>(); + let mut e = block.columns()[*id].clone(); + match e.value { + Value::Column(Column::Nullable(box NullableColumn { + column: + Column::Array(box ArrayColumn { + values: + Column::Nullable(box NullableColumn { + column: Column::Tuple(ref mut v), + .. + }), + .. + }), + .. 
+ })) => { + let len = v[0].len(); + let mut v2 = vec![]; + for (i, p) in positions.iter().enumerate() { + if *p < 0 { + v2.push(ColumnBuilder::repeat_default(&typs[i], len).build()); + } else { + v2.push(v[*p as usize].clone()); + } + } + *v = v2 + } + _ => { + log::error!("expect array of tuple, got {:?} {:?}", field, e.value); + unreachable!("expect value: array of tuple") + } + } + let column = BlockEntry::new(field.data_type().clone(), e.value); + columns.push(column); + continue; + } + } let value = evaluator.run(expr)?; let column = BlockEntry::new(field.data_type().clone(), value); columns.push(column); diff --git a/src/query/storages/orc/src/copy_into_table/projection.rs b/src/query/storages/orc/src/copy_into_table/projection.rs index 0ac410e09dcf0..caa9feeca5dca 100644 --- a/src/query/storages/orc/src/copy_into_table/projection.rs +++ b/src/query/storages/orc/src/copy_into_table/projection.rs @@ -19,6 +19,7 @@ use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_meta_app::principal::NullAs; +use databend_common_meta_app::principal::StageFileFormatType; use databend_storages_common_stage::project_columnar; use crate::hashable_schema::HashableSchema; @@ -56,6 +57,7 @@ impl ProjectionFactory { &self.default_values, location, false, + StageFileFormatType::Orc, )? .0; self.projections.insert(schema.clone(), v.clone()); diff --git a/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs b/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs index 564882a26fcac..d5706c93e2d72 100644 --- a/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs +++ b/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs @@ -23,6 +23,7 @@ use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_meta_app::principal::NullAs; +use databend_common_meta_app::principal::StageFileFormatType; use databend_common_storage::parquet_rs::infer_schema_with_extension; use databend_storages_common_stage::project_columnar; use opendal::Operator; @@ -88,6 +89,7 @@ impl RowGroupReaderForCopy { &default_values, location, case_sensitive, + StageFileFormatType::Parquet, )?; pushdown_columns.sort(); let mapping = pushdown_columns diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test b/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test index 49b3ff94ca675..69383c6d6dd69 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test +++ b/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test @@ -579,6 +579,12 @@ create table t(a int) cluster by (a+rand()) statement error 1081.*is not deterministic create table t(a string) cluster by (a+uuid()) +statement ok +create table tt(a tuple(x int, y int), b string, c int) cluster by (b); + +statement ok +insert into tt(b,c) values ('1',2); + ################################################## # table option `data_retention_period_in_hours` # ################################################## From 6850110dd6335dc611fa2adcdea358915e45c3a1 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 16 Jan 2025 11:16:44 +0800 Subject: [PATCH 20/33] chore: Bump opendal to include a fix for gcs (#17290) Signed-off-by: Xuanwo --- Cargo.lock | 6 ++---- Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
edd9af42f8e99..f74eab595aa12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10227,8 +10227,7 @@ dependencies = [ [[package]] name = "object_store_opendal" version = "0.49.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d47750d91b6d9fa245b3dff15d582dff8462d826cf6c2d5f2e91035d9a317f9" +source = "git+https://github.com/apache/opendal?rev=78b6a9f#78b6a9f26dbeb5118990d3140f1c636d8e54e719" dependencies = [ "async-trait", "bytes", @@ -10309,8 +10308,7 @@ dependencies = [ [[package]] name = "opendal" version = "0.51.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9dcfa7a3615e3c60eb662ed6b46b6f244cf2658098f593c0c0915430b3a268" +source = "git+https://github.com/apache/opendal?rev=78b6a9f#78b6a9f26dbeb5118990d3140f1c636d8e54e719" dependencies = [ "anyhow", "async-backtrace", diff --git a/Cargo.toml b/Cargo.toml index 1ecee31663e6b..e09248212d74a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -369,10 +369,10 @@ num-derive = "0.3.3" num-traits = "0.2.19" num_cpus = "1.13.1" object = "0.36.5" -object_store_opendal = { version = "0.49.0" } +object_store_opendal = { version = "0.49.0", package = "object_store_opendal", git = "https://github.com/apache/opendal", rev = "78b6a9f" } once_cell = "1.15.0" openai_api_rust = "0.1" -opendal = { version = "0.51.1", features = [ +opendal = { version = "0.51.1", package = "opendal", git = "https://github.com/apache/opendal", rev = "78b6a9f", features = [ "layers-fastrace", "layers-prometheus-client", "layers-async-backtrace", From 9b38e65589ee816cf89216627c68e22ea5b5b729 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 16 Jan 2025 14:05:48 +0800 Subject: [PATCH 21/33] fix: copy into table collect files twice sometimes. (#17300) * fix: copy into table collect files twice sometimes.
* add log for infer schema --- src/query/sql/src/planner/binder/copy_into_table.rs | 2 ++ src/query/sql/src/planner/plans/copy_into_table.rs | 6 ++++++ .../storages/parquet/src/parquet_rs/parquet_table/table.rs | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index c59d9baede093..fd1a0d6fb6655 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -223,6 +223,7 @@ impl Binder { write_mode: CopyIntoTableMode::Copy, query: None, enable_distributed: false, + files_collected: false, }) } @@ -400,6 +401,7 @@ impl Binder { enable_distributed: false, is_transform: false, + files_collected: true, }; self.bind_copy_into_table_from_location(bind_context, plan) diff --git a/src/query/sql/src/planner/plans/copy_into_table.rs b/src/query/sql/src/planner/plans/copy_into_table.rs index e5a43486e85ea..92325edab46c1 100644 --- a/src/query/sql/src/planner/plans/copy_into_table.rs +++ b/src/query/sql/src/planner/plans/copy_into_table.rs @@ -137,10 +137,16 @@ pub struct CopyIntoTablePlan { pub is_transform: bool, pub enable_distributed: bool, + + pub files_collected: bool, } impl CopyIntoTablePlan { pub async fn collect_files(&mut self, ctx: &dyn TableContext) -> Result<()> { + if self.files_collected { + return Ok(()); + } + self.files_collected = true; ctx.set_status_info("begin to list files"); let start = Instant::now(); diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index 9bbc66eb79860..fd0cae9405d7b 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -49,6 +49,7 @@ use databend_common_storage::parquet_rs::read_metadata_async; use databend_common_storage::StageFileInfo; use databend_common_storage::StageFilesInfo; use databend_storages_common_table_meta::table::ChangeType; +use log::info; use opendal::Operator; use parquet::file::metadata::ParquetMetaData; use parquet::schema::types::SchemaDescPtr; @@ -174,7 +175,9 @@ impl ParquetRSTable { // Infer schema from the first parquet file. // Assume all parquet files have the same schema. // If not, throw error during reading. 
- let size = operator.stat(path).await?.content_length(); + let stat = operator.stat(path).await?; + let size = stat.content_length(); + info!("infer schema from file {}, with stat {:?}", path, stat); let first_meta = read_metadata_async(path, &operator, Some(size)).await?; let arrow_schema = infer_schema_with_extension(first_meta.file_metadata())?; let compression_ratio = get_compression_ratio(&first_meta); From c89e4a322a5d262b307128df131891ac1cffc41b Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 16 Jan 2025 15:42:08 +0800 Subject: [PATCH 22/33] refactor(query): record state_offsets to reduce the memory leak of agg (#17285) * agg leak * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): revert to StackHashSet --- .../src/dictionary_string_hashtable.rs | 10 ++-- .../hashtable/src/hashjoin_hashtable.rs | 4 +- .../src/hashjoin_string_hashtable.rs | 4 +- src/common/hashtable/src/hashtable.rs | 4 +- src/common/hashtable/src/lookup_hashtable.rs | 10 ++-- .../hashtable/src/short_string_hashtable.rs | 4 +- src/common/hashtable/src/stack_hashtable.rs | 4 +- src/common/hashtable/src/string_hashtable.rs | 4 +- src/query/expression/src/aggregate/payload.rs | 46 ++++++++++++++++--- .../aggregates/aggregate_distinct_state.rs | 7 +-- 10 files changed, 68 insertions(+), 29 deletions(-) diff --git a/src/common/hashtable/src/dictionary_string_hashtable.rs b/src/common/hashtable/src/dictionary_string_hashtable.rs index 5ae808a11b8f8..cfde4dc66c2b0 100644 --- a/src/common/hashtable/src/dictionary_string_hashtable.rs +++ b/src/common/hashtable/src/dictionary_string_hashtable.rs @@ -20,7 +20,7 @@ use std::ptr::NonNull; use std::sync::Arc; use bumpalo::Bump; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use crate::container::Container; use crate::container::HeapContainer; @@ -102,7 +102,7 @@ pub struct DictionaryStringHashTable { arena: Arc, dict_keys: usize, entries_len: usize, - pub(crate) entries: HeapContainer, MmapAllocator>, + pub(crate) entries: HeapContainer, DefaultAllocator>, pub dictionary_hashset: StringHashSet<[u8]>, } @@ -116,7 +116,7 @@ impl DictionaryStringHashTable { arena: bump.clone(), dict_keys, entries_len: 0, - entries: unsafe { HeapContainer::new_zeroed(256, MmapAllocator::default()) }, + entries: unsafe { HeapContainer::new_zeroed(256, DefaultAllocator {}) }, dictionary_hashset: StringHashSet::new(bump), } } @@ -452,7 +452,7 @@ impl HashtableLike for DictionaryStringHashTable { } } - self.entries = HeapContainer::new_zeroed(0, MmapAllocator::default()); + self.entries = HeapContainer::new_zeroed(0, DefaultAllocator {}); } self.dictionary_hashset.clear(); @@ -615,7 +615,7 @@ impl<'a, V> Iterator for DictionaryTableMutIter<'a, V> { } pub struct DictionarySlotIter<'a> { - empty: Option<&'a TableEmpty<(), MmapAllocator>>, + empty: Option<&'a TableEmpty<(), DefaultAllocator>>, entities_slice: &'a [Entry], i: usize, } diff --git a/src/common/hashtable/src/hashjoin_hashtable.rs b/src/common/hashtable/src/hashjoin_hashtable.rs index 12a4c1ac20c9c..0fec13dc8a4d2 100644 --- a/src/common/hashtable/src/hashjoin_hashtable.rs +++ b/src/common/hashtable/src/hashjoin_hashtable.rs @@ -17,7 +17,7 @@ use std::marker::PhantomData; use std::sync::atomic::AtomicU64; use 
std::sync::atomic::Ordering; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use databend_common_column::bitmap::Bitmap; use super::traits::HashJoinHashtableLike; @@ -101,7 +101,7 @@ pub fn hash_bits() -> u32 { } } -pub struct HashJoinHashTable { +pub struct HashJoinHashTable { pub(crate) pointers: Box<[u64], A>, pub(crate) atomic_pointers: *mut AtomicU64, pub(crate) hash_shift: usize, diff --git a/src/common/hashtable/src/hashjoin_string_hashtable.rs b/src/common/hashtable/src/hashjoin_string_hashtable.rs index 8fdc7aa13208f..2779bb08f9964 100644 --- a/src/common/hashtable/src/hashjoin_string_hashtable.rs +++ b/src/common/hashtable/src/hashjoin_string_hashtable.rs @@ -16,7 +16,7 @@ use std::alloc::Allocator; use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use databend_common_column::bitmap::Bitmap; use super::traits::HashJoinHashtableLike; @@ -37,7 +37,7 @@ pub struct StringRawEntry { pub next: u64, } -pub struct HashJoinStringHashTable { +pub struct HashJoinStringHashTable { pub(crate) pointers: Box<[u64], A>, pub(crate) atomic_pointers: *mut AtomicU64, pub(crate) hash_shift: usize, diff --git a/src/common/hashtable/src/hashtable.rs b/src/common/hashtable/src/hashtable.rs index 3f5f3055bbf63..6591217a2447c 100644 --- a/src/common/hashtable/src/hashtable.rs +++ b/src/common/hashtable/src/hashtable.rs @@ -17,7 +17,7 @@ use std::intrinsics::unlikely; use std::iter::TrustedLen; use std::mem::MaybeUninit; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::HeapContainer; use super::table0::Entry; @@ -29,7 +29,7 @@ use super::traits::Keyable; use super::utils::ZeroEntry; use crate::FastHash; -pub struct Hashtable +pub struct Hashtable where K: Keyable, A: Allocator + Clone, diff --git a/src/common/hashtable/src/lookup_hashtable.rs b/src/common/hashtable/src/lookup_hashtable.rs index bcaf6f44ded4a..19be8634c7983 100644 --- a/src/common/hashtable/src/lookup_hashtable.rs +++ b/src/common/hashtable/src/lookup_hashtable.rs @@ -17,13 +17,17 @@ use std::iter::TrustedLen; use std::mem; use std::mem::MaybeUninit; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use crate::table0::Entry; use crate::HashtableLike; -pub struct LookupHashtable -{ +pub struct LookupHashtable< + K: Sized, + const CAPACITY: usize, + V, + A: Allocator + Clone = DefaultAllocator, +> { flags: Box<[bool; CAPACITY], A>, data: Box<[Entry; CAPACITY], A>, len: usize, diff --git a/src/common/hashtable/src/short_string_hashtable.rs b/src/common/hashtable/src/short_string_hashtable.rs index 9dacd1d424d7a..ab2b95b756c8c 100644 --- a/src/common/hashtable/src/short_string_hashtable.rs +++ b/src/common/hashtable/src/short_string_hashtable.rs @@ -21,7 +21,7 @@ use std::ptr::NonNull; use std::sync::Arc; use bumpalo::Bump; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::HeapContainer; use super::table0::Entry; @@ -39,7 +39,7 @@ use crate::table_empty::TableEmpty; use crate::table_empty::TableEmptyIter; use crate::table_empty::TableEmptyIterMut; -pub struct ShortStringHashtable +pub struct ShortStringHashtable where K: UnsizedKeyable + ?Sized, A: Allocator + Clone, diff --git 
a/src/common/hashtable/src/stack_hashtable.rs b/src/common/hashtable/src/stack_hashtable.rs index 36dc1d070ae25..5be8ebbb649b0 100644 --- a/src/common/hashtable/src/stack_hashtable.rs +++ b/src/common/hashtable/src/stack_hashtable.rs @@ -16,7 +16,7 @@ use std::alloc::Allocator; use std::intrinsics::unlikely; use std::mem::MaybeUninit; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::StackContainer; use super::table0::Entry; @@ -25,7 +25,7 @@ use super::table0::Table0Iter; use super::traits::Keyable; use super::utils::ZeroEntry; -pub struct StackHashtable +pub struct StackHashtable where K: Keyable, A: Allocator + Clone, diff --git a/src/common/hashtable/src/string_hashtable.rs b/src/common/hashtable/src/string_hashtable.rs index 076c700bfbb32..83a86106f500c 100644 --- a/src/common/hashtable/src/string_hashtable.rs +++ b/src/common/hashtable/src/string_hashtable.rs @@ -19,7 +19,7 @@ use std::mem::MaybeUninit; use std::sync::Arc; use bumpalo::Bump; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::HeapContainer; use super::table0::Entry; @@ -38,7 +38,7 @@ use crate::table_empty::TableEmptyIterMut; /// Simple unsized hashtable is used for storing unsized keys in arena. It can be worked with HashMethodSerializer. /// Different from `ShortStringHashTable`, it doesn't use adaptive sub hashtable to store key values via key size. /// It can be considered as a minimal hashtable implementation of ShortStringHashTable -pub struct StringHashtable +pub struct StringHashtable where K: UnsizedKeyable + ?Sized, A: Allocator + Clone, diff --git a/src/query/expression/src/aggregate/payload.rs b/src/query/expression/src/aggregate/payload.rs index f08e61280f5cc..ad76f6dfe5927 100644 --- a/src/query/expression/src/aggregate/payload.rs +++ b/src/query/expression/src/aggregate/payload.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use bumpalo::Bump; use databend_common_base::runtime::drop_guard; use itertools::Itertools; +use log::info; use strength_reduce::StrengthReducedU64; use super::payload_row::rowformat_size; @@ -78,10 +79,18 @@ unsafe impl Sync for Payload {} pub struct Page { pub(crate) data: Vec>, pub(crate) rows: usize, - pub(crate) state_rows: usize, + // state_offset = state_rows * agg_len + // which mark that the offset to clean the agg states + pub(crate) state_offsets: usize, pub(crate) capacity: usize, } +impl Page { + pub fn is_partial_state(&self, agg_len: usize) -> bool { + self.rows * agg_len != self.state_offsets + } +} + pub type Pages = Vec; // TODO FIXME @@ -179,7 +188,7 @@ impl Payload { self.pages.push(Page { data, rows: 0, - state_rows: 0, + state_offsets: 0, capacity: self.row_per_page, }); } @@ -300,10 +309,18 @@ impl Payload { } let place = StateAddr::from(place); + let page = &mut self.pages[page_index[idx]]; for (aggr, offset) in self.aggrs.iter().zip(self.state_addr_offsets.iter()) { aggr.init_state(place.next(*offset)); + page.state_offsets += 1; + } + } + + #[cfg(debug_assertions)] + { + for page in self.pages.iter() { + assert_eq!(page.rows * self.aggrs.len(), page.state_offsets); } - self.pages[page_index[idx]].state_rows += 1; } } @@ -338,6 +355,7 @@ impl Payload { address: &[*const u8], ) { let tuple_size = self.tuple_size; + let agg_len = self.aggrs.len(); let (mut page, _) = self.writable_page(); for i in 0..row_count { let index = select_vector[i]; @@ -350,7 +368,7 @@ impl Payload { ) } page.rows += 
1; - page.state_rows += 1; + page.state_offsets += agg_len; if page.rows == page.capacity { (page, _) = self.writable_page(); @@ -421,10 +439,26 @@ impl Drop for Payload { drop_guard(move || { // drop states if !self.state_move_out { - for (aggr, addr_offset) in self.aggrs.iter().zip(self.state_addr_offsets.iter()) { + 'FOR: for (idx, (aggr, addr_offset)) in self + .aggrs + .iter() + .zip(self.state_addr_offsets.iter()) + .enumerate() + { if aggr.need_manual_drop_state() { for page in self.pages.iter() { - for row in 0..page.state_rows { + let is_partial_state = page.is_partial_state(self.aggrs.len()); + + if is_partial_state && idx == 0 { + info!("Cleaning partial page, state_offsets: {}, row: {}, agg length: {}", page.state_offsets, page.rows, self.aggrs.len()); + } + for row in 0..page.state_offsets.div_ceil(self.aggrs.len()) { + // When OOM, some states are not initialized, we don't need to destroy them + if is_partial_state + && row * self.aggrs.len() + idx >= page.state_offsets + { + continue 'FOR; + } let ptr = self.data_ptr(page, row); unsafe { let state_addr = diff --git a/src/query/functions/src/aggregates/aggregate_distinct_state.rs b/src/query/functions/src/aggregates/aggregate_distinct_state.rs index 2c825d2bcc258..f15b4200749e8 100644 --- a/src/query/functions/src/aggregates/aggregate_distinct_state.rs +++ b/src/query/functions/src/aggregates/aggregate_distinct_state.rs @@ -41,6 +41,7 @@ use databend_common_hashtable::HashSet as CommonHashSet; use databend_common_hashtable::HashtableKeyable; use databend_common_hashtable::HashtableLike; use databend_common_hashtable::ShortStringHashSet; +use databend_common_hashtable::StackHashSet; use databend_common_io::prelude::*; use siphasher::sip128::Hasher128; use siphasher::sip128::SipHasher24; @@ -318,13 +319,13 @@ where T: Number + BorshSerialize + BorshDeserialize + HashtableKeyable // For count(distinct string) and uniq(string) pub struct AggregateUniqStringState { - set: CommonHashSet, + set: StackHashSet, } impl DistinctStateFunc for AggregateUniqStringState { fn new() -> Self { AggregateUniqStringState { - set: CommonHashSet::new(), + set: StackHashSet::new(), } } @@ -338,7 +339,7 @@ impl DistinctStateFunc for AggregateUniqStringState { fn deserialize(reader: &mut &[u8]) -> Result { let size = reader.read_uvarint()?; - let mut set = CommonHashSet::with_capacity(size as usize); + let mut set = StackHashSet::with_capacity(size as usize); for _ in 0..size { let e = borsh_deserialize_state(reader)?; let _ = set.set_insert(e).is_ok(); From 94c96f91331d77195fb9548326f51d122a257cc5 Mon Sep 17 00:00:00 2001 From: Sky Fan <3374614481@qq.com> Date: Thu, 16 Jan 2025 19:20:51 +0800 Subject: [PATCH 23/33] chore: make error message of conflicting transactions more user-friendly (#17307) --- src/query/catalog/src/catalog/interface.rs | 26 +++++++++++++--------- src/query/sql/src/planner/binder/binder.rs | 10 +++++---- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/query/catalog/src/catalog/interface.rs b/src/query/catalog/src/catalog/interface.rs index ea36dde14df0d..b5d4ca067dc1e 100644 --- a/src/query/catalog/src/catalog/interface.rs +++ b/src/query/catalog/src/catalog/interface.rs @@ -110,6 +110,7 @@ use databend_common_meta_types::SeqV; use databend_storages_common_session::SessionState; use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; use dyn_clone::DynClone; +use log::info; use crate::database::Database; use crate::table::Table; @@ -388,16 +389,21 @@ pub trait Catalog: DynClone + Send + Sync + 
Debug { &self, req: UpdateMultiTableMetaReq, ) -> Result { - self.retryable_update_multi_table_meta(req) - .await? - .map_err(|e| { - ErrorCode::TableVersionMismatched(format!( - "Fail to update table metas, conflict tables: {:?}", - e.iter() - .map(|(tid, seq, meta)| (tid, seq, &meta.engine)) - .collect::>() - )) - }) + let result = self.retryable_update_multi_table_meta(req).await?; + match result { + Ok(reply) => Ok(reply), + Err(failed_tables) => { + let err_msg = format!( + "Due to concurrent transactions, transaction commit failed. Conflicting table IDs: {:?}", + failed_tables.iter().map(|(tid, _, _)| tid).collect::>() + ); + info!( + "Due to concurrent transactions, transaction commit failed. Conflicting tables: {:?}", + failed_tables + ); + Err(ErrorCode::TableVersionMismatched(err_msg)) + } + } } // update stream metas, currently used by "copy into location form stream" diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index ede8956c0f3fd..a995023cceb07 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -1059,10 +1059,12 @@ pub async fn execute_commit_statement(ctx: Arc) -> Result<()> } Err(e) => { let err_msg = format!( - "COMMIT: Table versions mismatched in multi statement transaction, conflict tables: {:?}", - e.iter() - .map(|(tid, seq, meta)| (tid, seq, &meta.engine)) - .collect::>() + "Due to concurrent transactions, explicit transaction commit failed. Conflicting table IDs: {:?}", + e.iter().map(|(tid, _, _)| tid).collect::>() + ); + info!( + "Due to concurrent transactions, explicit transaction commit failed. Conflicting table IDs: {:?}", + e ); return Err(ErrorCode::TableVersionMismatched(err_msg)); } From a37a79018d006a9acdfbd8b1f7258776908d421a Mon Sep 17 00:00:00 2001 From: Liuqing Yue Date: Fri, 17 Jan 2025 09:12:09 +0800 Subject: [PATCH 24/33] chore: add enable_prune_cache setting (#17308) * chore: add enable_prune_cache setting * chore: add enable_prune_cache setting --- src/query/settings/src/settings_default.rs | 8 +++ .../settings/src/settings_getter_setter.rs | 4 ++ .../fuse/src/operations/read_partitions.rs | 69 ++++++++++--------- .../pruning_pipeline/send_part_info_sink.rs | 8 ++- 4 files changed, 56 insertions(+), 33 deletions(-) diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index ea88aa5a1506a..aa3d2da50b073 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -1172,6 +1172,14 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::Numeric(1..=u64::MAX)), }), + ("enable_prune_cache", DefaultSettingValue { + value: UserSettingValue::UInt64(1), + desc: "Enable to cache the pruning result", + mode: SettingMode::Both, + scope: SettingScope::Both, + range: Some(SettingRange::Numeric(0..=1)), + }), + ]); Ok(Arc::new(DefaultSettings { diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index 9b9608fcc79b3..a7524758608c3 100644 --- a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -829,6 +829,10 @@ impl Settings { Ok(self.try_get_u64("enable_prune_pipeline")? == 1) } + pub fn get_enable_prune_cache(&self) -> Result { + Ok(self.try_get_u64("enable_prune_cache")? == 1) + } + pub fn get_enable_distributed_pruning(&self) -> Result { Ok(self.try_get_u64("enable_distributed_pruning")? 
== 1) } diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index e0a6d017d65e5..aeaeb7fac99d1 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -206,34 +206,37 @@ impl FuseTable { ) }); - if let Some((_stat, part)) = Self::check_prune_cache(&derterministic_cache_key) { - let sender = part_info_tx.clone(); - info!("prune pipeline: get prune result from cache"); - source_pipeline.set_on_init(move || { - // We cannot use the runtime associated with the query to avoid increasing its lifetime. - GlobalIORuntime::instance().spawn(async move { - // avoid block global io runtime - let runtime = Runtime::with_worker_threads(2, Some("send-parts".to_string()))?; - - let join_handler = runtime.spawn(async move { - for part in part.partitions { - // the sql may be killed or early stop, ignore the error - if let Err(_e) = sender.send(Ok(part)).await { - break; + if ctx.get_settings().get_enable_prune_cache()? { + if let Some((_stat, part)) = Self::check_prune_cache(&derterministic_cache_key) { + let sender = part_info_tx.clone(); + info!("prune pipeline: get prune result from cache"); + source_pipeline.set_on_init(move || { + // We cannot use the runtime associated with the query to avoid increasing its lifetime. + GlobalIORuntime::instance().spawn(async move { + // avoid block global io runtime + let runtime = + Runtime::with_worker_threads(2, Some("send-parts".to_string()))?; + + let join_handler = runtime.spawn(async move { + for part in part.partitions { + // the sql may be killed or early stop, ignore the error + if let Err(_e) = sender.send(Ok(part)).await { + break; + } } + }); + + if let Err(cause) = join_handler.await { + log::warn!("Join error while in prune pipeline, cause: {:?}", cause); } - }); - if let Err(cause) = join_handler.await { - log::warn!("Join error while in prune pipeline, cause: {:?}", cause); - } + Result::Ok(()) + }); - Result::Ok(()) + Ok(()) }); - - Ok(()) - }); - return Ok(None); + return Ok(None); + } } let mut prune_pipeline = Pipeline::create(); @@ -426,7 +429,7 @@ impl FuseTable { .as_ref() .filter(|p| p.order_by.is_empty() && p.filters.is_none()) .and_then(|p| p.limit); - + let enable_prune_cache = ctx.get_settings().get_enable_prune_cache()?; let send_part_state = Arc::new(SendPartState::create( derterministic_cache_key, limit, @@ -441,16 +444,20 @@ impl FuseTable { top_k.clone(), pruner.table_schema.clone(), send_part_state.clone(), + enable_prune_cache, ) })?; - prune_pipeline.set_on_finished(move |info: &ExecutionInfo| { - if let Ok(()) = info.res { - // only populating cache when the pipeline is finished successfully - send_part_state.populating_cache(); - } - Ok(()) - }); + if enable_prune_cache { + info!("prune pipeline: enable prune cache"); + prune_pipeline.set_on_finished(move |info: &ExecutionInfo| { + if let Ok(()) = info.res { + // only populating cache when the pipeline is finished successfully + send_part_state.populating_cache(); + } + Ok(()) + }); + } Ok(()) } diff --git a/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs b/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs index c0a97d821dabf..dfbaa569d1c00 100644 --- a/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs +++ b/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs @@ -125,6 +125,7 @@ pub struct SendPartInfoSink { partitions: Partitions, statistics: 
PartStatistics, send_part_state: Arc, + enable_cache: bool, } impl SendPartInfoSink { @@ -135,6 +136,7 @@ impl SendPartInfoSink { top_k: Option<(TopK, Scalar)>, schema: TableSchemaRef, send_part_state: Arc, + enable_cache: bool, ) -> Result { let partitions = Partitions::default(); let statistics = PartStatistics::default(); @@ -148,6 +150,7 @@ impl SendPartInfoSink { partitions, statistics, send_part_state, + enable_cache, }, ))) } @@ -184,8 +187,9 @@ impl AsyncSink for SendPartInfoSink { ), }, }; - - self.partitions.partitions.extend(info_ptr.clone()); + if self.enable_cache { + self.partitions.partitions.extend(info_ptr.clone()); + } for info in info_ptr { if let Some(sender) = &self.sender { From 31c243bbfb5c049d5f31836ed44a6a087f19a50c Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 17 Jan 2025 09:18:02 +0800 Subject: [PATCH 25/33] fix: set query_kind earlier to ensure it takes effect. (#17302) maybe related to the performance regression reported in issue 17284. --- src/query/sql/src/planner/planner.rs | 13 ++++++++----- .../src/parquet_rs/parquet_table/table.rs | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/query/sql/src/planner/planner.rs b/src/query/sql/src/planner/planner.rs index 62126bca834b7..5eeb3c9455b35 100644 --- a/src/query/sql/src/planner/planner.rs +++ b/src/query/sql/src/planner/planner.rs @@ -223,6 +223,7 @@ impl Planner { #[fastrace::trace] pub async fn plan_stmt(&mut self, stmt: &Statement, attach_query: bool) -> Result { let start = Instant::now(); + let query_kind = get_query_kind(stmt); let settings = self.ctx.get_settings(); // Step 3: Bind AST with catalog, and generate a pure logical SExpr let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; @@ -243,8 +244,7 @@ impl Planner { info!("logical plan from cache, time used: {:?}", start.elapsed()); if attach_query { // update for clickhouse handler - self.ctx - .attach_query_str(get_query_kind(stmt), stmt.to_mask_sql()); + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); } return Ok(plan.plan); } @@ -260,11 +260,14 @@ impl Planner { ) .with_subquery_executor(self.query_executor.clone()); - // Indicate binder there is no need to collect column statistics for the binding table. + // must attach before bind, because ParquetRSTable::create used it. 
+ if attach_query { + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); + } let plan = binder.bind(stmt).await?; + // attach again to avoid the query kind is overwritten by the subquery if attach_query { - self.ctx - .attach_query_str(get_query_kind(stmt), stmt.to_mask_sql()); + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); } // Step 4: Optimize the SExpr with optimizers, and generate optimized physical SExpr diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index fd0cae9405d7b..f06a765f02b22 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -34,6 +34,7 @@ use databend_common_catalog::table::DummyColumnStatisticsProvider; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::TableField; use databend_common_expression::TableSchema; @@ -141,10 +142,17 @@ impl ParquetRSTable { // If the query is `COPY`, we don't need to collect column statistics. // It's because the only transform could be contained in `COPY` command is projection. - let need_stats_provider = !matches!( - query_kind, - QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation - ); + let need_stats_provider = match query_kind { + QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation => true, + QueryKind::Unknown => { + // add this branch to ensure query_kind is set + return Err(ErrorCode::Internal( + "Unexpected QueryKind::Unknown: query_kind was not properly set before calling ParquetRSTable::create.", + )); + } + _ => false, + }; + let max_threads = settings.get_max_threads()? as usize; let max_memory_usage = settings.get_max_memory_usage()?; From e6e3aeccc523650e996f72525bde6720768fd9d8 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 17 Jan 2025 09:26:55 +0800 Subject: [PATCH 26/33] fix: use full path as cache key for parquet meta. 
(#17313) --- .../storages/parquet/src/parquet_rs/meta.rs | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/query/storages/parquet/src/parquet_rs/meta.rs b/src/query/storages/parquet/src/parquet_rs/meta.rs index 52e575291fe99..c57b7283ef81a 100644 --- a/src/query/storages/parquet/src/parquet_rs/meta.rs +++ b/src/query/storages/parquet/src/parquet_rs/meta.rs @@ -37,9 +37,11 @@ pub async fn read_metadata_async_cached( operator: &Operator, file_size: Option, ) -> Result> { - let reader = MetaReader::meta_data_reader(operator.clone()); + let info = operator.info(); + let location = format!("{}/{}/{}", info.name(), info.root(), path); + let reader = MetaReader::meta_data_reader(operator.clone(), location.len() - path.len()); let load_params = LoadParams { - location: path.to_owned(), + location, len_hint: file_size, ver: 0, put_cache: true, @@ -246,15 +248,15 @@ fn check_memory_usage(max_memory_usage: u64) -> Result<()> { Ok(()) } -pub struct LoaderWrapper(T); +pub struct LoaderWrapper(T, usize); pub type ParquetMetaReader = InMemoryItemCacheReader>; pub struct MetaReader; impl MetaReader { - pub fn meta_data_reader(dal: Operator) -> ParquetMetaReader { + pub fn meta_data_reader(dal: Operator, prefix_len: usize) -> ParquetMetaReader { ParquetMetaReader::new( CacheManager::instance().get_parquet_meta_data_cache(), - LoaderWrapper(dal), + LoaderWrapper(dal, prefix_len), ) } } @@ -263,15 +265,12 @@ impl MetaReader { impl Loader for LoaderWrapper { #[async_backtrace::framed] async fn load(&self, params: &LoadParams) -> Result { + let location = ¶ms.location[self.1..]; let size = match params.len_hint { Some(v) => v, - None => self.0.stat(¶ms.location).await?.content_length(), + None => self.0.stat(location).await?.content_length(), }; - databend_common_storage::parquet_rs::read_metadata_async( - ¶ms.location, - &self.0, - Some(size), - ) - .await + databend_common_storage::parquet_rs::read_metadata_async(location, &self.0, Some(size)) + .await } } From 37d96b2fd3a0dae957b68fd24a21b5dce9194f20 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Fri, 17 Jan 2025 14:57:31 +0800 Subject: [PATCH 27/33] chore(query): improve fuse row fetch, use accumulating (#17309) * fix(query): improve fuse row fetch, use accumulating * fix(query): improve fuse row fetch, use accumulating * fix(query): improve fuse row fetch, use accumulating * fix(query): add extra log --- .../transform_accumulating_async.rs | 16 +++++++ .../src/operations/read/fuse_rows_fetcher.rs | 47 ++++++++++++------- .../operations/read/native_rows_fetcher.rs | 5 -- .../operations/read/parquet_rows_fetcher.rs | 5 -- 4 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs b/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs index d4e4455595b9e..ad8c53fdb3f62 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs @@ -26,6 +26,10 @@ use databend_common_pipeline_core::processors::Processor; pub trait AsyncAccumulatingTransform: Send { const NAME: &'static str; + async fn on_start(&mut self) -> Result<()> { + Ok(()) + } + async fn transform(&mut self, data: DataBlock) -> Result>; async fn on_finish(&mut self, _output: bool) -> Result> { @@ -38,6 +42,7 @@ pub struct AsyncAccumulatingTransformer input: Arc, output: Arc, + 
called_on_start: bool, called_on_finish: bool, input_data: Option, output_data: Option, @@ -51,6 +56,7 @@ impl AsyncAccumulatingTransformer { output, input_data: None, output_data: None, + called_on_start: false, called_on_finish: false, }) } @@ -67,6 +73,10 @@ impl Processor for AsyncAccumulatingTra } fn event(&mut self) -> Result { + if !self.called_on_start { + return Ok(Event::Async); + } + if self.output.is_finished() { if !self.called_on_finish { return Ok(Event::Async); @@ -111,6 +121,12 @@ impl Processor for AsyncAccumulatingTra #[async_backtrace::framed] async fn async_process(&mut self) -> Result<()> { + if !self.called_on_start { + self.called_on_start = true; + self.inner.on_start().await?; + return Ok(()); + } + if let Some(data_block) = self.input_data.take() { self.output_data = self.inner.transform(data_block).await?; return Ok(()); diff --git a/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs b/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs index 97b32725f5284..e0ec3424b6161 100644 --- a/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs +++ b/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs @@ -25,15 +25,13 @@ use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::BlockEntry; use databend_common_expression::Column; -use databend_common_expression::ColumnBuilder; use databend_common_expression::DataBlock; -use databend_common_expression::DataSchema; use databend_common_expression::Value; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_transforms::processors::AsyncTransform; -use databend_common_pipeline_transforms::processors::AsyncTransformer; +use databend_common_pipeline_transforms::AsyncAccumulatingTransform; +use databend_common_pipeline_transforms::AsyncAccumulatingTransformer; use databend_storages_common_io::ReadSettings; use super::native_rows_fetcher::NativeRowsFetcher; @@ -132,17 +130,17 @@ pub fn row_fetch_processor( pub trait RowsFetcher { async fn on_start(&mut self) -> Result<()>; async fn fetch(&mut self, row_ids: &[u64]) -> Result; - fn schema(&self) -> DataSchema; } pub struct TransformRowsFetcher { row_id_col_offset: usize, fetcher: F, need_wrap_nullable: bool, + blocks: Vec, } #[async_trait::async_trait] -impl AsyncTransform for TransformRowsFetcher +impl AsyncAccumulatingTransform for TransformRowsFetcher where F: RowsFetcher + Send + Sync + 'static { const NAME: &'static str = "TransformRowsFetcher"; @@ -152,18 +150,25 @@ where F: RowsFetcher + Send + Sync + 'static self.fetcher.on_start().await } + async fn transform(&mut self, data: DataBlock) -> Result> { + self.blocks.push(data); + Ok(None) + } + #[async_backtrace::framed] - async fn transform(&mut self, mut data: DataBlock) -> Result { + async fn on_finish(&mut self, _output: bool) -> Result> { + if self.blocks.is_empty() { + return Ok(None); + } + + let start_time = std::time::Instant::now(); + let num_blocks = self.blocks.len(); + let mut data = DataBlock::concat(&self.blocks)?; + self.blocks.clear(); + let num_rows = data.num_rows(); if num_rows == 0 { - // Although the data block is empty, we need to add empty columns to align the schema. 
- let fetched_schema = self.fetcher.schema(); - for f in fetched_schema.fields().iter() { - let builder = ColumnBuilder::with_capacity(f.data_type(), 0); - let col = builder.build(); - data.add_column(BlockEntry::new(f.data_type().clone(), Value::Column(col))); - } - return Ok(data); + return Ok(None); } let entry = &data.columns()[self.row_id_col_offset]; @@ -189,7 +194,14 @@ where F: RowsFetcher + Send + Sync + 'static } } - Ok(data) + log::info!( + "TransformRowsFetcher on_finish: num_rows: {}, input blocks: {} in {} milliseconds", + num_rows, + num_blocks, + start_time.elapsed().as_millis() + ); + + Ok(Some(data)) } } @@ -203,10 +215,11 @@ where F: RowsFetcher + Send + Sync + 'static fetcher: F, need_wrap_nullable: bool, ) -> ProcessorPtr { - ProcessorPtr::create(AsyncTransformer::create(input, output, Self { + ProcessorPtr::create(AsyncAccumulatingTransformer::create(input, output, Self { row_id_col_offset, fetcher, need_wrap_nullable, + blocks: vec![], })) } } diff --git a/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs b/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs index b7e1b16a0e3b4..ca45afc996279 100644 --- a/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs +++ b/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs @@ -26,7 +26,6 @@ use databend_common_catalog::plan::Projection; use databend_common_catalog::table::Table; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_expression::DataSchema; use databend_common_expression::TableSchemaRef; use databend_common_storage::ColumnNodes; use databend_storages_common_cache::LoadParams; @@ -148,10 +147,6 @@ impl RowsFetcher for NativeRowsFetcher { Ok(DataBlock::take_blocks(&blocks, &indices, num_rows)) } - - fn schema(&self) -> DataSchema { - self.reader.data_schema() - } } impl NativeRowsFetcher { diff --git a/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs b/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs index 260fe64a36dae..35b8a47ed9aaf 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs @@ -26,7 +26,6 @@ use databend_common_catalog::table::Table; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_expression::DataSchema; use databend_common_expression::TableSchemaRef; use databend_common_storage::ColumnNodes; use databend_storages_common_cache::LoadParams; @@ -158,10 +157,6 @@ impl RowsFetcher for ParquetRowsFetcher { Ok(DataBlock::take_blocks(&blocks, &indices, num_rows)) } - - fn schema(&self) -> DataSchema { - self.reader.data_schema() - } } impl ParquetRowsFetcher { From 248ec52b75179c4d6a3350fd031a49ad961d762c Mon Sep 17 00:00:00 2001 From: coldWater Date: Fri, 17 Jan 2025 20:38:25 +0800 Subject: [PATCH 28/33] refactor(query): introduce udf runtime pool (#17304) * udf runtime pool * fix * fix * fix * test --- .../src/pipelines/builders/builder_udf.rs | 9 +- .../processors/transforms/aggregator/mod.rs | 1 + .../transforms/aggregator/udaf_script.rs | 172 +++++----- .../pipelines/processors/transforms/mod.rs | 14 +- .../processors/transforms/runtime_pool.rs | 54 +++ .../transforms/transform_udf_script.rs | 322 +++++++++--------- .../20_0023_udf_script_parallelism.result | 3 + .../20_0023_udf_script_parallelism.sh | 19 ++ ...able.result => 07_0000_amend_table.result} | 0 
..._amend_table.sh => 07_0000_amend_table.sh} | 0 .../08_udf_python/08_0000_parallelism.result | 3 + .../5_ee/08_udf_python/08_0000_parallelism.sh | 19 ++ 12 files changed, 356 insertions(+), 260 deletions(-) create mode 100644 src/query/service/src/pipelines/processors/transforms/runtime_pool.rs create mode 100755 tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result create mode 100755 tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh rename tests/suites/5_ee/07_failsafe/{08_0000_amend_table.result => 07_0000_amend_table.result} (100%) rename tests/suites/5_ee/07_failsafe/{08_0000_amend_table.sh => 07_0000_amend_table.sh} (100%) create mode 100755 tests/suites/5_ee/08_udf_python/08_0000_parallelism.result create mode 100755 tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh diff --git a/src/query/service/src/pipelines/builders/builder_udf.rs b/src/query/service/src/pipelines/builders/builder_udf.rs index 7045374042eaf..da9b35d5559bf 100644 --- a/src/query/service/src/pipelines/builders/builder_udf.rs +++ b/src/query/service/src/pipelines/builders/builder_udf.rs @@ -12,13 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::atomic::AtomicUsize; -use std::sync::Arc; - use databend_common_exception::Result; use databend_common_pipeline_transforms::processors::TransformPipelineHelper; use databend_common_sql::executor::physical_plans::Udf; -use databend_common_storages_fuse::TableContext; use crate::pipelines::processors::transforms::TransformUdfScript; use crate::pipelines::processors::transforms::TransformUdfServer; @@ -29,15 +25,12 @@ impl PipelineBuilder { self.build_pipeline(&udf.input)?; if udf.script_udf { - let index_seq = Arc::new(AtomicUsize::new(0)); - let runtime_num = self.ctx.get_settings().get_max_threads()? 
as usize; - let runtimes = TransformUdfScript::init_runtime(&udf.udf_funcs, runtime_num)?; + let runtimes = TransformUdfScript::init_runtime(&udf.udf_funcs)?; self.main_pipeline.try_add_transformer(|| { Ok(TransformUdfScript::new( self.func_ctx.clone(), udf.udf_funcs.clone(), runtimes.clone(), - index_seq.clone(), )) }) } else { diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs index a4dd56da1e1c3..89eaede89b97a 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs @@ -37,3 +37,4 @@ pub use udaf_script::*; pub use utils::*; pub use self::serde::*; +use super::runtime_pool; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs index 9cef5d96932dc..0b324b6c1ef63 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs @@ -17,7 +17,6 @@ use std::fmt; use std::io::BufRead; use std::io::Cursor; use std::sync::Arc; -use std::sync::Mutex; use arrow_array::Array; use arrow_array::RecordBatch; @@ -40,8 +39,8 @@ use databend_common_functions::aggregates::AggregateFunction; use databend_common_sql::plans::UDFLanguage; use databend_common_sql::plans::UDFScriptCode; -#[cfg(feature = "python-udf")] -use super::super::python_udf::GLOBAL_PYTHON_RUNTIME; +use super::runtime_pool::Pool; +use super::runtime_pool::RuntimeBuilder; pub struct AggregateUdfScript { display_name: String, @@ -138,6 +137,15 @@ impl AggregateFunction for AggregateUdfScript { builder.append_column(&result); Ok(()) } + + fn need_manual_drop_state(&self) -> bool { + true + } + + unsafe fn drop_state(&self, place: StateAddr) { + let state = place.get::(); + std::ptr::drop_in_place(state); + } } impl fmt::Display for AggregateUdfScript { @@ -244,10 +252,10 @@ pub fn create_udaf_script_function( let UDFScriptCode { language, code, .. 
} = code; let runtime = match language { UDFLanguage::JavaScript => { - let pool = JsRuntimePool::new( + let builder = JsRuntimeBuilder { name, - String::from_utf8(code.to_vec())?, - ArrowType::Struct( + code: String::from_utf8(code.to_vec())?, + state_type: ArrowType::Struct( state_fields .iter() .map(|f| f.into()) @@ -255,8 +263,8 @@ pub fn create_udaf_script_function( .into(), ), output_type, - ); - UDAFRuntime::JavaScript(pool) + }; + UDAFRuntime::JavaScript(JsRuntimePool::new(builder)) } UDFLanguage::WebAssembly => unimplemented!(), #[cfg(not(feature = "python-udf"))] @@ -267,22 +275,19 @@ pub fn create_udaf_script_function( } #[cfg(feature = "python-udf")] UDFLanguage::Python => { - let mut runtime = GLOBAL_PYTHON_RUNTIME.write(); - let code = String::from_utf8(code.to_vec())?; - runtime.add_aggregate( - &name, - ArrowType::Struct( + let builder = python_pool::PyRuntimeBuilder { + name, + code: String::from_utf8(code.to_vec())?, + state_type: ArrowType::Struct( state_fields .iter() .map(|f| f.into()) .collect::>() .into(), ), - ArrowType::from(&output_type), - arrow_udf_python::CallMode::CalledOnNullInput, - &code, - )?; - UDAFRuntime::Python(PythonInfo { name, output_type }) + output_type, + }; + UDAFRuntime::Python(Pool::new(builder)) } }; let init_state = runtime @@ -297,27 +302,17 @@ pub fn create_udaf_script_function( })) } -struct JsRuntimePool { +struct JsRuntimeBuilder { name: String, code: String, state_type: ArrowType, output_type: DataType, - - runtimes: Mutex>, } -impl JsRuntimePool { - fn new(name: String, code: String, state_type: ArrowType, output_type: DataType) -> Self { - Self { - name, - code, - state_type, - output_type, - runtimes: Mutex::new(vec![]), - } - } +impl RuntimeBuilder for JsRuntimeBuilder { + type Error = ErrorCode; - fn create(&self) -> Result { + fn build(&self) -> std::result::Result { let mut runtime = match arrow_udf_js::Runtime::new() { Ok(runtime) => runtime, Err(e) => { @@ -331,12 +326,14 @@ impl JsRuntimePool { converter.set_arrow_extension_key(EXTENSION_KEY); converter.set_json_extension_name(ARROW_EXT_TYPE_VARIANT); - let output_type: ArrowType = (&self.output_type).into(); runtime .add_aggregate( &self.name, self.state_type.clone(), - output_type, + // we pass the field instead of the data type because arrow-udf-js + // now takes the field as an argument here so that it can get any + // metadata associated with the field + arrow_field_from_data_type(&self.name, self.output_type.clone()), arrow_udf_js::CallMode::CalledOnNullInput, &self.code, ) @@ -344,23 +341,45 @@ impl JsRuntimePool { Ok(runtime) } +} - fn call(&self, op: F) -> anyhow::Result - where F: FnOnce(&arrow_udf_js::Runtime) -> anyhow::Result { - let mut runtimes = self.runtimes.lock().unwrap(); - let runtime = match runtimes.pop() { - Some(runtime) => runtime, - None => self.create()?, - }; - drop(runtimes); +fn arrow_field_from_data_type(name: &str, dt: DataType) -> arrow_schema::Field { + let field = DataField::new(name, dt); + (&field).into() +} + +type JsRuntimePool = Pool; + +#[cfg(feature = "python-udf")] +mod python_pool { + use super::*; - let result = op(&runtime)?; + pub(super) struct PyRuntimeBuilder { + pub name: String, + pub code: String, + pub state_type: ArrowType, + pub output_type: DataType, + } - let mut runtimes = self.runtimes.lock().unwrap(); - runtimes.push(runtime); + impl RuntimeBuilder for PyRuntimeBuilder { + type Error = ErrorCode; - Ok(result) + fn build(&self) -> std::result::Result { + let mut runtime = arrow_udf_python::Builder::default() + 
.sandboxed(true) + .build()?; + runtime.add_aggregate( + &self.name, + self.state_type.clone(), + arrow_field_from_data_type(&self.name, self.output_type.clone()), + arrow_udf_python::CallMode::CalledOnNullInput, + &self.code, + )?; + Ok(runtime) + } } + + pub type PyRuntimePool = Pool; } enum UDAFRuntime { @@ -368,41 +387,36 @@ enum UDAFRuntime { #[expect(unused)] WebAssembly, #[cfg(feature = "python-udf")] - Python(PythonInfo), -} - -#[cfg(feature = "python-udf")] -struct PythonInfo { - name: String, - output_type: DataType, + Python(python_pool::PyRuntimePool), } impl UDAFRuntime { fn name(&self) -> &str { match self { - UDAFRuntime::JavaScript(pool) => &pool.name, + UDAFRuntime::JavaScript(pool) => &pool.builder.name, #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => &info.name, + UDAFRuntime::Python(info) => &info.builder.name, _ => unimplemented!(), } } fn return_type(&self) -> DataType { match self { - UDAFRuntime::JavaScript(pool) => pool.output_type.clone(), + UDAFRuntime::JavaScript(pool) => pool.builder.output_type.clone(), #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => info.output_type.clone(), + UDAFRuntime::Python(info) => info.builder.output_type.clone(), _ => unimplemented!(), } } fn create_state(&self) -> anyhow::Result { let state = match self { - UDAFRuntime::JavaScript(pool) => pool.call(|runtime| runtime.create_state(&pool.name)), + UDAFRuntime::JavaScript(pool) => { + pool.call(|runtime| runtime.create_state(&pool.builder.name)) + } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.create_state(&info.name) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.create_state(&pool.builder.name)) } _ => unimplemented!(), }?; @@ -412,12 +426,11 @@ impl UDAFRuntime { fn accumulate(&self, state: &UdfAggState, input: &RecordBatch) -> anyhow::Result { let state = match self { UDAFRuntime::JavaScript(pool) => { - pool.call(|runtime| runtime.accumulate(&pool.name, &state.0, input)) + pool.call(|runtime| runtime.accumulate(&pool.builder.name, &state.0, input)) } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.accumulate(&info.name, &state.0, input) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.accumulate(&pool.builder.name, &state.0, input)) } _ => unimplemented!(), }?; @@ -426,11 +439,12 @@ impl UDAFRuntime { fn merge(&self, states: &Arc) -> anyhow::Result { let state = match self { - UDAFRuntime::JavaScript(pool) => pool.call(|runtime| runtime.merge(&pool.name, states)), + UDAFRuntime::JavaScript(pool) => { + pool.call(|runtime| runtime.merge(&pool.builder.name, states)) + } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.merge(&info.name, states) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.merge(&pool.builder.name, states)) } _ => unimplemented!(), }?; @@ -440,12 +454,11 @@ impl UDAFRuntime { fn finish(&self, state: &UdfAggState) -> anyhow::Result> { match self { UDAFRuntime::JavaScript(pool) => { - pool.call(|runtime| runtime.finish(&pool.name, &state.0)) + pool.call(|runtime| runtime.finish(&pool.builder.name, &state.0)) } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.finish(&info.name, &state.0) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.finish(&pool.builder.name, &state.0)) } _ => unimplemented!(), } 
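// Review note: every UDAFRuntime method above now routes through `pool.call(..)`,
// which checks an idle runtime out of the pool (building one lazily via
// `RuntimeBuilder::build` when none is free) and pushes it back after the closure
// returns. This replaces the former GLOBAL_PYTHON_RUNTIME read/write lock, so
// concurrent aggregations no longer serialize on one shared runtime; the Python
// runtime is also built with `.sandboxed(true)`.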
@@ -495,9 +508,9 @@ mod tests { Field::new("sum", ArrowType::Int64, false), Field::new("weight", ArrowType::Int64, false), ]; - let pool = JsRuntimePool::new( - agg_name.clone(), - r#" + let builder = JsRuntimeBuilder { + name: agg_name.clone(), + code: r#" export function create_state() { return {sum: 0, weight: 0}; } @@ -521,9 +534,10 @@ export function finish(state) { } "# .to_string(), - ArrowType::Struct(fields.clone().into()), - Float32Type::data_type(), - ); + state_type: ArrowType::Struct(fields.clone().into()), + output_type: Float32Type::data_type(), + }; + let pool = JsRuntimePool::new(builder); let state = pool.call(|runtime| runtime.create_state(&agg_name))?; diff --git a/src/query/service/src/pipelines/processors/transforms/mod.rs b/src/query/service/src/pipelines/processors/transforms/mod.rs index 55078470553b9..2e76f346a0e40 100644 --- a/src/query/service/src/pipelines/processors/transforms/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/mod.rs @@ -15,6 +15,7 @@ pub mod aggregator; mod hash_join; pub(crate) mod range_join; +mod runtime_pool; mod transform_add_computed_columns; mod transform_add_const_columns; mod transform_add_internal_columns; @@ -66,16 +67,3 @@ pub use transform_stream_sort_spill::*; pub use transform_udf_script::TransformUdfScript; pub use transform_udf_server::TransformUdfServer; pub use window::*; - -#[cfg(feature = "python-udf")] -mod python_udf { - use std::sync::Arc; - use std::sync::LazyLock; - - use arrow_udf_python::Runtime; - use parking_lot::RwLock; - - /// python runtime should be only initialized once by gil lock, see: https://github.com/python/cpython/blob/main/Python/pystate.c - pub static GLOBAL_PYTHON_RUNTIME: LazyLock>> = - LazyLock::new(|| Arc::new(RwLock::new(Runtime::new().unwrap()))); -} diff --git a/src/query/service/src/pipelines/processors/transforms/runtime_pool.rs b/src/query/service/src/pipelines/processors/transforms/runtime_pool.rs new file mode 100644 index 0000000000000..5a3fc8437b3a4 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/runtime_pool.rs @@ -0,0 +1,54 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
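// Review note (illustrative sketch, not part of the patch): `RuntimeBuilder` only
// describes how to construct a single runtime instance; `Pool` keeps the idle
// instances in a mutex-guarded Vec and hands one out per call. Assuming the trait
// and struct defined in this file, a hypothetical builder could be exercised like:
//
//     use std::convert::Infallible;
//
//     struct EchoBuilder;
//     impl RuntimeBuilder<String> for EchoBuilder {
//         type Error = Infallible;
//         fn build(&self) -> Result<String, Infallible> { Ok("runtime".to_string()) }
//     }
//
//     let pool = Pool::new(EchoBuilder);
//     // The first call builds a runtime; later calls reuse the instance pushed back.
//     let len = pool.call(|rt: &String| Ok::<_, Infallible>(rt.len())).unwrap();
//     assert_eq!(len, "runtime".len());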
+ +use std::sync::Mutex; + +pub trait RuntimeBuilder { + type Error; + fn build(&self) -> Result; +} + +pub struct Pool> { + pub builder: B, + runtimes: Mutex>, +} + +impl> Pool { + pub fn new(builder: B) -> Self { + Self { + builder, + runtimes: Mutex::new(vec![]), + } + } + + pub fn call(&self, op: F) -> Result + where + F: FnOnce(&R) -> Result, + E: From, + { + let mut runtimes = self.runtimes.lock().unwrap(); + let runtime = match runtimes.pop() { + Some(runtime) => runtime, + None => self.builder.build()?, + }; + drop(runtimes); + + let result = op(&runtime); + + let mut runtimes = self.runtimes.lock().unwrap(); + runtimes.push(runtime); + + result + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs index 9c499496806f8..1447e95897693 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs @@ -15,15 +15,14 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; use std::sync::Arc; use arrow_array::RecordBatch; -use arrow_schema::Schema; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::converts::arrow::ARROW_EXT_TYPE_VARIANT; use databend_common_expression::converts::arrow::EXTENSION_KEY; +use databend_common_expression::types::DataType; use databend_common_expression::variant_transform::contains_variant; use databend_common_expression::variant_transform::transform_variant; use databend_common_expression::BlockEntry; @@ -36,47 +35,58 @@ use databend_common_sql::executor::physical_plans::UdfFunctionDesc; use databend_common_sql::plans::UDFLanguage; use databend_common_sql::plans::UDFScriptCode; use databend_common_sql::plans::UDFType; -use parking_lot::RwLock; -#[cfg(feature = "python-udf")] -use super::python_udf::GLOBAL_PYTHON_RUNTIME; +use super::runtime_pool::Pool; +use super::runtime_pool::RuntimeBuilder; pub enum ScriptRuntime { - JavaScript(Vec>>), - WebAssembly(Arc>), + JavaScript(JsRuntimePool), + WebAssembly(arrow_udf_wasm::Runtime), #[cfg(feature = "python-udf")] - Python, + Python(python_pool::PyRuntimePool), } impl ScriptRuntime { - pub fn try_create(lang: UDFLanguage, code: Option<&[u8]>, runtime_num: usize) -> Result { - match lang { + pub fn try_create(func: &UdfFunctionDesc) -> Result { + let UDFType::Script(UDFScriptCode { language, code, .. }) = &func.udf_type else { + unreachable!() + }; + match language { UDFLanguage::JavaScript => { - // Create multiple runtimes to execute in parallel to avoid blocking caused by js udf runtime locks. 
- let runtimes = (0..runtime_num) - .map(|_| { - arrow_udf_js::Runtime::new() - .map(|mut runtime| { - runtime - .converter_mut() - .set_arrow_extension_key(EXTENSION_KEY); - runtime - .converter_mut() - .set_json_extension_name(ARROW_EXT_TYPE_VARIANT); - Arc::new(RwLock::new(runtime)) - }) - .map_err(|err| { - ErrorCode::UDFRuntimeError(format!( - "Cannot create js runtime: {err}", - )) - }) - }) - .collect::>>>>()?; - Ok(Self::JavaScript(runtimes)) + let builder = JsRuntimeBuilder { + name: func.name.clone(), + handler: func.func_name.clone(), + code: String::from_utf8(code.to_vec())?, + output_type: func.data_type.as_ref().clone(), + counter: Default::default(), + }; + Ok(Self::JavaScript(JsRuntimePool::new(builder))) + } + UDFLanguage::WebAssembly => { + let start = std::time::Instant::now(); + let runtime = arrow_udf_wasm::Runtime::new(code).map_err(|err| { + ErrorCode::UDFRuntimeError(format!( + "Failed to create WASM runtime for module: {err}" + )) + })?; + log::info!( + "Init WebAssembly UDF runtime for {:?} took: {:?}", + func.name, + start.elapsed() + ); + Ok(ScriptRuntime::WebAssembly(runtime)) } - UDFLanguage::WebAssembly => Self::create_wasm_runtime(code), #[cfg(feature = "python-udf")] - UDFLanguage::Python => Ok(Self::Python), + UDFLanguage::Python => { + let builder = PyRuntimeBuilder { + name: func.name.clone(), + handler: func.func_name.clone(), + code: String::from_utf8(code.to_vec())?, + output_type: func.data_type.as_ref().clone(), + counter: Default::default(), + }; + Ok(Self::Python(python_pool::PyRuntimePool::new(builder))) + } #[cfg(not(feature = "python-udf"))] UDFLanguage::Python => Err(ErrorCode::EnterpriseFeatureNotEnable( "Failed to create python script udf", @@ -84,89 +94,30 @@ impl ScriptRuntime { } } - fn create_wasm_runtime(code_blob: Option<&[u8]>) -> Result { - let decoded_code_blob = code_blob - .ok_or_else(|| ErrorCode::UDFDataError("WASM module not provided".to_string()))?; - - let runtime = arrow_udf_wasm::Runtime::new(decoded_code_blob).map_err(|err| { - ErrorCode::UDFRuntimeError(format!("Failed to create WASM runtime for module: {err}")) - })?; - - Ok(ScriptRuntime::WebAssembly(Arc::new(RwLock::new(runtime)))) - } - - pub fn add_function_with_handler(&self, func: &UdfFunctionDesc, code: &[u8]) -> Result<()> { - let tmp_schema = - DataSchema::new(vec![DataField::new("tmp", func.data_type.as_ref().clone())]); - let arrow_schema = Schema::from(&tmp_schema); - - match self { - ScriptRuntime::JavaScript(runtimes) => { - let code = std::str::from_utf8(code)?; - for runtime in runtimes { - let mut runtime = runtime.write(); - runtime.add_function_with_handler( - &func.name, - // we pass the field instead of the data type because arrow-udf-js - // now takes the field as an argument here so that it can get any - // metadata associated with the field - arrow_schema.field(0).clone(), - arrow_udf_js::CallMode::ReturnNullOnNullInput, - code, - &func.func_name, - )?; - } - } - #[cfg(feature = "python-udf")] - ScriptRuntime::Python => { - let code: &str = std::str::from_utf8(code)?; - let mut runtime = GLOBAL_PYTHON_RUNTIME.write(); - runtime.add_function_with_handler( - &func.name, - arrow_schema.field(0).data_type().clone(), - arrow_udf_python::CallMode::ReturnNullOnNullInput, - code, - &func.func_name, - )?; - } - // Ignore the execution for WASM context - ScriptRuntime::WebAssembly(_) => {} - } - - Ok(()) - } - pub fn handle_execution( &self, func: &UdfFunctionDesc, input_batch: &RecordBatch, - index: usize, ) -> Result { let result_batch = match self { - 
ScriptRuntime::JavaScript(runtimes) => { - // Choose a js runtime in order to avoid blocking - let idx = index % runtimes.len(); - let runtime = &runtimes[idx]; - let runtime = runtime.read(); - runtime.call(&func.name, input_batch).map_err(|err| { + ScriptRuntime::JavaScript(pool) => pool + .call(|runtime| runtime.call(&func.name, input_batch)) + .map_err(|err| { ErrorCode::UDFRuntimeError(format!( "JavaScript UDF {:?} execution failed: {err}", func.name )) - })? - } + })?, #[cfg(feature = "python-udf")] - ScriptRuntime::Python => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.call(&func.name, input_batch).map_err(|err| { + ScriptRuntime::Python(pool) => pool + .call(|runtime| runtime.call(&func.name, input_batch)) + .map_err(|err| { ErrorCode::UDFRuntimeError(format!( "Python UDF {:?} execution failed: {err}", func.name )) - })? - } + })?, ScriptRuntime::WebAssembly(runtime) => { - let runtime = runtime.read(); runtime.call(&func.func_name, input_batch).map_err(|err| { ErrorCode::UDFRuntimeError(format!( "WASM UDF {:?} execution failed: {err}", @@ -179,25 +130,116 @@ impl ScriptRuntime { } } +pub struct JsRuntimeBuilder { + name: String, + handler: String, + code: String, + output_type: DataType, + + counter: AtomicUsize, +} + +impl RuntimeBuilder for JsRuntimeBuilder { + type Error = ErrorCode; + + fn build(&self) -> Result { + let start = std::time::Instant::now(); + let mut runtime = arrow_udf_js::Runtime::new() + .map_err(|e| ErrorCode::UDFDataError(format!("Cannot create js runtime: {e}")))?; + + let converter = runtime.converter_mut(); + converter.set_arrow_extension_key(EXTENSION_KEY); + converter.set_json_extension_name(ARROW_EXT_TYPE_VARIANT); + + runtime.add_function_with_handler( + &self.name, + // we pass the field instead of the data type because arrow-udf-js + // now takes the field as an argument here so that it can get any + // metadata associated with the field + arrow_field_from_data_type(&self.name, self.output_type.clone()), + arrow_udf_js::CallMode::ReturnNullOnNullInput, + &self.code, + &self.handler, + )?; + + log::info!( + "Init JavaScript UDF runtime for {:?} #{} took: {:?}", + self.name, + self.counter + .fetch_add(1, std::sync::atomic::Ordering::SeqCst), + start.elapsed() + ); + + Ok(runtime) + } +} + +fn arrow_field_from_data_type(name: &str, dt: DataType) -> arrow_schema::Field { + let field = DataField::new(name, dt); + (&field).into() +} + +type JsRuntimePool = Pool; + +#[cfg(feature = "python-udf")] +pub struct PyRuntimeBuilder { + name: String, + handler: String, + code: String, + output_type: DataType, + + counter: AtomicUsize, +} + +#[cfg(feature = "python-udf")] +mod python_pool { + use super::*; + + impl RuntimeBuilder for PyRuntimeBuilder { + type Error = ErrorCode; + + fn build(&self) -> Result { + let start = std::time::Instant::now(); + let mut runtime = arrow_udf_python::Builder::default() + .sandboxed(true) + .build()?; + runtime.add_function_with_handler( + &self.name, + arrow_field_from_data_type(&self.name, self.output_type.clone()), + arrow_udf_python::CallMode::CalledOnNullInput, + &self.code, + &self.handler, + )?; + + log::info!( + "Init Python UDF runtime for {:?} #{} took: {:?}", + self.name, + self.counter + .fetch_add(1, std::sync::atomic::Ordering::SeqCst), + start.elapsed() + ); + + Ok(runtime) + } + } + + pub type PyRuntimePool = Pool; +} + pub struct TransformUdfScript { funcs: Vec, script_runtimes: BTreeMap>, - index_seq: Arc, } -unsafe impl Send for TransformUdfScript {} - impl TransformUdfScript { pub fn new( 
_func_ctx: FunctionContext, funcs: Vec, script_runtimes: BTreeMap>, - index_seq: Arc, ) -> Self { Self { funcs, script_runtimes, - index_seq, } } } @@ -212,79 +254,39 @@ impl Transform for TransformUdfScript { return Ok(data_block); } - let index = self.index_seq.fetch_add(1, Ordering::SeqCst); for func in &self.funcs { let num_rows = data_block.num_rows(); let block_entries = self.prepare_block_entries(func, &data_block)?; let input_batch = self.create_input_batch(block_entries, num_rows)?; - let runtime_key = Self::get_runtime_key(func)?; - - if let Some(runtime) = self.script_runtimes.get(&runtime_key) { - let result_batch = runtime.handle_execution(func, &input_batch, index)?; - self.update_datablock(func, result_batch, &mut data_block)?; - } else { - return Err(ErrorCode::UDFDataError(format!( - "Failed to find runtime for function {:?} with key: {:?}", - func.name, runtime_key - ))); - } + let runtime = self.script_runtimes.get(&func.name).unwrap(); + let result_batch = runtime.handle_execution(func, &input_batch)?; + self.update_datablock(func, result_batch, &mut data_block)?; } Ok(data_block) } } impl TransformUdfScript { - fn get_runtime_key(func: &UdfFunctionDesc) -> Result { - let (lang, func_name) = match &func.udf_type { - UDFType::Script(UDFScriptCode { language: lang, .. }) => (lang, &func.func_name), - _ => { - return Err(ErrorCode::UDFDataError(format!( - "Unsupported UDFType variant for function {:?}", - func.name - ))); - } - }; - - let runtime_key = format!("{}-{}", lang, func_name.trim()); - Ok(runtime_key) - } - - pub fn init_runtime( - funcs: &[UdfFunctionDesc], - runtime_num: usize, - ) -> Result>> { + pub fn init_runtime(funcs: &[UdfFunctionDesc]) -> Result>> { let mut script_runtimes: BTreeMap> = BTreeMap::new(); - let start = std::time::Instant::now(); for func in funcs { - let (&lang, code_opt) = match &func.udf_type { - UDFType::Script(UDFScriptCode { language, code, .. }) => { - (language, Some(code.as_ref().as_ref())) - } + let code = match &func.udf_type { + UDFType::Script(code) => code, _ => continue, }; - let runtime_key = Self::get_runtime_key(func)?; - let runtime = match script_runtimes.entry(runtime_key.clone()) { - Entry::Occupied(entry) => entry.into_mut().clone(), - Entry::Vacant(entry) => { - let new_runtime = ScriptRuntime::try_create(lang, code_opt, runtime_num) - .map(Arc::new) - .map_err(|err| { - ErrorCode::UDFDataError(format!( - "Failed to create UDF runtime for language {lang:?} with error: {err}", - )) - })?; - entry.insert(new_runtime).clone() - } + if let Entry::Vacant(entry) = script_runtimes.entry(func.name.clone()) { + let runtime = ScriptRuntime::try_create(func).map_err(|err| { + ErrorCode::UDFDataError(format!( + "Failed to create UDF runtime for language {:?} with error: {err}", + code.language + )) + })?; + entry.insert(Arc::new(runtime)); }; - - if let UDFType::Script(UDFScriptCode { code, .. 
}) = &func.udf_type { - runtime.add_function_with_handler(func, code.as_ref().as_ref())?; - } } - log::info!("Init UDF runtimes took: {:?}", start.elapsed()); Ok(script_runtimes) } @@ -378,7 +380,7 @@ impl TransformUdfScript { if col.data_type != func.data_type.as_ref().clone() { return Err(ErrorCode::UDFDataError(format!( - "Function '{}' returned column with data type {:?} but expected {:?}", + "Function {:?} returned column with data type {:?} but expected {:?}", func.name, col.data_type, func.data_type ))); } diff --git a/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result new file mode 100755 index 0000000000000..26fa101a44a09 --- /dev/null +++ b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result @@ -0,0 +1,3 @@ +1000000 +2000000 +100000000 diff --git a/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh new file mode 100755 index 0000000000000..a9303b2a6287d --- /dev/null +++ b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +$BENDSQL_CLIENT_CONNECT --query=""" +CREATE OR REPLACE FUNCTION js_wait_test ( INT64 ) RETURNS INT64 LANGUAGE javascript HANDLER = 'wait' AS \$\$ +export function wait(n) { + let i = 0; + while (i < n * 1000000) { i++ } + return i; +} +\$\$; +""" + +$BENDSQL_CLIENT_CONNECT --query='select js_wait_test(100)' & +job_pid=$! +$BENDSQL_CLIENT_CONNECT --query='select js_wait_test(1)' +$BENDSQL_CLIENT_CONNECT --query='select js_wait_test(2)' +wait $job_pid diff --git a/tests/suites/5_ee/07_failsafe/08_0000_amend_table.result b/tests/suites/5_ee/07_failsafe/07_0000_amend_table.result similarity index 100% rename from tests/suites/5_ee/07_failsafe/08_0000_amend_table.result rename to tests/suites/5_ee/07_failsafe/07_0000_amend_table.result diff --git a/tests/suites/5_ee/07_failsafe/08_0000_amend_table.sh b/tests/suites/5_ee/07_failsafe/07_0000_amend_table.sh similarity index 100% rename from tests/suites/5_ee/07_failsafe/08_0000_amend_table.sh rename to tests/suites/5_ee/07_failsafe/07_0000_amend_table.sh diff --git a/tests/suites/5_ee/08_udf_python/08_0000_parallelism.result b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.result new file mode 100755 index 0000000000000..040e114b163a0 --- /dev/null +++ b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.result @@ -0,0 +1,3 @@ +1 +2 +100 diff --git a/tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh new file mode 100755 index 0000000000000..52121fd75b44f --- /dev/null +++ b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +$BENDSQL_CLIENT_CONNECT --query=""" +CREATE OR REPLACE FUNCTION python_sleep ( INT64 ) RETURNS INT64 LANGUAGE python HANDLER = 'sleep' AS \$\$ +import time + +def sleep(n): + time.sleep(n/100) + return n +\$\$; +""" + +$BENDSQL_CLIENT_CONNECT --query='select python_sleep(100)' & +job_pid=$! 
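# Review note: the long python_sleep(100) call above is started in the background;
# the expected .result lists 1 and 2 before 100, which only holds if the two short
# calls below can run while the long one is still in flight — each query getting its
# own pooled runtime instead of queueing behind a shared one. `wait $job_pid` then
# reaps the background query before the script exits.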
+$BENDSQL_CLIENT_CONNECT --query='select python_sleep(1)' +$BENDSQL_CLIENT_CONNECT --query='select python_sleep(2)' +wait $job_pid From 3a32c18063b0248ba860b74bda87c3cf89978a3b Mon Sep 17 00:00:00 2001 From: baishen Date: Sat, 18 Jan 2025 09:41:33 +0800 Subject: [PATCH 29/33] feat(sqlsmith): fuzz test support read Sqlite and Duckdb test (#17301) --- src/query/ast/src/parser/expr.rs | 22 +-- src/tests/sqlsmith/src/bin/main.rs | 5 + src/tests/sqlsmith/src/lib.rs | 1 + src/tests/sqlsmith/src/runner.rs | 215 ++++++++---------------- src/tests/sqlsmith/src/sql_gen/ddl.rs | 2 +- src/tests/sqlsmith/src/sql_gen/dml.rs | 74 +++++--- src/tests/sqlsmith/src/sql_gen/func.rs | 102 +++++++---- src/tests/sqlsmith/src/sql_gen/types.rs | 15 ++ src/tests/sqlsmith/src/util.rs | 140 +++++++++++++++ 9 files changed, 361 insertions(+), 215 deletions(-) create mode 100644 src/tests/sqlsmith/src/util.rs diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 854923757dc22..551dae1108388 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1088,7 +1088,7 @@ pub fn expr_element(i: Input) -> IResult> { #function_name ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" - ~ #window_function + ~ #window_function? }, |(name, _, params, _, _, opt_distinct, opt_args, _, window)| ExprElement::FunctionCall { func: FunctionCall { @@ -1096,24 +1096,7 @@ pub fn expr_element(i: Input) -> IResult> { name, args: opt_args.unwrap_or_default(), params, - window: Some(window), - lambda: None, - }, - }, - ); - let function_call_with_params = map( - rule! { - #function_name - ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" - ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" - }, - |(name, _, params, _, _, opt_distinct, opt_args, _)| ExprElement::FunctionCall { - func: FunctionCall { - distinct: opt_distinct.is_some(), - name, - args: opt_args.unwrap_or_default(), - params, - window: None, + window, lambda: None, }, }, @@ -1406,7 +1389,6 @@ pub fn expr_element(i: Input) -> IResult> { | #function_call_with_lambda : "`function(..., x -> ...)`" | #function_call_with_window : "`function(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" | #function_call_with_params_window : "`function(...)(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" - | #function_call_with_params : "`function(...)(...)`" | #function_call : "`function(...)`" ), rule!( diff --git a/src/tests/sqlsmith/src/bin/main.rs b/src/tests/sqlsmith/src/bin/main.rs index 5d58a03d7dd1d..48b4978fdb9c2 100644 --- a/src/tests/sqlsmith/src/bin/main.rs +++ b/src/tests/sqlsmith/src/bin/main.rs @@ -52,6 +52,10 @@ pub struct Args { /// The fuzz query test file path. #[clap(long, default_value = "")] fuzz_path: String, + + /// The log path to write executed SQLs.. 
+ #[clap(long, default_value = ".databend/sqlsmithlog")] + log_path: String, } #[tokio::main(flavor = "multi_thread", worker_threads = 5)] @@ -69,6 +73,7 @@ async fn main() -> Result<()> { args.user.clone(), args.pass.clone(), args.db.clone(), + args.log_path.clone(), args.count, None, args.timeout, diff --git a/src/tests/sqlsmith/src/lib.rs b/src/tests/sqlsmith/src/lib.rs index 363cf6ddd7fcb..ff87a3b175e45 100644 --- a/src/tests/sqlsmith/src/lib.rs +++ b/src/tests/sqlsmith/src/lib.rs @@ -19,5 +19,6 @@ mod query_fuzzer; mod reducer; mod runner; mod sql_gen; +mod util; pub use runner::Runner; diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs index 3c37316611df2..1816162f6393f 100644 --- a/src/tests/sqlsmith/src/runner.rs +++ b/src/tests/sqlsmith/src/runner.rs @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::fs; +use std::fs::create_dir_all; +use std::fs::File; use std::future::Future; -use std::path::Path; -use std::path::PathBuf; +use std::io::BufWriter; +use std::io::Write; use std::time::Duration; -use databend_common_ast::ast::AlterTableAction; use databend_common_ast::ast::CreateTableSource; use databend_common_ast::ast::CreateTableStmt; use databend_common_ast::ast::DropTableStmt; @@ -33,15 +33,19 @@ use databend_common_expression::types::NumberDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRefExt; use databend_common_sql::resolve_type_name; +use jiff::fmt::strtime; +use jiff::Zoned; use rand::rngs::SmallRng; use rand::Rng; use rand::SeedableRng; use crate::http_client::HttpClient; -use crate::http_client::QueryResponse; use crate::query_fuzzer::QueryFuzzer; use crate::sql_gen::SqlGenerator; use crate::sql_gen::Table; +use crate::util::read_sql_from_test_dirs; + +const LOG_FILE_FORMAT: &str = "%Y-%m-%d-%H-%M-%S-%f"; const KNOWN_ERRORS: &[&str] = &[ // Errors caused by illegal parameters @@ -68,6 +72,8 @@ const KNOWN_ERRORS: &[&str] = &[ "attempt to shift right with overflow", "attempt to subtract with overflow", "attempt to add with overflow", + "attempt to multiply with overflow", + "map keys have to be unique", // Unsupported features "Row format is not yet support for", "to_decimal not support this DataType", @@ -93,6 +99,7 @@ pub struct Runner { pub(crate) client: HttpClient, db: String, timeout: u64, + file_buf: BufWriter, } impl Runner { @@ -101,26 +108,36 @@ impl Runner { username: String, password: String, db: String, + log_path: String, count: usize, seed: Option, timeout: u64, ) -> Result { let client = HttpClient::create(host, username, password).await?; + // Create SQL log directory and file. 
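        // Review note: run_sql below also appends every executed statement to this
        // file (one writeln! per SQL), so a failing fuzz run can be replayed from the
        // log; the file name embeds a timestamp rendered with LOG_FILE_FORMAT.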
+ create_dir_all(&log_path)?; + let now = Zoned::now(); + let time = strtime::format(LOG_FILE_FORMAT, &now).unwrap(); + let log_path = format!("{}/databend-sqlsmith.{}.sql", log_path, time); + let file = File::create(log_path)?; + let file_buf = BufWriter::new(file); + Ok(Self { count, seed, client, db, timeout, + file_buf, }) } pub async fn run(&mut self) -> Result<()> { let create_db_sql = format!("CREATE OR REPLACE database {}", self.db); - let _ = self.client.query(&create_db_sql).await?; + let _ = self.run_sql(create_db_sql, None).await; let use_db_sql = format!("USE {}", self.db); - let _ = self.client.query(&use_db_sql).await?; + let _ = self.run_sql(use_db_sql, None).await; let settings = self.get_settings().await?; @@ -132,74 +149,53 @@ impl Runner { let mut new_tables = tables.clone(); for (i, table) in tables.iter().enumerate() { - let insert_stmt = generator.gen_insert(table, row_count); - let insert_sql = insert_stmt.to_string(); - tracing::info!("insert_sql: {}", insert_sql); - Self::check_res(self.client.query(&insert_sql).await); - + let insert_stmts = generator.gen_insert(table, row_count); + for insert_stmt in insert_stmts.into_iter() { + let insert_sql = insert_stmt.to_string(); + let _ = self.run_sql(insert_sql, None).await; + } let alter_stmt_opt = generator.gen_alter(table, row_count); if let Some((alter_stmt, new_table, insert_stmt_opt)) = alter_stmt_opt { - if let AlterTableAction::RenameTable { ref new_table } = alter_stmt.action { - let drop_table_stmt = DropTableStmt { - if_exists: true, - catalog: None, - database: None, - table: new_table.clone(), - all: false, - }; - let drop_table_sql = drop_table_stmt.to_string(); - tracing::info!("drop_table_sql: {}", drop_table_sql); - Self::check_res(self.client.query(&drop_table_sql).await); - } let alter_sql = alter_stmt.to_string(); - tracing::info!("alter_sql: {}", alter_sql); - Self::check_res(self.client.query(&alter_sql).await); + let _ = self.run_sql(alter_sql, None).await; // save new table schema new_tables[i] = new_table; if let Some(insert_stmt) = insert_stmt_opt { let insert_sql = insert_stmt.to_string(); - tracing::info!("after alter insert_sql: {}", insert_sql); - Self::check_res(self.client.query(&insert_sql).await); + let _ = self.run_sql(insert_sql, None).await; } } } generator.tables = new_tables; let enable_merge = "set enable_experimental_merge_into = 1".to_string(); - Self::check_res(self.client.query(&enable_merge).await); - // generate merge, replace, update, delete - for _ in 0..20 { - let sql = match generator.rng.gen_range(0..=20) { - 0..=10 => generator.gen_merge().to_string(), - 11..=15 => generator.gen_replace().to_string(), - 16..=19 => generator.gen_update().to_string(), - 20 => generator.gen_delete().to_string(), - _ => unreachable!(), - }; - let mut timeout_err = None; - tracing::info!("dml sql: {}", sql); - Self::check_timeout( - async { Self::check_res(self.client.query(&sql).await) }, - self.timeout, - &mut timeout_err, - ) - .await; - if let Some(timeout_err) = timeout_err { - tracing::error!("sql timeout: {}", timeout_err); - } + let _ = self.run_sql(enable_merge, None).await; + let dml_stmts = generator.gen_dml_stmt(); + for dml_stmt in dml_stmts.into_iter() { + let dml_sql = dml_stmt.to_string(); + let _ = self.run_sql(dml_sql, None).await; } // generate query + let mut conn_error_cnt = 0; for _ in 0..self.count { let query = generator.gen_query(); let query_sql = query.to_string(); - self.run_sql(query_sql, Some(query)).await; + let is_conn_error = self.run_sql(query_sql, 
Some(query)).await; + if is_conn_error { + conn_error_cnt += 1; + } + // Query server panic, exist + if conn_error_cnt >= 3 { + break; + } } Ok(()) } pub async fn run_fuzz(&mut self, fuzz_path: &str) -> Result<()> { - let sqls = Self::read_sql_from_sqllogic_tests(fuzz_path)?; + let sqls = read_sql_from_test_dirs(fuzz_path)?; + let mut conn_error_cnt = 0; let mut query_fuzzer = QueryFuzzer::new(); for sql in sqls { tracing::info!("orig sql: {}", sql); @@ -213,7 +209,14 @@ impl Runner { let fuzz_sql = fuzz_stmt.to_string(); tracing::info!("fuzz sql: {}", fuzz_sql); if let Statement::Query(_) = fuzz_stmt { - self.run_sql(fuzz_sql, None).await; + let is_conn_error = self.run_sql(fuzz_sql, None).await; + if is_conn_error { + conn_error_cnt += 1; + } + // Query server panic, exist + if conn_error_cnt >= 3 { + break; + } } else if let Err(err) = self.client.query(&fuzz_sql).await { tracing::error!("fuzz sql err: {}", err); } @@ -222,9 +225,13 @@ impl Runner { Ok(()) } - async fn run_sql(&mut self, query_sql: String, query: Option) { + async fn run_sql(&mut self, query_sql: String, query: Option) -> bool { + // Write SQL to log file for replay. + writeln!(self.file_buf, "{};", query_sql).unwrap(); + let mut timeout_err = None; let mut is_error = false; + let mut is_conn_error = false; let mut try_reduce = false; let mut err_code = 0; let mut err_message = String::new(); @@ -252,6 +259,10 @@ impl Runner { } } Err(err) => { + // 1910 is ReqwestError, Query server may have panic, need exist + if err.code() == 1910 { + is_conn_error = true; + } is_error = true; err_message = format!("http err: {}", err); } @@ -263,18 +274,19 @@ impl Runner { .await; if let Some(timeout_err) = timeout_err { - tracing::info!("query_sql: {}", query_sql); + tracing::info!("sql: {}", query_sql); tracing::error!("sql timeout: {}", timeout_err); } else if is_error { - tracing::info!("query_sql: {}", query_sql); + tracing::info!("sql: {}", query_sql); if try_reduce { if let Some(query) = query { let reduced_query = self.try_reduce_query(err_code, query).await; - tracing::info!("reduced query_sql: {}", reduced_query.to_string()); + tracing::info!("reduced sql: {}", reduced_query.to_string()); } } tracing::error!(err_message); } + is_conn_error } fn generate_rng(seed: Option) -> impl Rng { @@ -292,11 +304,9 @@ impl Runner { let mut tables = Vec::with_capacity(table_stmts.len()); for (drop_table_stmt, create_table_stmt) in table_stmts { let drop_table_sql = drop_table_stmt.to_string(); - tracing::info!("drop_table_sql: {}", drop_table_sql); - Self::check_res(self.client.query(&drop_table_sql).await); + let _ = self.run_sql(drop_table_sql, None).await; let create_table_sql = create_table_stmt.to_string(); - tracing::info!("create_table_sql: {}", create_table_sql); - Self::check_res(self.client.query(&create_table_sql).await); + let _ = self.run_sql(create_table_sql, None).await; let db_name = create_table_stmt.database.clone(); let table_name = create_table_stmt.table.clone(); @@ -350,89 +360,4 @@ impl Runner { *timeout_err = Some(format!("{}", e)); } } - - fn check_res(responses: Result>) { - match responses { - Ok(responses) => { - if let Some(error) = &responses[0].error { - let value = error.as_object().unwrap(); - let code = value["code"].as_u64().unwrap(); - let message = value["message"].as_str().unwrap(); - if code == 1005 || code == 1065 { - return; - } - if KNOWN_ERRORS - .iter() - .any(|known_err| message.starts_with(known_err)) - { - return; - } - let err = format!("sql exec err code: {} message: {}", code, message); 
- tracing::error!(err); - } - } - Err(err) => { - let err = format!("http err: {}", err); - tracing::error!(err); - } - } - } - - fn collect_paths(path: &Path, paths: &mut Vec) -> Result<()> { - if path.is_dir() { - for entry in (path.read_dir()?).flatten() { - let sub_path = entry.path(); - Self::collect_paths(&sub_path, paths)?; - } - } else if path.is_file() && path.extension().is_some_and(|ext| ext == "test") { - paths.push(path.to_path_buf()); - } - Ok(()) - } - - // Walk through the sqllogic tests folder and read the test SQL in it as seed to generate fuzzed SQL. - fn read_sql_from_sqllogic_tests(fuzz_path: &str) -> Result> { - let path = Path::new(fuzz_path); - let mut paths = vec![]; - Self::collect_paths(path, &mut paths)?; - - let mut sqls = vec![]; - for path in paths.into_iter() { - let content = fs::read_to_string(path)?; - let lines: Vec<&str> = content.lines().collect(); - let mut in_sql = false; - let mut current_sql = String::new(); - - for line in lines { - let line = line.trim(); - if line.starts_with("statement") || line.starts_with("query") { - if !current_sql.is_empty() { - sqls.push(current_sql.clone()); - current_sql.clear(); - } - in_sql = !line.starts_with("statement error"); - } else if line.eq("----") { - // ignore result values - in_sql = false; - } else if !line.is_empty() - && !line.starts_with("--") - && !line.starts_with("#") - && !line.starts_with("include") - && !line.starts_with("onlyif") - && in_sql - { - if current_sql.is_empty() { - current_sql.push_str(line); - } else { - current_sql.push(' '); - current_sql.push_str(line); - } - } - } - if !current_sql.is_empty() { - sqls.push(current_sql); - } - } - Ok(sqls) - } } diff --git a/src/tests/sqlsmith/src/sql_gen/ddl.rs b/src/tests/sqlsmith/src/sql_gen/ddl.rs index cb34b73353d6e..b1b0b53be2367 100644 --- a/src/tests/sqlsmith/src/sql_gen/ddl.rs +++ b/src/tests/sqlsmith/src/sql_gen/ddl.rs @@ -87,7 +87,7 @@ impl SqlGenerator<'_, R> { }; let create_table = CreateTableStmt { - create_option: CreateOption::CreateIfNotExists, + create_option: CreateOption::CreateOrReplace, catalog: None, database: Some(Identifier::from_name(None, db_name)), table: Identifier::from_name(None, table_name), diff --git a/src/tests/sqlsmith/src/sql_gen/dml.rs b/src/tests/sqlsmith/src/sql_gen/dml.rs index b578177f4b625..59b5d73ae382b 100644 --- a/src/tests/sqlsmith/src/sql_gen/dml.rs +++ b/src/tests/sqlsmith/src/sql_gen/dml.rs @@ -33,6 +33,7 @@ use databend_common_ast::ast::MergeOption; use databend_common_ast::ast::MergeSource; use databend_common_ast::ast::MergeUpdateExpr; use databend_common_ast::ast::ReplaceStmt; +use databend_common_ast::ast::Statement; use databend_common_ast::ast::TableReference; use databend_common_ast::ast::UnmatchedClause; use databend_common_ast::ast::UpdateExpr; @@ -67,31 +68,66 @@ enum MutTableAction { } impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { - pub(crate) fn gen_insert(&mut self, table: &Table, row_count: usize) -> InsertStmt { + pub(crate) fn gen_insert(&mut self, table: &Table, row_count: usize) -> Vec { let data_types = table .schema .fields() .iter() .map(|f| (&f.data_type).into()) .collect::>(); - let source = self.gen_insert_source(&data_types, row_count); - InsertStmt { - // TODO - hints: None, - with: None, - catalog: None, - database: table.db_name.clone(), - table: table.name.clone(), - // TODO - columns: vec![], - source, - // TODO - overwrite: false, + let num = self.rng.gen_range(1..=5); + let mut insert_stmts = Vec::with_capacity(num); + for _ in 0..num { + let source = 
self.gen_insert_source(&data_types, row_count); + let insert_stmt = InsertStmt { + // TODO + hints: None, + with: None, + catalog: None, + database: table.db_name.clone(), + table: table.name.clone(), + // TODO + columns: vec![], + source, + // TODO + overwrite: false, + }; + insert_stmts.push(insert_stmt); + } + insert_stmts + } + + pub(crate) fn gen_dml_stmt(&mut self) -> Vec { + let num = self.rng.gen_range(1..=10); + let mut dml_stmts = Vec::with_capacity(num); + for _ in 0..num { + // generate merge, replace, update, delete + let dml_stmt = match self.rng.gen_range(0..=20) { + 0..=10 => { + let merge_stmt = self.gen_merge(); + Statement::MergeInto(merge_stmt) + } + 11..=15 => { + let replace_stmt = self.gen_replace(); + Statement::Replace(replace_stmt) + } + 16..=19 => { + let update_stmt = self.gen_update(); + Statement::Update(update_stmt) + } + 20 => { + let delete_stmt = self.gen_delete(); + Statement::Delete(delete_stmt) + } + _ => unreachable!(), + }; + dml_stmts.push(dml_stmt); } + dml_stmts } - pub(crate) fn gen_delete(&mut self) -> DeleteStmt { + fn gen_delete(&mut self) -> DeleteStmt { let hints = self.gen_hints(); let (_table, table_reference) = self.random_select_table(); let selection = Some(self.gen_expr(&DataType::Boolean)); @@ -104,7 +140,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { } } - pub(crate) fn gen_update(&mut self) -> UpdateStmt { + fn gen_update(&mut self) -> UpdateStmt { let hints = self.gen_hints(); let (table, table_reference) = self.random_select_table(); let selection = if self.rng.gen_bool(0.8) { @@ -130,7 +166,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { } } - pub(crate) fn gen_replace(&mut self) -> ReplaceStmt { + fn gen_replace(&mut self) -> ReplaceStmt { let hints = self.gen_hints(); let (table, _) = self.random_select_table(); let fields = self.random_select_fields(&table); @@ -160,7 +196,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { } } - pub(crate) fn gen_merge(&mut self) -> MergeIntoStmt { + fn gen_merge(&mut self) -> MergeIntoStmt { let hints = self.gen_hints(); self.cte_tables.clear(); self.bound_tables.clear(); @@ -388,7 +424,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { table: &Table, row_count: usize, ) -> Option<(AlterTableStmt, Table, Option)> { - if self.rng.gen_bool(0.3) { + if self.rng.gen_bool(0.5) { return None; } let mut new_table = table.clone(); diff --git a/src/tests/sqlsmith/src/sql_gen/func.rs b/src/tests/sqlsmith/src/sql_gen/func.rs index 32bcefd81a536..91e1d077e58bf 100644 --- a/src/tests/sqlsmith/src/sql_gen/func.rs +++ b/src/tests/sqlsmith/src/sql_gen/func.rs @@ -63,8 +63,9 @@ impl SqlGenerator<'_, R> { } pub(crate) fn gen_factory_scalar_func(&mut self, ty: &DataType) -> Expr { + let by_ty = self.rng.gen_bool(0.6); let (name, params, args_type) = match ty.remove_nullable() { - DataType::String => { + DataType::String if by_ty => { let idx = self.rng.gen_range(0..=5); let name = match idx { 0 => "char".to_string(), @@ -151,7 +152,7 @@ impl SqlGenerator<'_, R> { }; (name, vec![], args_type) } - DataType::Boolean => { + DataType::Boolean if by_ty => { let idx = self.rng.gen_range(0..=3); let name = match idx { 0 => "and_filters".to_string(), @@ -185,7 +186,7 @@ impl SqlGenerator<'_, R> { }; (name, vec![], args_type) } - DataType::Number(_) => { + DataType::Number(_) if by_ty => { let idx = self.rng.gen_range(0..=4); let name = match idx { 0 => "point_in_ellipses".to_string(), @@ -282,13 +283,28 @@ impl SqlGenerator<'_, R> { (name, vec![], args_type) } - DataType::Array(box inner_ty) => { + DataType::Array(box 
inner_ty) if by_ty => { let name = "array".to_string(); let len = self.rng.gen_range(0..=4); let args_type = vec![inner_ty; len]; (name, vec![], args_type) } - DataType::Decimal(_) => { + DataType::Map(box DataType::Tuple(inner_tys)) if by_ty => { + let key_ty = inner_tys[0].clone(); + let name = if self.rng.gen_bool(0.5) { + "map_delete".to_string() + } else { + "map_pick".to_string() + }; + if self.rng.gen_bool(0.5) { + let len = self.rng.gen_range(1..=5); + let args_type = vec![key_ty; len]; + (name, vec![], args_type) + } else { + (name, vec![], vec![DataType::Array(Box::new(key_ty))]) + } + } + DataType::Decimal(_) if by_ty => { let decimal = ["to_float64", "to_float32", "to_decimal", "try_to_decimal"]; let name = decimal[self.rng.gen_range(0..=3)].to_string(); if name == "to_decimal" || name == "try_to_decimal" { @@ -312,11 +328,11 @@ impl SqlGenerator<'_, R> { (name, params, args_type) } } - DataType::Tuple(inner_tys) => { + DataType::Tuple(inner_tys) if by_ty => { let name = "tuple".to_string(); (name, vec![], inner_tys) } - DataType::Variant => { + DataType::Variant if by_ty => { if self.rng.gen_bool(0.5) { let json_func = ["json_array", "json_object", "json_object_keep_null"]; let name = json_func[self.rng.gen_range(0..=2)].to_string(); @@ -366,8 +382,9 @@ impl SqlGenerator<'_, R> { } pub(crate) fn gen_agg_func(&mut self, ty: &DataType) -> Expr { + let by_ty = self.rng.gen_bool(0.6); let (name, params, mut args_type) = match ty.remove_nullable() { - DataType::Number(NumberDataType::UInt8) => { + DataType::Number(NumberDataType::UInt8) if by_ty => { let name = "window_funnel".to_string(); let other_type = vec![DataType::Boolean; 6]; let mut args_type = Vec::with_capacity(7); @@ -382,7 +399,7 @@ impl SqlGenerator<'_, R> { let params = vec![Literal::UInt64(self.rng.gen_range(1..=10))]; (name, params, args_type) } - DataType::Number(NumberDataType::UInt64) => { + DataType::Number(NumberDataType::UInt64) if by_ty => { let idx = self.rng.gen_range(0..=7); let name = match idx { 0 => "approx_count_distinct".to_string(), @@ -422,7 +439,7 @@ impl SqlGenerator<'_, R> { }; (name, params, args_type) } - DataType::Array(_) => { + DataType::Array(_) if by_ty => { let idx = self.rng.gen_range(0..=2); let name = match idx { 0 => { @@ -465,14 +482,14 @@ impl SqlGenerator<'_, R> { }; (name, params, args_type) } - DataType::Decimal(_) => { + DataType::Decimal(_) if by_ty => { let name = "sum".to_string(); let params = vec![]; let args_type = vec![self.gen_decimal_data_type()]; (name, params, args_type) } - DataType::Number(NumberDataType::Float64) => { - let idx = self.rng.gen_range(0..=14); + DataType::Number(NumberDataType::Float64) if by_ty => { + let idx = self.rng.gen_range(0..=15); let name = match idx { 0 => "avg".to_string(), 1 => "covar_pop".to_string(), @@ -489,10 +506,11 @@ impl SqlGenerator<'_, R> { 12 => "quantile_cont".to_string(), 13 => "quantile_tdigest".to_string(), 14 => "quantile_disc".to_string(), + 15 => "quantile_tdigest_weighted".to_string(), _ => unreachable!(), }; - let args_type = if idx == 1 || idx == 2 { + let args_type = if idx == 1 || idx == 2 || idx == 15 { vec![ self.gen_all_number_data_type(), self.gen_all_number_data_type(), @@ -512,7 +530,7 @@ impl SqlGenerator<'_, R> { }; (name, params, args_type) } - DataType::Bitmap => { + DataType::Bitmap if by_ty => { let idx = self.rng.gen_range(0..=1); let name = match idx { 0 => "bitmap_intersect".to_string(), @@ -523,29 +541,52 @@ impl SqlGenerator<'_, R> { let args_type = vec![DataType::Bitmap]; (name, params, args_type) 
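            // Review note: the `by_ty` guard (rng.gen_bool(0.6)) added in this patch
            // means each type-specific arm is only taken ~60% of the time; otherwise
            // generation falls through to the generic arm below
            // (any/min/max/mode/arg_min/arg_max), so every return type keeps a
            // fallback and the generic aggregates still get coverage.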
} - DataType::String => { - let name = "string_agg".to_string(); - let args_type = if self.rng.gen_bool(0.6) { - vec![DataType::String] + DataType::String if by_ty => { + if self.rng.gen_bool(0.5) { + let name = "histogram".to_string(); + let arg_type = self.gen_simple_common_data_type(); + (name, vec![], vec![arg_type]) } else { - vec![DataType::String; 2] - }; - let params = vec![]; - (name, params, args_type) + let name = "string_agg".to_string(); + let args_type = if self.rng.gen_bool(0.6) { + vec![DataType::String] + } else { + vec![DataType::String; 2] + }; + let params = vec![]; + (name, params, args_type) + } + } + DataType::Variant if by_ty => { + if self.rng.gen_bool(0.5) { + let name = "json_array_agg".to_string(); + let arg_type = self.gen_simple_data_type(); + (name, vec![], vec![arg_type]) + } else { + let name = "json_object_agg".to_string(); + let key_type = DataType::String; + let val_type = self.gen_simple_data_type(); + (name, vec![], vec![key_type, val_type]) + } + } + DataType::Geometry if by_ty => { + let name = "st_collect".to_string(); + let arg_type = DataType::Geometry; + (name, vec![], vec![arg_type]) } _ => { - // TODO: other aggreate functions - let idx = self.rng.gen_range(0..=4); + let idx = self.rng.gen_range(0..=5); let name = match idx { 0 => "any".to_string(), 1 => "min".to_string(), 2 => "max".to_string(), - 3 => "arg_min".to_string(), - 4 => "arg_max".to_string(), + 3 => "mode".to_string(), + 4 => "arg_min".to_string(), + 5 => "arg_max".to_string(), _ => unreachable!(), }; let params = vec![]; - let args_type = if idx == 3 || idx == 4 { + let args_type = if idx == 4 || idx == 5 { vec![ty.clone(), self.gen_simple_data_type()] } else { vec![ty.clone()] @@ -579,10 +620,11 @@ impl SqlGenerator<'_, R> { } pub(crate) fn gen_window_func(&mut self, ty: &DataType) -> Expr { + let by_ty = self.rng.gen_bool(0.6); let window = self.gen_window(); let ty = ty.clone(); match ty { - DataType::Number(NumberDataType::UInt64) => { + DataType::Number(NumberDataType::UInt64) if by_ty => { let number = ["row_number", "rank", "dense_rank", "ntile"]; let name = number[self.rng.gen_range(0..=2)]; let args_type = if name == "ntile" { @@ -592,7 +634,7 @@ impl SqlGenerator<'_, R> { }; self.gen_func(name.to_string(), vec![], args_type, window, None) } - DataType::Number(NumberDataType::Float64) => { + DataType::Number(NumberDataType::Float64) if by_ty => { let float = ["percent_rank", "cume_dist"]; let name = float[self.rng.gen_range(0..=1)].to_string(); self.gen_func(name, vec![], vec![], window, None) diff --git a/src/tests/sqlsmith/src/sql_gen/types.rs b/src/tests/sqlsmith/src/sql_gen/types.rs index 42e2a1681f40a..5587d3294d00c 100644 --- a/src/tests/sqlsmith/src/sql_gen/types.rs +++ b/src/tests/sqlsmith/src/sql_gen/types.rs @@ -64,6 +64,21 @@ impl SqlGenerator<'_, R> { } } + pub(crate) fn gen_simple_common_data_type(&mut self) -> DataType { + let ty = match self.rng.gen_range(0..=7) { + 0 => DataType::String, + 1..=5 => self.gen_all_number_data_type(), + 6 => DataType::Timestamp, + 7 => DataType::Date, + _ => unreachable!(), + }; + if self.rng.gen_bool(0.5) { + ty + } else { + DataType::Nullable(Box::new(ty)) + } + } + pub(crate) fn gen_simple_data_type(&mut self) -> DataType { match self.rng.gen_range(0..=13) { 0 => DataType::Null, diff --git a/src/tests/sqlsmith/src/util.rs b/src/tests/sqlsmith/src/util.rs new file mode 100644 index 0000000000000..69c636245f2d8 --- /dev/null +++ b/src/tests/sqlsmith/src/util.rs @@ -0,0 +1,140 @@ +// Copyright 2021 Datafuse Labs +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fs::read_to_string; +use std::path::Path; +use std::path::PathBuf; + +use databend_common_exception::Result; + +// The fuzz test type +#[derive(Default)] +enum TestType { + #[default] + Databend, + Duckdb, + Sqlite, +} + +// Walk through the sqllogic tests folder and read the test SQL in it as seed to generate fuzzed SQL. +pub(crate) fn read_sql_from_test_dirs(fuzz_path: &str) -> Result<Vec<String>> { + let test_type = if fuzz_path.contains("sqlite") { + TestType::Sqlite + } else if fuzz_path.contains("duckdb") { + TestType::Duckdb + } else { + TestType::Databend + }; + + let path = Path::new(fuzz_path); + let mut paths = vec![]; + collect_paths(path, &mut paths)?; + + let mut sqls = vec![]; + for path in paths.into_iter() { + let content = read_to_string(path)?; + let lines: Vec<&str> = content.lines().collect(); + match test_type { + TestType::Databend | TestType::Duckdb => { + read_sqllogict_test_sqls(lines, &mut sqls); + } + TestType::Sqlite => { + read_sqlite_test_sqls(lines, &mut sqls); + } + } + } + Ok(sqls) +} + +fn collect_paths(path: &Path, paths: &mut Vec<PathBuf>) -> Result<()> { + if path.is_dir() { + for entry in (path.read_dir()?).flatten() { + let sub_path = entry.path(); + collect_paths(&sub_path, paths)?; + } + } else if path.is_file() && path.extension().is_some_and(|ext| ext == "test") { + paths.push(path.to_path_buf()); + } + Ok(()) +} + +// Read and parse Sqllogic test files, including Databend and Duckdb.
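// As a rough illustration (the SQL below is hypothetical, not taken from the
// suites), a sqllogictest record that this parser would split into a single
// seed SQL string looks roughly like:
//
//     statement ok
//     CREATE TABLE t (a INT)
//
//     query I
//     SELECT a FROM t ORDER BY a
//     ----
//     1
//
// SQL lines are collected after a `statement`/`query` directive (records that
// begin with `statement error` are skipped), and anything following the
// `----` separator is expected output and is ignored.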
+fn read_sqllogict_test_sqls(lines: Vec<&str>, sqls: &mut Vec<String>) { + let mut in_sql = false; + let mut current_sql = String::new(); + + for line in lines { + let line = line.trim(); + if line.is_empty() { + continue; + } + if line.starts_with("statement") || line.starts_with("query") { + if !current_sql.is_empty() { + sqls.push(current_sql.clone()); + current_sql.clear(); + } + in_sql = !line.starts_with("statement error"); + } else if line.eq("----") { + // ignore result values + in_sql = false; + } else if !line.is_empty() + && !line.starts_with("--") + && !line.starts_with("#") + && !line.starts_with("include") + && !line.starts_with("onlyif") + && in_sql + { + if current_sql.is_empty() { + current_sql.push_str(line); + } else { + current_sql.push(' '); + current_sql.push_str(line); + } + } + } + if !current_sql.is_empty() { + sqls.push(current_sql); + } +} + +// Read and parse Sqlite test files +fn read_sqlite_test_sqls(lines: Vec<&str>, sqls: &mut Vec<String>) { + let mut in_sql = false; + let mut current_sql = String::new(); + + for line in lines { + let line = line.trim(); + if line.is_empty() { + continue; + } + if line.starts_with("do_execsql_test") { + in_sql = true; + } else if line.starts_with("}") { + if !current_sql.is_empty() { + sqls.push(current_sql.clone()); + current_sql.clear(); + } + in_sql = false; + } else if in_sql { + let line = line.replace("PRIMARY KEY", ""); + let line = line.replace("WITHOUT ROWID", ""); + current_sql.push(' '); + current_sql.push_str(&line); + if line.ends_with(";") { + sqls.push(current_sql.clone()); + current_sql.clear(); + } + } + } +} From 07034df346b9059a3dd213a476870ac9f6e2aad0 Mon Sep 17 00:00:00 2001 From: zhya Date: Sat, 18 Jan 2025 23:07:36 +0800 Subject: [PATCH 30/33] fix: column not found in analyze (#17321) * fix: Column not found in analyze * fix --- .../interpreters/interpreter_table_analyze.rs | 70 +++++++++---------- .../09_fuse_engine/09_0044_issue_17314.sql | 44 ++++++++++++ 2 files changed, 77 insertions(+), 37 deletions(-) create mode 100644 tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql diff --git a/src/query/service/src/interpreters/interpreter_table_analyze.rs b/src/query/service/src/interpreters/interpreter_table_analyze.rs index 44ca5322cf00d..80bce205f7008 100644 --- a/src/query/service/src/interpreters/interpreter_table_analyze.rs +++ b/src/query/service/src/interpreters/interpreter_table_analyze.rs @@ -117,7 +117,6 @@ impl Interpreter for AnalyzeTableInterpreter { if let Some(snapshot) = snapshot_opt { // plan sql - let schema = table.schema(); let _table_info = table.get_table_info(); let table_statistics = table @@ -165,22 +164,20 @@ impl Interpreter for AnalyzeTableInterpreter { .get_settings() .get_sql_dialect()?
.default_ident_quote(); - let index_cols: Vec<(u32, String)> = schema - .fields() - .iter() - .filter(|f| RangeIndex::supported_type(&f.data_type().into())) - .map(|f| (f.column_id(), format!("{quote}{}{quote}", f.name))) - .collect(); // 0.01625 --> 12 buckets --> 4K size per column // 1.04 / math.sqrt(1<<12) --> 0.01625 const DISTINCT_ERROR_RATE: f64 = 0.01625; - let ndv_select_expr = index_cols + let ndv_select_expr = snapshot + .schema + .fields() .iter() - .map(|c| { + .filter(|f| RangeIndex::supported_type(&f.data_type().into())) + .map(|f| { format!( - "approx_count_distinct_state({DISTINCT_ERROR_RATE})({}) as ndv_{}", - c.1, c.0 + "approx_count_distinct_state({DISTINCT_ERROR_RATE})({quote}{}{quote}) as ndv_{}", + f.name, + f.column_id() ) }) .join(", "); @@ -190,7 +187,7 @@ impl Interpreter for AnalyzeTableInterpreter { plan.database, plan.table, ); - info!("Analyze via sql {:?}", sql); + info!("Analyze via sql: {sql}"); let (physical_plan, bind_context) = self.plan_sql(sql).await?; let mut build_res = @@ -200,34 +197,33 @@ impl Interpreter for AnalyzeTableInterpreter { // We add a setting `enable_analyze_histogram` to control whether to compute histogram(default is closed). let mut histogram_info_receivers = HashMap::new(); if self.ctx.get_settings().get_enable_analyze_histogram()? { - let histogram_sqls = index_cols + let histogram_sqls = table + .schema() + .fields() .iter() - .map(|c| { - format!( - "SELECT quantile, - COUNT(DISTINCT {}) AS ndv, - MAX({}) AS max_value, - MIN({}) AS min_value, - COUNT() as count - FROM ( - SELECT {}, NTILE({}) OVER (ORDER BY {}) AS quantile - FROM {}.{} WHERE {} IS DISTINCT FROM NULL - ) - GROUP BY quantile ORDER BY quantile \n", - c.1, - c.1, - c.1, - c.1, - DEFAULT_HISTOGRAM_BUCKETS, - c.1, - plan.database, - plan.table, - c.1, + .filter(|f| RangeIndex::supported_type(&f.data_type().into())) + .map(|f| { + let col_name = format!("{quote}{}{quote}", f.name); + ( + format!( + "SELECT quantile, \ + COUNT(DISTINCT {col_name}) AS ndv, \ + MAX({col_name}) AS max_value, \ + MIN({col_name}) AS min_value, \ + COUNT() as count \ + FROM ( \ + SELECT {col_name}, NTILE({}) OVER (ORDER BY {col_name}) AS quantile \ + FROM {}.{} WHERE {col_name} IS DISTINCT FROM NULL \ + ) \ + GROUP BY quantile ORDER BY quantile", + DEFAULT_HISTOGRAM_BUCKETS, plan.database, plan.table, + ), + f.column_id(), ) }) .collect::>(); - for (sql, (col_id, _)) in histogram_sqls.into_iter().zip(index_cols.iter()) { - info!("Analyze histogram via sql {:?}", sql); + for (sql, col_id) in histogram_sqls.into_iter() { + info!("Analyze histogram via sql: {sql}"); let (mut histogram_plan, bind_context) = self.plan_sql(sql).await?; if !self.ctx.get_cluster().is_empty() { histogram_plan = remove_exchange(histogram_plan); @@ -253,7 +249,7 @@ impl Interpreter for AnalyzeTableInterpreter { build_res .sources_pipelines .extend(histogram_build_res.sources_pipelines); - histogram_info_receivers.insert(*col_id, rx); + histogram_info_receivers.insert(col_id, rx); } } FuseTable::do_analyze( diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql b/tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql new file mode 100644 index 0000000000000..744e64a8d471b --- /dev/null +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql @@ -0,0 +1,44 @@ +statement ok +create or replace database issue_17314; + +statement ok +use issue_17314 + +statement ok +set enable_analyze_histogram=1; + +statement ok +create or replace table t1(a string, biz_date1 
string); + +statement ok +insert into t1 values('1', '11'); + +statement ok +alter table t1 rename BIZ_date1 to BIZ_DATE; + +statement ok +analyze table t1; + +statement ok +insert into t1 values('2', '22'); + +statement ok +insert into t1 values('3', '33'); + +statement ok +alter table t1 rename BIZ_DATE to b; + +statement ok +analyze table t1; + +query IIT +select * from fuse_statistic('issue_17314', 't1') order by column_name; +---- +a 3 [bucket id: 0, min: "1", max: "1", ndv: 1.0, count: 1.0], [bucket id: 1, min: "2", max: "2", ndv: 1.0, count: 1.0], [bucket id: 2, min: "3", max: "3", ndv: 1.0, count: 1.0] +b 3 [bucket id: 0, min: "11", max: "11", ndv: 1.0, count: 1.0], [bucket id: 1, min: "22", max: "22", ndv: 1.0, count: 1.0], [bucket id: 2, min: "33", max: "33", ndv: 1.0, count: 1.0] + +statement ok +drop table t1 all; + +statement ok +drop database issue_17314; From ffb8771d109b95ea49217b4e4de50221629d8263 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Sun, 19 Jan 2025 23:10:00 +0800 Subject: [PATCH 31/33] fix(query): unsupport datetime format item should not return panic error (#17323) fix(query): unsupport format item should not return panic error --- .../src/scalars/timestamp/src/datetime.rs | 27 +++++++++++-------- .../functions/02_0012_function_datetimes.test | 6 +++++ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/query/functions/src/scalars/timestamp/src/datetime.rs b/src/query/functions/src/scalars/timestamp/src/datetime.rs index 32ad99f1f273f..0c77e7404b2e6 100644 --- a/src/query/functions/src/scalars/timestamp/src/datetime.rs +++ b/src/query/functions/src/scalars/timestamp/src/datetime.rs @@ -15,6 +15,7 @@ use std::io::Write; use chrono::format::parse_and_remainder; +use chrono::format::Item; use chrono::format::Parsed; use chrono::format::StrftimeItems; use chrono::prelude::*; @@ -700,18 +701,22 @@ fn register_to_string(registry: &mut FunctionRegistry) { if format.is_empty() { output.push_null(); } else { - // Can't use `tz.timestamp_nanos(self.as_() * 1000)` directly, is may cause multiply with overflow. - let (mut secs, mut nanos) = - (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); - if nanos < 0 { - secs -= 1; - nanos += 1_000_000_000; + let items = StrftimeItems::new(format); + if items.clone().any(|item| matches!(item, Item::Error)) { + ctx.set_error(output.len(), "Invalid format string".to_string()); + output.push_null(); + } else { + // Can't use `tz.timestamp_nanos(self.as_() * 1000)` directly, is may cause multiply with overflow. 
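// (A sketch of the arithmetic, assuming `micros` is an i64 count of
// microseconds since the Unix epoch: i64 nanoseconds only span roughly
// +/-292 years, so scaling the raw microsecond value by 1000 can overflow
// near the edges of the supported timestamp range, while the split into
// whole seconds plus a non-negative sub-second nanosecond remainder below
// always stays in range.)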
+ let (mut secs, mut nanos) = + (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); + if nanos < 0 { + secs -= 1; + nanos += 1_000_000_000; + } + let ts = ctx.func_ctx.tz.timestamp_opt(secs, nanos as u32).unwrap(); + let res = ts.format(format).to_string(); + output.push(&res); } - let ts = ctx.func_ctx.tz.timestamp_opt(secs, nanos as u32).unwrap(); - // https://github.com/BurntSushi/jiff/issues/155 - // ASCII is currently required in jiff crate - let res = ts.format(format).to_string(); - output.push(&res); } }, ), diff --git a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test index e76537179a32b..ad7668efcedb0 100644 --- a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test +++ b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test @@ -1367,6 +1367,10 @@ select to_date('精彩的2022年,美丽的02month,激动の02d', '精彩的%Y ---- 2022-02-02 + +statement error 1006 +select date_format('2022-2-04T03:58:59', '%i'); + statement error 1006 select date_format('', ''); @@ -1486,3 +1490,5 @@ query T SELECT add_hours(to_timestamp(710455), 2147483647); ---- 1000-01-01 00:00:00.000000 + + From e71f3b50617557e34ba60f993b27773e3187c3f0 Mon Sep 17 00:00:00 2001 From: everpcpc Date: Mon, 20 Jan 2025 10:04:32 +0800 Subject: [PATCH 32/33] chore(ci): fix pack binaries (#17332) --- .github/actions/pack_binaries/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/pack_binaries/action.yml b/.github/actions/pack_binaries/action.yml index bc54eedb02e41..88fa7fc914ff2 100644 --- a/.github/actions/pack_binaries/action.yml +++ b/.github/actions/pack_binaries/action.yml @@ -28,8 +28,9 @@ runs: env: GH_TOKEN: ${{ github.token }} run: | - verison=$(gh release list --repo databendlabs/bendsql | head -n 1 | awk '{print $1}') - curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${verison}-${{ inputs.target }}.tar.gz + verison=$(gh release list --repo databendlabs/bendsql --json 'name' --jq '.[].name' | head -n 1) + curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${{ inputs.target }}.tar.gz + mkdir -p distro/bin tar -xzvf /tmp/bendsql.tar.gz -C distro/bin - name: Pack Binaries id: pack_binaries From 91f1cedfe8013c405b92cd6ef97b36faf2f6412a Mon Sep 17 00:00:00 2001 From: everpcpc Date: Mon, 20 Jan 2025 11:03:13 +0800 Subject: [PATCH 33/33] chore(ci): fix pack binaries in release (#17335) --- .github/actions/pack_binaries/action.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/actions/pack_binaries/action.yml b/.github/actions/pack_binaries/action.yml index 88fa7fc914ff2..3eb9020f298df 100644 --- a/.github/actions/pack_binaries/action.yml +++ b/.github/actions/pack_binaries/action.yml @@ -23,13 +23,15 @@ runs: category: ${{ inputs.category }} path: distro/bin artifacts: metactl,meta,query,query.debug + - name: Get Latest BendSQL + id: bendsql + uses: pozetroninc/github-action-get-latest-release@master + with: + repository: databendlabs/bendsql - name: Download BendSQL shell: bash - env: - GH_TOKEN: ${{ github.token }} run: | - verison=$(gh release list --repo databendlabs/bendsql --json 'name' --jq '.[].name' | head -n 1) - curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${{ inputs.target }}.tar.gz + curl -sSLfo 
/tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${{ steps.bendsql.outputs.release }}/bendsql-${{ inputs.target }}.tar.gz mkdir -p distro/bin tar -xzvf /tmp/bendsql.tar.gz -C distro/bin - name: Pack Binaries