From e6cf9d1599d12129fe0b5752dabe4dc661ba5940 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Mon, 13 Jan 2025 15:19:15 +0800 Subject: [PATCH 01/33] refactor: only set global settings need to check super privilege (#17255) query setting, session setting, set variable will not check privileges --- .../interpreters/access/privilege_access.rs | 16 ++++++++++++++- .../18_rbac/18_0007_privilege_access.result | 6 ++++++ .../18_rbac/18_0007_privilege_access.sh | 20 +++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs index 4c5df38fb8068..6c15c2369f58c 100644 --- a/src/query/service/src/interpreters/access/privilege_access.rs +++ b/src/query/service/src/interpreters/access/privilege_access.rs @@ -1151,7 +1151,21 @@ impl AccessChecker for PrivilegeAccess { self.validate_access(&GrantObject::Global, UserPrivilegeType::Grant,false, false) .await?; } - Plan::Set(_) | Plan::Unset(_) | Plan::Kill(_) | Plan::SetPriority(_) | Plan::System(_) => { + Plan::Set(plan) => { + use databend_common_ast::ast::SetType; + if let SetType::SettingsGlobal = plan.set_type { + self.validate_access(&GrantObject::Global, UserPrivilegeType::Super, false, false) + .await?; + } + } + Plan::Unset(plan) => { + use databend_common_ast::ast::SetType; + if let SetType::SettingsGlobal = plan.unset_type { + self.validate_access(&GrantObject::Global, UserPrivilegeType::Super, false, false) + .await?; + } + } + Plan::Kill(_) | Plan::SetPriority(_) | Plan::System(_) => { self.validate_access(&GrantObject::Global, UserPrivilegeType::Super, false, false) .await?; } diff --git a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result index fd9a369f184f9..1bb4bd20095b0 100644 --- a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result +++ b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.result @@ -142,3 +142,9 @@ OWNERSHIP default.default.t2 USER b GRANT OWNERSHIP ON 'default'.'default'.'t2' 1 2 3 +=== set privilege check === +100 +100 +1 +1 +=== set privilege check succ === diff --git a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh index ebbc772282056..0c663d9db0df7 100755 --- a/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh +++ b/tests/suites/0_stateless/18_rbac/18_0007_privilege_access.sh @@ -302,3 +302,23 @@ echo "drop table if exists t1" | $BENDSQL_CLIENT_CONNECT echo "drop table if exists t2" | $BENDSQL_CLIENT_CONNECT echo "drop stage if exists s3;" | $BENDSQL_CLIENT_CONNECT echo "drop database if exists db01" | $BENDSQL_CLIENT_CONNECT + +echo "=== set privilege check ===" +echo "drop user if exists c" | $BENDSQL_CLIENT_CONNECT +echo "create user c identified by '123'" | $BENDSQL_CLIENT_CONNECT +export USER_C_CONNECT="bendsql --user=c --password=123 --host=${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT}" +echo "set session max_threads=1000" | $BENDSQL_CLIENT_CONNECT +echo "unset session max_threads" | $BENDSQL_CLIENT_CONNECT +echo "settings (ddl_column_type_nullable=0) select 100" | $BENDSQL_CLIENT_CONNECT +echo "SET variable a = 'a';" | $BENDSQL_CLIENT_CONNECT +echo "set global max_threads=1000" | $BENDSQL_CLIENT_CONNECT +echo "unset global max_threads" | $BENDSQL_CLIENT_CONNECT + +echo "set session max_threads=1000" | $USER_C_CONNECT +echo 
"unset session max_threads" | $USER_C_CONNECT +echo "settings (ddl_column_type_nullable=0) select 100" | $USER_C_CONNECT +echo "SET variable a = 'a';" | $USER_C_CONNECT +echo "set global max_threads=1000;" | $USER_C_CONNECT 2>&1 | grep "Super" | wc -l +echo "unset global max_threads;" | $USER_C_CONNECT 2>&1 | grep "Super" | wc -l +echo "drop user if exists c" | $BENDSQL_CLIENT_CONNECT +echo "=== set privilege check succ ===" From 61e962e531c51166b1f3dcd426e612e01a224183 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 13 Jan 2025 20:17:22 +0800 Subject: [PATCH 02/33] fix(query): hash table scatter will always send agg meta (#17245) * fix(query): hash table scatter will always send agg meta * fix(query): hash table scatter will always send agg meta * fix(query): z * fix(query): z * update * update * update * z * z * z * z * z * z * z --- src/query/expression/src/aggregate/payload.rs | 11 +++--- .../expression/src/aggregate/payload_flush.rs | 4 +- .../aggregator/aggregate_exchange_injector.rs | 26 +++++-------- .../new_transform_partition_bucket.rs | 37 ++++++++++++------- .../transforms/aggregator/serde/serde_meta.rs | 22 +++++------ .../serde/transform_aggregate_serializer.rs | 32 ++++++++++++++-- .../serde/transform_deserializer.rs | 32 ++++++++++------ ...transform_exchange_aggregate_serializer.rs | 21 +++++------ .../aggregator/transform_aggregate_final.rs | 1 + .../transforms/range_join/ie_join_state.rs | 4 ++ .../processors/transforms/transform_srf.rs | 3 ++ 11 files changed, 115 insertions(+), 78 deletions(-) diff --git a/src/query/expression/src/aggregate/payload.rs b/src/query/expression/src/aggregate/payload.rs index 562b2a899f9bf..f08e61280f5cc 100644 --- a/src/query/expression/src/aggregate/payload.rs +++ b/src/query/expression/src/aggregate/payload.rs @@ -402,15 +402,14 @@ impl Payload { true } - pub fn empty_block(&self) -> DataBlock { - let columns = self - .aggrs - .iter() - .map(|f| ColumnBuilder::with_capacity(&f.return_type().unwrap(), 0).build()) + pub fn empty_block(&self, fake_rows: Option) -> DataBlock { + let fake_rows = fake_rows.unwrap_or(0); + let columns = (0..self.aggrs.len()) + .map(|_| ColumnBuilder::repeat_default(&DataType::Binary, fake_rows).build()) .chain( self.group_types .iter() - .map(|t| ColumnBuilder::with_capacity(t, 0).build()), + .map(|t| ColumnBuilder::repeat_default(t, fake_rows).build()), ) .collect_vec(); DataBlock::new_from_columns(columns) diff --git a/src/query/expression/src/aggregate/payload_flush.rs b/src/query/expression/src/aggregate/payload_flush.rs index 632ec78f69990..e4b4735394c17 100644 --- a/src/query/expression/src/aggregate/payload_flush.rs +++ b/src/query/expression/src/aggregate/payload_flush.rs @@ -121,7 +121,7 @@ impl Payload { } if blocks.is_empty() { - return Ok(self.empty_block()); + return Ok(self.empty_block(None)); } DataBlock::concat(&blocks) } @@ -173,7 +173,7 @@ impl Payload { } if blocks.is_empty() { - return Ok(self.empty_block()); + return Ok(self.empty_block(None)); } DataBlock::concat(&blocks) diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs index 482920b53571e..dacfcc0826b08 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/aggregate_exchange_injector.rs @@ -173,26 +173,20 @@ impl FlightScatter for 
HashTableHashScatter { AggregateMeta::Partitioned { .. } => unreachable!(), AggregateMeta::AggregateSpilling(payload) => { for p in scatter_partitioned_payload(payload, self.buckets)? { - blocks.push(match p.len() == 0 { - true => DataBlock::empty(), - false => DataBlock::empty_with_meta( - AggregateMeta::create_agg_spilling(p), - ), - }); + blocks.push(DataBlock::empty_with_meta( + AggregateMeta::create_agg_spilling(p), + )); } } AggregateMeta::AggregatePayload(p) => { for payload in scatter_payload(p.payload, self.buckets)? { - blocks.push(match payload.len() == 0 { - true => DataBlock::empty(), - false => { - DataBlock::empty_with_meta(AggregateMeta::create_agg_payload( - p.bucket, - payload, - p.max_partition_count, - )) - } - }); + blocks.push(DataBlock::empty_with_meta( + AggregateMeta::create_agg_payload( + p.bucket, + payload, + p.max_partition_count, + ), + )); } } }; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs index 0e35ebaed73b2..39db4c52ca273 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/new_transform_partition_bucket.rs @@ -212,6 +212,7 @@ impl NewTransformPartitionBucket { #[allow(unused_assignments)] fn add_bucket(&mut self, mut data_block: DataBlock) -> Result<(isize, usize)> { let (mut bucket, mut partition_count) = (0, 0); + let mut is_empty_block = false; if let Some(block_meta) = data_block.get_meta() { if let Some(block_meta) = AggregateMeta::downcast_ref_from(block_meta) { (bucket, partition_count) = match block_meta { @@ -250,7 +251,11 @@ impl NewTransformPartitionBucket { if let Some(AggregateMeta::Spilled(buckets_payload)) = AggregateMeta::downcast_from(meta) { - let partition_count = buckets_payload[0].max_partition_count; + let partition_count = if !buckets_payload.is_empty() { + buckets_payload[0].max_partition_count + } else { + MAX_PARTITION_COUNT + }; self.max_partition_count = self.max_partition_count.max(partition_count); @@ -274,12 +279,14 @@ impl NewTransformPartitionBucket { unreachable!() } AggregateMeta::Serialized(payload) => { + is_empty_block = payload.data_block.is_empty(); self.max_partition_count = self.max_partition_count.max(payload.max_partition_count); (payload.bucket, payload.max_partition_count) } AggregateMeta::AggregatePayload(payload) => { + is_empty_block = payload.payload.len() == 0; self.max_partition_count = self.max_partition_count.max(payload.max_partition_count); @@ -298,23 +305,25 @@ impl NewTransformPartitionBucket { )); } - if self.all_inputs_init { - if partition_count != self.max_partition_count { - return Err(ErrorCode::Internal( + if !is_empty_block { + if self.all_inputs_init { + if partition_count != self.max_partition_count { + return Err(ErrorCode::Internal( "Internal, the partition count does not equal the max partition count on TransformPartitionBucket. 
", )); - } - match self.buckets_blocks.entry(bucket) { - Entry::Vacant(v) => { - v.insert(vec![data_block]); - } - Entry::Occupied(mut v) => { - v.get_mut().push(data_block); } - }; - } else { - self.unpartitioned_blocks.push(data_block); + match self.buckets_blocks.entry(bucket) { + Entry::Vacant(v) => { + v.insert(vec![data_block]); + } + Entry::Occupied(mut v) => { + v.get_mut().push(data_block); + } + }; + } else { + self.unpartitioned_blocks.push(data_block); + } } Ok((bucket, partition_count)) diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs index 731ec4e1b1049..b83cf2c97c90e 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/serde_meta.rs @@ -31,21 +31,15 @@ pub struct AggregateSerdeMeta { pub columns_layout: Vec, // use for new agg hashtable pub max_partition_count: usize, + pub is_empty: bool, } impl AggregateSerdeMeta { - pub fn create(bucket: isize) -> BlockMetaInfoPtr { - Box::new(AggregateSerdeMeta { - typ: BUCKET_TYPE, - bucket, - location: None, - data_range: None, - columns_layout: vec![], - max_partition_count: 0, - }) - } - - pub fn create_agg_payload(bucket: isize, max_partition_count: usize) -> BlockMetaInfoPtr { + pub fn create_agg_payload( + bucket: isize, + max_partition_count: usize, + is_empty: bool, + ) -> BlockMetaInfoPtr { Box::new(AggregateSerdeMeta { typ: BUCKET_TYPE, bucket, @@ -53,6 +47,7 @@ impl AggregateSerdeMeta { data_range: None, columns_layout: vec![], max_partition_count, + is_empty, }) } @@ -61,6 +56,7 @@ impl AggregateSerdeMeta { location: String, data_range: Range, columns_layout: Vec, + is_empty: bool, ) -> BlockMetaInfoPtr { Box::new(AggregateSerdeMeta { typ: SPILLED_TYPE, @@ -69,6 +65,7 @@ impl AggregateSerdeMeta { location: Some(location), data_range: Some(data_range), max_partition_count: 0, + is_empty, }) } @@ -86,6 +83,7 @@ impl AggregateSerdeMeta { location: Some(location), data_range: Some(data_range), max_partition_count, + is_empty: false, }) } } diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs index bd3bdb24d6339..096485fa98fcc 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_aggregate_serializer.rs @@ -183,6 +183,7 @@ pub struct SerializeAggregateStream { pub payload: Pin>, flush_state: PayloadFlushState, end_iter: bool, + nums: usize, } unsafe impl Send for SerializeAggregateStream {} @@ -198,6 +199,7 @@ impl SerializeAggregateStream { flush_state: PayloadFlushState::default(), _params: params.clone(), end_iter: false, + nums: 0, } } } @@ -225,10 +227,32 @@ impl SerializeAggregateStream { } match block { - Some(block) => Ok(Some(block.add_meta(Some( - AggregateSerdeMeta::create_agg_payload(p.bucket, p.max_partition_count), - ))?)), - None => Ok(None), + Some(block) => { + self.nums += 1; + Ok(Some(block.add_meta(Some( + AggregateSerdeMeta::create_agg_payload( + p.bucket, + p.max_partition_count, + false, + ), + ))?)) + } + None => { + // always return at least one block + if self.nums == 0 { + self.nums += 1; + let block = p.payload.empty_block(Some(1)); 
+ Ok(Some(block.add_meta(Some( + AggregateSerdeMeta::create_agg_payload( + p.bucket, + p.max_partition_count, + true, + ), + ))?)) + } else { + Ok(None) + } + } } } } diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs index c73799432d17d..ed82428bbc7d2 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_deserializer.rs @@ -89,18 +89,26 @@ impl TransformDeserializer { } Some(meta) => { return match meta.typ == BUCKET_TYPE { - true => Ok(DataBlock::empty_with_meta( - AggregateMeta::create_serialized( - meta.bucket, - deserialize_block( - dict, - fragment_data, - &self.schema, - self.arrow_schema.clone(), - )?, - meta.max_partition_count, - ), - )), + true => { + let mut block = deserialize_block( + dict, + fragment_data, + &self.schema, + self.arrow_schema.clone(), + )?; + + if meta.is_empty { + block = block.slice(0..0); + } + + Ok(DataBlock::empty_with_meta( + AggregateMeta::create_serialized( + meta.bucket, + block, + meta.max_partition_count, + ), + )) + } false => { let data_schema = Arc::new(exchange_defines::spilled_schema()); let arrow_schema = Arc::new(exchange_defines::spilled_arrow_schema()); diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs index 65e9e5cdc513a..a034b86038acb 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs @@ -149,6 +149,8 @@ impl BlockMetaTransform for TransformExchangeAggregateSeria } Some(AggregateMeta::AggregatePayload(p)) => { + let (bucket, max_partition_count) = (p.bucket, p.max_partition_count); + if index == self.local_pos { serialized_blocks.push(FlightSerialized::DataBlock( block.add_meta(Some(Box::new(AggregateMeta::AggregatePayload(p))))?, @@ -156,24 +158,19 @@ impl BlockMetaTransform for TransformExchangeAggregateSeria continue; } - let bucket = compute_block_number(p.bucket, p.max_partition_count)?; + let block_number = compute_block_number(bucket, max_partition_count)?; let stream = SerializeAggregateStream::create( &self.params, SerializePayload::AggregatePayload(p), ); let mut stream_blocks = stream.into_iter().collect::>>()?; - - if stream_blocks.is_empty() { - serialized_blocks.push(FlightSerialized::DataBlock(DataBlock::empty())); - } else { - let mut c = DataBlock::concat(&stream_blocks)?; - if let Some(meta) = stream_blocks[0].take_meta() { - c.replace_meta(meta); - } - - let c = serialize_block(bucket, c, &self.options)?; - serialized_blocks.push(FlightSerialized::DataBlock(c)); + debug_assert!(!stream_blocks.is_empty()); + let mut c = DataBlock::concat(&stream_blocks)?; + if let Some(meta) = stream_blocks[0].take_meta() { + c.replace_meta(meta); } + let c = serialize_block(block_number, c, &self.options)?; + serialized_blocks.push(FlightSerialized::DataBlock(c)); } }; } diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs 
b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs index 2a719a23c23d8..beded12043100 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/transform_aggregate_final.rs @@ -59,6 +59,7 @@ impl TransformFinalAggregate { AggregateMeta::Serialized(payload) => match agg_hashtable.as_mut() { Some(ht) => { debug_assert!(bucket == payload.bucket); + let payload = payload.convert_to_partitioned_payload( self.params.group_data_types.clone(), self.params.aggregate_functions.clone(), diff --git a/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs b/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs index 5c44b2be9cbf6..24991a001a112 100644 --- a/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs +++ b/src/query/service/src/pipelines/processors/transforms/range_join/ie_join_state.rs @@ -125,6 +125,10 @@ impl IEJoinState { fn intersection(&self, left_block: &DataBlock, right_block: &DataBlock) -> bool { let left_len = left_block.num_rows(); let right_len = right_block.num_rows(); + if left_len == 0 || right_len == 0 { + return false; + } + let left_l1_column = left_block.columns()[0] .value .convert_to_full_column(&self.l1_data_type, left_len); diff --git a/src/query/service/src/pipelines/processors/transforms/transform_srf.rs b/src/query/service/src/pipelines/processors/transforms/transform_srf.rs index 63e6b94b3bab5..ab71d492ed2cf 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_srf.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_srf.rs @@ -113,6 +113,9 @@ impl BlockingTransform for TransformSRF { } let input = self.input.take().unwrap(); + if input.is_empty() { + return Ok(None); + } let mut result_size = 0; let mut used = 0; From 15c2435597f9013e46ed7533d8b18d8d63488d51 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 14 Jan 2025 08:28:45 +0800 Subject: [PATCH 03/33] chore(sqllogictests): Check container and stop first to avoid conflict running (#17261) * chore(sqllogictests): Stop container to avoid conflict running * add sleep * check timeout --- tests/sqllogictests/src/util.rs | 38 +++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/tests/sqllogictests/src/util.rs b/tests/sqllogictests/src/util.rs index c985d6151f4a5..949ea98055d71 100644 --- a/tests/sqllogictests/src/util.rs +++ b/tests/sqllogictests/src/util.rs @@ -19,18 +19,21 @@ use std::path::PathBuf; use std::time::Duration; use std::time::Instant; +use bollard::container::RemoveContainerOptions; use bollard::Docker; use clap::Parser; use redis::Commands; use serde::Deserialize; use serde::Serialize; use serde_json::Value; +use testcontainers::core::error::WaitContainerError; use testcontainers::core::IntoContainerPort; use testcontainers::core::WaitFor; use testcontainers::runners::AsyncRunner; use testcontainers::ContainerAsync; use testcontainers::GenericImage; use testcontainers::ImageExt; +use testcontainers::TestcontainersError; use testcontainers_modules::mysql::Mysql; use testcontainers_modules::redis::Redis; use testcontainers_modules::redis::REDIS_PORT; @@ -256,9 +259,7 @@ pub async fn run_ttc_container( let start = Instant::now(); println!("Start container {container_name}"); - // Stop the container - let _ = docker.stop_container(&container_name, None).await; - let _ = 
docker.remove_container(&container_name, None).await; + stop_container(docker, &container_name).await; let mut i = 1; loop { @@ -290,6 +291,10 @@ pub async fn run_ttc_container( "Start container {} using {} secs failed: {}", container_name, duration, err ); + if let TestcontainersError::WaitContainer(WaitContainerError::StartupTimeout) = err + { + stop_container(docker, &container_name).await; + } if i == CONTAINER_RETRY_TIMES || duration >= CONTAINER_TIMEOUT_SECONDS { break; } else { @@ -327,9 +332,7 @@ async fn run_redis_server(docker: &Docker) -> Result> { let container_name = "redis".to_string(); println!("Start container {container_name}"); - // Stop the container - let _ = docker.stop_container(&container_name, None).await; - let _ = docker.remove_container(&container_name, None).await; + stop_container(docker, &container_name).await; let mut i = 1; loop { @@ -365,6 +368,10 @@ async fn run_redis_server(docker: &Docker) -> Result> { "Start container {} using {} secs failed: {}", container_name, duration, err ); + if let TestcontainersError::WaitContainer(WaitContainerError::StartupTimeout) = err + { + stop_container(docker, &container_name).await; + } if i == CONTAINER_RETRY_TIMES || duration >= CONTAINER_TIMEOUT_SECONDS { break; } else { @@ -381,9 +388,7 @@ async fn run_mysql_server(docker: &Docker) -> Result> { let container_name = "mysql".to_string(); println!("Start container {container_name}"); - // Stop the container - let _ = docker.stop_container(&container_name, None).await; - let _ = docker.remove_container(&container_name, None).await; + stop_container(docker, &container_name).await; // Add a table for test. // CREATE TABLE test.user( @@ -436,6 +441,10 @@ async fn run_mysql_server(docker: &Docker) -> Result> { "Start container {} using {} secs failed: {}", container_name, duration, err ); + if let TestcontainersError::WaitContainer(WaitContainerError::StartupTimeout) = err + { + stop_container(docker, &container_name).await; + } if i == CONTAINER_RETRY_TIMES || duration >= CONTAINER_TIMEOUT_SECONDS { break; } else { @@ -446,3 +455,14 @@ async fn run_mysql_server(docker: &Docker) -> Result> { } Err(format!("Start {container_name} failed").into()) } + +// Stop the running container to avoid conflict +async fn stop_container(docker: &Docker, container_name: &str) { + let _ = docker.stop_container(container_name, None).await; + let options = Some(RemoveContainerOptions { + force: true, + ..Default::default() + }); + let _ = docker.remove_container(container_name, options).await; + println!("Stop container {container_name}"); +} From 009a5ebac5829b51e8499f441f67438fa4e9f656 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 14 Jan 2025 10:32:00 +0800 Subject: [PATCH 04/33] chore: Bump opendal to pinned tag version (#17259) Signed-off-by: Xuanwo --- Cargo.lock | 44 +++++++++++++++++++++++--------------------- Cargo.toml | 4 ++-- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 10b186b7710c4..9ea8284d9192e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2931,7 +2931,7 @@ dependencies = [ "databend-storages-common-table-meta", "limits-rs", "log", - "opendal 0.51.0", + "opendal 0.51.1", "serde", "serde_json", "serfig", @@ -3209,7 +3209,7 @@ dependencies = [ "libc", "object", "once_cell", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "paste", "prost", @@ -3559,7 +3559,7 @@ dependencies = [ "maplit", "num-derive", "num-traits", - "opendal 0.51.0", + "opendal 0.51.1", "paste", "prost", "serde", @@ -3797,7 +3797,7 @@ dependencies = [ "lz4", 
"match-template", "num", - "opendal 0.51.0", + "opendal 0.51.1", "rand", "ringbuffer", "roaring", @@ -4008,7 +4008,7 @@ dependencies = [ "log", "num-derive", "num-traits", - "opendal 0.51.0", + "opendal 0.51.1", "parking_lot 0.12.3", "prqlc", "rand", @@ -4044,7 +4044,7 @@ dependencies = [ "futures", "http 1.1.0", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "prometheus-client", "regex", @@ -4153,7 +4153,7 @@ dependencies = [ "itertools 0.13.0", "jsonb", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parking_lot 0.12.3", "parquet", "rand", @@ -4201,7 +4201,7 @@ dependencies = [ "futures", "hive_metastore", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "recursive", "serde", @@ -4311,7 +4311,7 @@ dependencies = [ "databend-storages-common-table-meta", "futures-util", "log", - "opendal 0.51.0", + "opendal 0.51.1", "orc-rust", "serde", "serde_json", @@ -4347,7 +4347,7 @@ dependencies = [ "ethnum", "futures", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "rand", "serde", @@ -4392,7 +4392,7 @@ dependencies = [ "databend-common-storages-parquet", "databend-storages-common-blocks", "databend-storages-common-table-meta", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "serde", "serde_json", @@ -4433,7 +4433,7 @@ dependencies = [ "enum-as-inner", "futures", "log", - "opendal 0.51.0", + "opendal 0.51.1", "parquet", "serde", "serde_json", @@ -4498,7 +4498,7 @@ dependencies = [ "jsonb", "log", "once_cell", - "opendal 0.51.0", + "opendal 0.51.1", "parking_lot 0.12.3", "regex", "serde", @@ -4735,7 +4735,7 @@ dependencies = [ "jsonb", "jwt-simple", "log", - "opendal 0.51.0", + "opendal 0.51.1", "tantivy", "tempfile", ] @@ -5113,7 +5113,7 @@ dependencies = [ "mysql_async", "naive-cityhash", "num_cpus", - "opendal 0.51.0", + "opendal 0.51.1", "opensrv-mysql", "opentelemetry", "opentelemetry_sdk", @@ -5297,7 +5297,7 @@ dependencies = [ "fastrace", "futures", "log", - "opendal 0.51.0", + "opendal 0.51.1", ] [[package]] @@ -10226,8 +10226,9 @@ dependencies = [ [[package]] name = "object_store_opendal" -version = "0.48.3" -source = "git+https://github.com/apache/opendal?rev=f7f9990#f7f9990f95146400c3aef8afc11804e4a6e3afe3" +version = "0.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d47750d91b6d9fa245b3dff15d582dff8462d826cf6c2d5f2e91035d9a317f9" dependencies = [ "async-trait", "bytes", @@ -10235,7 +10236,7 @@ dependencies = [ "futures", "futures-util", "object_store", - "opendal 0.51.0", + "opendal 0.51.1", "pin-project", "tokio", ] @@ -10307,8 +10308,9 @@ dependencies = [ [[package]] name = "opendal" -version = "0.51.0" -source = "git+https://github.com/apache/opendal?rev=f7f9990#f7f9990f95146400c3aef8afc11804e4a6e3afe3" +version = "0.51.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9dcfa7a3615e3c60eb662ed6b46b6f244cf2658098f593c0c0915430b3a268" dependencies = [ "anyhow", "async-backtrace", diff --git a/Cargo.toml b/Cargo.toml index 1259a7be28ef4..07383209a6470 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -369,10 +369,10 @@ num-derive = "0.3.3" num-traits = "0.2.19" num_cpus = "1.13.1" object = "0.36.5" -object_store_opendal = { git = "https://github.com/apache/opendal", package = "object_store_opendal", rev = "f7f9990" } +object_store_opendal = { version = "0.49.0" } once_cell = "1.15.0" openai_api_rust = "0.1" -opendal = { version = "0.51", git = "https://github.com/apache/opendal", rev = "f7f9990", features = [ +opendal = { version = "0.51.1", features = [ "layers-fastrace", 
"layers-prometheus-client", "layers-async-backtrace", From 5b372c1193c18c1e653463cf65007d16f5625134 Mon Sep 17 00:00:00 2001 From: Damon07 <1490818749@qq.com> Date: Tue, 14 Jan 2025 11:02:20 +0800 Subject: [PATCH 05/33] fix(query): fix lazy columns missed in constant table scan (#17258) * fix(query): fix lazy columns missed in constant table scan Signed-off-by: damon * clear lazy columns when rewriting false filter to be empty scan Signed-off-by: damon --------- Signed-off-by: damon --- src/query/sql/src/planner/metadata.rs | 4 ++++ .../optimizer/rule/rewrite/rule_eliminate_filter.rs | 6 ++++++ tests/sqllogictests/suites/query/issues/issue_17158.test | 8 ++++++++ 3 files changed, 18 insertions(+) create mode 100644 tests/sqllogictests/suites/query/issues/issue_17158.test diff --git a/src/query/sql/src/planner/metadata.rs b/src/query/sql/src/planner/metadata.rs index bd8507d0604c3..d3a84062b70cf 100644 --- a/src/query/sql/src/planner/metadata.rs +++ b/src/query/sql/src/planner/metadata.rs @@ -170,6 +170,10 @@ impl Metadata { self.lazy_columns.extend(indices); } + pub fn clear_lazy_columns(&mut self) { + self.lazy_columns.clear(); + } + pub fn add_non_lazy_columns(&mut self, indices: HashSet) { debug_assert!(indices.iter().all(|i| *i < self.columns.len())); self.non_lazy_columns.extend(indices); diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs index 211fb3e01e2a5..8400b5dc6512e 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_eliminate_filter.rs @@ -77,6 +77,12 @@ impl Rule for RuleEliminateFilter { .derive_relational_prop(&RelExpr::with_s_expr(s_expr))? .output_columns .clone(); + + { + let mut metadata = self.metadata.write(); + metadata.clear_lazy_columns(); + } + let metadata = self.metadata.read(); let mut fields = Vec::with_capacity(output_columns.len()); diff --git a/tests/sqllogictests/suites/query/issues/issue_17158.test b/tests/sqllogictests/suites/query/issues/issue_17158.test new file mode 100644 index 0000000000000..cee1165ae2947 --- /dev/null +++ b/tests/sqllogictests/suites/query/issues/issue_17158.test @@ -0,0 +1,8 @@ +statement ok +create table tt2 (c0 bool, c1 int); + +statement ok +insert into tt2 values(true, 1),(false, 2),(true, 3); + +statement ok +select null, c0, 30, c1 from tt2 where false order by c0 LIMIT 3 OFFSET 0; \ No newline at end of file From 82611b17883857bc1b600bc37ee1b8ed67a176e2 Mon Sep 17 00:00:00 2001 From: Sky Fan <3374614481@qq.com> Date: Tue, 14 Jan 2025 11:03:21 +0800 Subject: [PATCH 06/33] fix: vacuum index not work if index is dropped by create or replace (#17263) * fix: vacuum index not work if index is dropped by create or replace * fix test * remove Any * add comment --- src/meta/api/src/name_id_value_api.rs | 12 ++- src/meta/api/src/schema_api_impl.rs | 85 +++++++++++++------ .../management/src/procedure/procedure_mgr.rs | 19 +++-- ...1_004_vacuum_drop_aggregating_index.result | 14 +++ .../01_004_vacuum_drop_aggregating_index.sh | 34 ++++++++ 5 files changed, 130 insertions(+), 34 deletions(-) diff --git a/src/meta/api/src/name_id_value_api.rs b/src/meta/api/src/name_id_value_api.rs index 46117cb9cf290..2d51a7dc194ff 100644 --- a/src/meta/api/src/name_id_value_api.rs +++ b/src/meta/api/src/name_id_value_api.rs @@ -71,17 +71,22 @@ where /// Such operations do not have any condition constraints. 
/// For example, a `name -> id` mapping can have a reverse `id -> name` mapping. /// + /// `mark_delete_records` is used to generate additional key-values for implementing `mark_delete` operation. + /// For example, when an index is dropped by `override_exist`, `__fd_marked_deleted_index// -> marked_deleted_index_meta` will be added. + /// /// If there is already a `name_ident` exists, return the existing id in a `Ok(Err(exist))`. /// Otherwise, create `name -> id -> value` and returns the created id in a `Ok(Ok(created))`. - async fn create_id_value( + async fn create_id_value( &self, name_ident: &K, value: &IdRsc::ValueType, override_exist: bool, associated_records: A, + mark_delete_records: M, ) -> Result, SeqV>>, MetaTxnError> where A: Fn(DataId) -> Vec<(String, Vec)> + Send, + M: Fn(DataId, &IdRsc::ValueType) -> Result)>, MetaError> + Send, { debug!(name_ident :? =name_ident; "NameIdValueApi: {}", func_name!()); @@ -114,6 +119,11 @@ where for (k, _v) in kvs { txn.if_then.push(TxnOp::delete(k)); } + + let kvs = mark_delete_records(seq_id.data, &seq_meta.data)?; + for (k, v) in kvs { + txn.if_then.push(TxnOp::put(k, v)); + } } else { return Ok(Err(seq_id)); } diff --git a/src/meta/api/src/schema_api_impl.rs b/src/meta/api/src/schema_api_impl.rs index 532d4a642af3f..4a1506d410875 100644 --- a/src/meta/api/src/schema_api_impl.rs +++ b/src/meta/api/src/schema_api_impl.rs @@ -753,12 +753,21 @@ impl + ?Sized> SchemaApi for KV { let name_ident_raw = serialize_struct(&IndexNameIdentRaw::from(name_ident))?; let create_res = self - .create_id_value(name_ident, meta, overriding, |id| { - vec![( - IndexIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), - name_ident_raw.clone(), - )] - }) + .create_id_value( + name_ident, + meta, + overriding, + |id| { + vec![( + IndexIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), + name_ident_raw.clone(), + )] + }, + |index_id, value| { + mark_index_as_deleted(name_ident.tenant(), value.table_id, *index_id) + .map(|(k, v)| vec![(k, v)]) + }, + ) .await?; match create_res { @@ -803,20 +812,9 @@ impl + ?Sized> SchemaApi for KV { IndexIdToNameIdent::new_generic(name_ident.tenant(), seq_id.data).to_string_key(), )); - // add __fd_marked_deleted_index// -> marked_deleted_index_meta - let marked_deleted_index_id_ident = MarkedDeletedIndexIdIdent::new_generic( - name_ident.tenant(), - MarkedDeletedIndexId::new(seq_meta.data.table_id, *seq_id.data), - ); - let marked_deleted_index_meta = MarkedDeletedIndexMeta { - dropped_on: Utc::now(), - index_type: MarkedDeletedIndexType::AGGREGATING, - }; - - txn.if_then.push(TxnOp::put( - marked_deleted_index_id_ident.to_string_key(), - serialize_struct(&marked_deleted_index_meta)?, - )); + let (key, value) = + mark_index_as_deleted(name_ident.tenant(), seq_meta.data.table_id, *seq_id.data)?; + txn.if_then.push(TxnOp::put(key, value)); let (succ, _responses) = send_txn(self, txn).await?; debug!(key :? =name_ident, id :? 
=&id_ident,succ = succ; "{}", func_name!()); @@ -2901,12 +2899,18 @@ impl + ?Sized> SchemaApi for KV { let name_ident_raw = serialize_struct(&CatalogNameIdentRaw::from(name_ident))?; let res = self - .create_id_value(name_ident, meta, false, |id| { - vec![( - CatalogIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), - name_ident_raw.clone(), - )] - }) + .create_id_value( + name_ident, + meta, + false, + |id| { + vec![( + CatalogIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), + name_ident_raw.clone(), + )] + }, + |_, _| Ok(vec![]), + ) .await?; Ok(res) @@ -3020,7 +3024,13 @@ impl + ?Sized> SchemaApi for KV { let name_ident = &req.dictionary_ident; let create_res = self - .create_id_value(name_ident, &req.dictionary_meta, false, |_| vec![]) + .create_id_value( + name_ident, + &req.dictionary_meta, + false, + |_| vec![], + |_, _| Ok(vec![]), + ) .await?; match create_res { @@ -4177,3 +4187,24 @@ fn typ() -> &'static str { .next() .unwrap_or("UnknownType") } + +/// add __fd_marked_deleted_index// -> marked_deleted_index_meta +pub fn mark_index_as_deleted( + tenant: &Tenant, + table_id: u64, + index_id: u64, +) -> Result<(String, Vec), MetaError> { + let marked_deleted_index_id_ident = MarkedDeletedIndexIdIdent::new_generic( + tenant, + MarkedDeletedIndexId::new(table_id, index_id), + ); + let marked_deleted_index_meta = MarkedDeletedIndexMeta { + dropped_on: Utc::now(), + index_type: MarkedDeletedIndexType::AGGREGATING, + }; + + Ok(( + marked_deleted_index_id_ident.to_string_key(), + serialize_struct(&marked_deleted_index_meta)?, + )) +} diff --git a/src/query/management/src/procedure/procedure_mgr.rs b/src/query/management/src/procedure/procedure_mgr.rs index 21a88e301522c..be765d04b3477 100644 --- a/src/query/management/src/procedure/procedure_mgr.rs +++ b/src/query/management/src/procedure/procedure_mgr.rs @@ -66,12 +66,19 @@ impl ProcedureMgr { let create_res = self .kv_api - .create_id_value(name_ident, meta, overriding, |id| { - vec![( - ProcedureIdToNameIdent::new_generic(name_ident.tenant(), id).to_string_key(), - name_ident_raw.clone(), - )] - }) + .create_id_value( + name_ident, + meta, + overriding, + |id| { + vec![( + ProcedureIdToNameIdent::new_generic(name_ident.tenant(), id) + .to_string_key(), + name_ident_raw.clone(), + )] + }, + |_, _| Ok(vec![]), + ) .await?; match create_res { diff --git a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result index 2c156409652a2..c2dbef8e4e291 100644 --- a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result +++ b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.result @@ -37,3 +37,17 @@ before vacuum, should be 1 index dir after vacuum, should be 0 index dir 0 0 +>>>> create or replace database test_vacuum_drop_aggregating_index +>>>> create or replace table test_vacuum_drop_aggregating_index.agg(a int, b int,c int) 'fs:///tmp/test_vacuum_drop_aggregating_index/' +>>>> insert into test_vacuum_drop_aggregating_index.agg values (1,1,4), (1,2,1), (1,2,4) +3 +>>>> CREATE OR REPLACE AGGREGATING INDEX index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg; +>>>> insert into test_vacuum_drop_aggregating_index.agg values (2,2,5) +1 +>>>> REFRESH AGGREGATING INDEX index; +before vacuum, should be 1 index dir +1 +>>>> create or replace aggregating index index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg; +after vacuum, should be 0 index dir +0 
+>>>> drop aggregating index index diff --git a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh index b6b92e6ce8f50..b957280cf7ad1 100755 --- a/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh +++ b/tests/suites/5_ee/01_vacuum/01_004_vacuum_drop_aggregating_index.sh @@ -86,3 +86,37 @@ echo "after vacuum, should be 0 index dir" find /tmp/test_vacuum_drop_aggregating_index/"$PREFIX_1"/_i_a/ -type f | wc -l find /tmp/test_vacuum_drop_aggregating_index/"$PREFIX_2"/_i_a/ -type f | wc -l + + +### create or replace index + +stmt "create or replace database test_vacuum_drop_aggregating_index" + +mkdir -p /tmp/test_vacuum_drop_aggregating_index/ + +stmt "create or replace table test_vacuum_drop_aggregating_index.agg(a int, b int,c int) 'fs:///tmp/test_vacuum_drop_aggregating_index/'" + + +stmt "insert into test_vacuum_drop_aggregating_index.agg values (1,1,4), (1,2,1), (1,2,4)" + +stmt "CREATE OR REPLACE AGGREGATING INDEX index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg;" + +stmt "insert into test_vacuum_drop_aggregating_index.agg values (2,2,5)" + +stmt "REFRESH AGGREGATING INDEX index;" + +SNAPSHOT_LOCATION=$(echo "select snapshot_location from fuse_snapshot('test_vacuum_drop_aggregating_index','agg') limit 1" | $BENDSQL_CLIENT_CONNECT) +PREFIX=$(echo "$SNAPSHOT_LOCATION" | cut -d'/' -f1-2) + +echo "before vacuum, should be 1 index dir" + +ls /tmp/test_vacuum_drop_aggregating_index/"$PREFIX"/_i_a/ | wc -l + +stmt "create or replace aggregating index index AS SELECT MIN(a), MAX(b) FROM test_vacuum_drop_aggregating_index.agg;" + +stmt "set data_retention_time_in_days=0; select * from fuse_vacuum_drop_aggregating_index('test_vacuum_drop_aggregating_index','agg')" > /dev/null + +echo "after vacuum, should be 0 index dir" +find /tmp/test_vacuum_drop_aggregating_index/"$PREFIX"/_i_a/ -type f | wc -l + +stmt "drop aggregating index index" From 420f4b4d56cb8e85c6de919e118ac48432c3a676 Mon Sep 17 00:00:00 2001 From: Winter Zhang Date: Tue, 14 Jan 2025 13:52:20 +0800 Subject: [PATCH 07/33] feat(cluster): support warehouse level show processlist and kill query (#17249) * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse * feat(cluster): support proxy to warehouse --- src/query/catalog/src/plan/partition.rs | 7 ++- src/query/catalog/src/table.rs | 13 +++- src/query/catalog/src/table_context.rs | 2 + src/query/catalog/tests/it/partitions.rs | 2 +- .../management/src/warehouse/warehouse_api.rs | 2 + .../management/src/warehouse/warehouse_mgr.rs | 59 ++++++++++++++++-- src/query/service/src/clusters/cluster.rs | 47 ++++++++++---- .../src/interpreters/interpreter_kill.rs | 23 +++---- .../interpreters/interpreter_set_priority.rs | 22 +++---- .../interpreters/interpreter_system_action.rs | 19 +++--- .../interpreter_table_truncate.rs | 18 +++--- src/query/service/src/schedulers/scheduler.rs | 4 ++ .../src/servers/admin/v1/tenant_tables.rs | 3 +- src/query/service/src/sessions/query_ctx.rs | 11 +++- .../service/src/sessions/query_ctx_shared.rs | 33 +++++++++- .../src/table_functions/async_crash_me.rs | 4 -- .../table_functions/cloud/task_dependents.rs | 4 -- 
.../cloud/task_dependents_enable.rs | 4 -- .../src/table_functions/cloud/task_history.rs | 4 -- .../table_functions/numbers/numbers_table.rs | 8 ++- .../others/execute_background_job.rs | 4 -- .../table_functions/others/license_info.rs | 4 -- .../others/suggested_background_tasks.rs | 4 -- .../table_functions/others/tenant_quota.rs | 4 -- .../service/src/table_functions/others/udf.rs | 4 -- .../src/table_functions/sync_crash_me.rs | 4 -- .../tests/it/sql/exec/get_table_bind_test.rs | 8 +++ .../it/storages/fuse/operations/commit.rs | 8 +++ src/query/sql/src/executor/physical_plan.rs | 7 +++ .../sql/src/planner/optimizer/optimizer.rs | 15 +++++ src/query/sql/src/planner/optimizer/util.rs | 33 +++++++++- src/query/storages/delta/src/table.rs | 5 +- src/query/storages/fuse/src/fuse_table.rs | 5 +- .../function_template/simple_arg_func.rs | 10 +-- .../function_template/simple_func_template.rs | 27 +++++--- .../src/table_functions/set_cache_capacity.rs | 6 +- .../storages/hive/hive/src/hive_table.rs | 5 +- src/query/storages/iceberg/src/table.rs | 5 +- src/query/storages/memory/src/memory_table.rs | 7 ++- src/query/storages/null/src/null_table.rs | 5 +- src/query/storages/orc/src/table.rs | 5 +- .../src/parquet_rs/parquet_table/table.rs | 5 +- .../result_cache/src/table_function/table.rs | 4 -- src/query/storages/stage/src/stage_table.rs | 5 +- src/query/storages/stream/src/stream_table.rs | 5 +- .../storages/system/src/backtrace_table.rs | 3 +- src/query/storages/system/src/caches_table.rs | 3 +- src/query/storages/system/src/log_queue.rs | 11 ++-- .../system/src/malloc_stats_totals_table.rs | 3 +- .../storages/system/src/metrics_table.rs | 3 +- .../storages/system/src/processes_table.rs | 3 +- .../storages/system/src/queries_profiling.rs | 3 +- src/query/storages/system/src/table.rs | 61 +++++++++++++------ .../storages/system/src/temp_files_table.rs | 6 +- 54 files changed, 392 insertions(+), 187 deletions(-) diff --git a/src/query/catalog/src/plan/partition.rs b/src/query/catalog/src/plan/partition.rs index 026f80d4c32f2..b2ba71f4e5898 100644 --- a/src/query/catalog/src/plan/partition.rs +++ b/src/query/catalog/src/plan/partition.rs @@ -102,7 +102,9 @@ pub enum PartitionsShuffleKind { // Bind the Partition to executor by partition.rand() order. Rand, // Bind the Partition to executor by broadcast - Broadcast, + BroadcastCluster, + // Bind the Partition to warehouse executor by broadcast + BroadcastWarehouse, } #[derive(serde::Serialize, serde::Deserialize, Debug, Clone, PartialEq)] @@ -190,7 +192,8 @@ impl Partitions { parts.shuffle(&mut rng); parts } - PartitionsShuffleKind::Broadcast => { + // the executors will be all nodes in the warehouse if a query is BroadcastWarehouse. 
+ PartitionsShuffleKind::BroadcastCluster | PartitionsShuffleKind::BroadcastWarehouse => { return Ok(executors_sorted .into_iter() .map(|executor| { diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 829a4c9e79800..7d6bdfa471d63 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -94,8 +94,8 @@ pub trait Table: Sync + Send { self.get_table_info().ident.table_id } - fn is_local(&self) -> bool { - true + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Local } fn as_any(&self) -> &dyn Any; @@ -450,7 +450,7 @@ pub trait Table: Sync + Send { false } - fn broadcast_truncate_to_cluster(&self) -> bool { + fn broadcast_truncate_to_warehouse(&self) -> bool { false } @@ -678,3 +678,10 @@ pub struct ColumnRange { pub min: Bound, pub max: Bound, } + +#[derive(Debug)] +pub enum DistributionLevel { + Local, + Cluster, + Warehouse, +} diff --git a/src/query/catalog/src/table_context.rs b/src/query/catalog/src/table_context.rs index 1e957eb2ea68c..824d03fec4945 100644 --- a/src/query/catalog/src/table_context.rs +++ b/src/query/catalog/src/table_context.rs @@ -234,6 +234,8 @@ pub trait TableContext: Send + Sync { fn get_settings(&self) -> Arc; fn get_session_settings(&self) -> Arc; fn get_cluster(&self) -> Arc; + fn set_cluster(&self, cluster: Arc); + async fn get_warehouse_cluster(&self) -> Result>; fn get_processes_info(&self) -> Vec; fn get_queued_queries(&self) -> Vec; fn get_queries_profile(&self) -> HashMap>; diff --git a/src/query/catalog/tests/it/partitions.rs b/src/query/catalog/tests/it/partitions.rs index 2199c93858f94..e9cbe441abd48 100644 --- a/src/query/catalog/tests/it/partitions.rs +++ b/src/query/catalog/tests/it/partitions.rs @@ -239,7 +239,7 @@ fn test_partition_reshuffle() { // Broadcast. { - let partitions = gen_parts(PartitionsShuffleKind::Broadcast, 3); + let partitions = gen_parts(PartitionsShuffleKind::BroadcastCluster, 3); let shuffle = partitions.reshuffle(executors_2.clone()).unwrap(); writeln!( diff --git a/src/query/management/src/warehouse/warehouse_api.rs b/src/query/management/src/warehouse/warehouse_api.rs index ed4f92c4610c5..dbeecd9bd69bf 100644 --- a/src/query/management/src/warehouse/warehouse_api.rs +++ b/src/query/management/src/warehouse/warehouse_api.rs @@ -132,5 +132,7 @@ pub trait WarehouseApi: Sync + Send { async fn discover(&self, node_id: &str) -> Result>; + async fn discover_warehouse_nodes(&self, node_id: &str) -> Result>; + async fn get_node_info(&self, node_id: &str) -> Result>; } diff --git a/src/query/management/src/warehouse/warehouse_mgr.rs b/src/query/management/src/warehouse/warehouse_mgr.rs index b0b563d209094..c0aa324f2a0d0 100644 --- a/src/query/management/src/warehouse/warehouse_mgr.rs +++ b/src/query/management/src/warehouse/warehouse_mgr.rs @@ -1485,13 +1485,34 @@ impl WarehouseApi for WarehouseMgr { return Err(ErrorCode::InvalidWarehouse("Warehouse name is empty.")); } - let warehouse_snapshot = self.warehouse_snapshot(&warehouse).await?; + let nodes_prefix = format!( + "{}/{}/", + self.cluster_node_key_prefix, + escape_for_key(&warehouse)? 
+ ); - Ok(warehouse_snapshot - .snapshot_nodes - .into_iter() - .map(|x| x.node_info) - .collect()) + let values = self.metastore.prefix_list_kv(&nodes_prefix).await?; + + let mut nodes_info = Vec::with_capacity(values.len()); + for (node_key, value) in values { + let mut node_info = serde_json::from_slice::(&value.data)?; + + let suffix = &node_key[nodes_prefix.len()..]; + + let Some((cluster, node)) = suffix.split_once('/') else { + return Err(ErrorCode::InvalidWarehouse(format!( + "Node key is invalid {:?}", + node_key + ))); + }; + + node_info.id = unescape_for_key(node)?; + node_info.cluster_id = unescape_for_key(cluster)?; + node_info.warehouse_id = warehouse.to_string(); + nodes_info.push(node_info); + } + + Ok(nodes_info) } async fn add_warehouse_cluster( @@ -2155,6 +2176,32 @@ impl WarehouseApi for WarehouseMgr { .collect::>()) } + async fn discover_warehouse_nodes(&self, node_id: &str) -> Result> { + let node_key = format!("{}/{}", self.node_key_prefix, escape_for_key(node_id)?); + + let Some(seq) = self.metastore.get_kv(&node_key).await? else { + return Err(ErrorCode::NotFoundClusterNode(format!( + "Node {} is offline, Please restart this node.", + node_id + ))); + }; + + let node = serde_json::from_slice::(&seq.data)?; + + if !node.assigned_warehouse() { + return Ok(vec![node]); + } + + let expect_version = DATABEND_COMMIT_VERSION.to_string(); + + Ok(self + .list_warehouse_nodes(node.warehouse_id.clone()) + .await? + .into_iter() + .filter(|x| x.binary_version == expect_version) + .collect::>()) + } + async fn get_node_info(&self, node_id: &str) -> Result> { let node_key = format!("{}/{}", self.node_key_prefix, escape_for_key(node_id)?); match self.metastore.get_kv(&node_key).await? { diff --git a/src/query/service/src/clusters/cluster.rs b/src/query/service/src/clusters/cluster.rs index 5ba2fa91125c6..3239aa0484fc9 100644 --- a/src/query/service/src/clusters/cluster.rs +++ b/src/query/service/src/clusters/cluster.rs @@ -273,17 +273,11 @@ impl ClusterDiscovery { Ok((lift_time, Arc::new(cluster_manager))) } - #[async_backtrace::framed] - pub async fn discover(&self, config: &InnerConfig) -> Result> { - let nodes = match config.query.cluster_id.is_empty() { - true => self.warehouse_manager.discover(&config.query.node_id).await, - false => { - self.warehouse_manager - .list_warehouse_cluster_nodes(&self.cluster_id, &self.cluster_id) - .await - } - }; - + async fn create_cluster_with_try_connect( + &self, + config: &InnerConfig, + nodes: Result>, + ) -> Result> { match nodes { Err(cause) => { metric_incr_cluster_error_count( @@ -336,6 +330,37 @@ impl ClusterDiscovery { } } + pub async fn discover_warehouse_nodes(&self, config: &InnerConfig) -> Result> { + let nodes = match config.query.cluster_id.is_empty() { + true => { + self.warehouse_manager + .discover_warehouse_nodes(&config.query.node_id) + .await + } + false => { + self.warehouse_manager + .list_warehouse_nodes(self.cluster_id.clone()) + .await + } + }; + + self.create_cluster_with_try_connect(config, nodes).await + } + + #[async_backtrace::framed] + pub async fn discover(&self, config: &InnerConfig) -> Result> { + let nodes = match config.query.cluster_id.is_empty() { + true => self.warehouse_manager.discover(&config.query.node_id).await, + false => { + self.warehouse_manager + .list_warehouse_cluster_nodes(&self.cluster_id, &self.cluster_id) + .await + } + }; + + self.create_cluster_with_try_connect(config, nodes).await + } + pub async fn find_node_by_warehouse( self: Arc, warehouse: &str, diff --git 
a/src/query/service/src/interpreters/interpreter_kill.rs b/src/query/service/src/interpreters/interpreter_kill.rs index 72309f0c2ecf8..e3bd3dcf2ba1e 100644 --- a/src/query/service/src/interpreters/interpreter_kill.rs +++ b/src/query/service/src/interpreters/interpreter_kill.rs @@ -31,7 +31,7 @@ use crate::sessions::QueryContext; pub struct KillInterpreter { ctx: Arc, plan: KillPlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl KillInterpreter { @@ -39,7 +39,7 @@ impl KillInterpreter { Ok(KillInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -47,29 +47,30 @@ impl KillInterpreter { Ok(KillInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } #[async_backtrace::framed] - async fn kill_cluster_query(&self) -> Result { - let cluster = self.ctx.get_cluster(); + async fn kill_warehouse_query(&self) -> Result { let settings = self.ctx.get_settings(); + let warehouse = self.ctx.get_warehouse_cluster().await?; + let flight_params = FlightParams { timeout: settings.get_flight_client_timeout()?, retry_times: settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - let mut message = HashMap::with_capacity(cluster.nodes.len()); + let mut message = HashMap::with_capacity(warehouse.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } - let res = cluster + let res = warehouse .do_action::<_, bool>(KILL_QUERY, message, flight_params) .await?; @@ -85,8 +86,8 @@ impl KillInterpreter { #[async_backtrace::framed] async fn execute_kill(&self, session_id: &String) -> Result { match self.ctx.get_session_by_id(session_id) { - None => match self.proxy_to_cluster { - true => self.kill_cluster_query().await, + None => match self.proxy_to_warehouse { + true => self.kill_warehouse_query().await, false => Err(ErrorCode::UnknownSession(format!( "Not found session id {}", session_id diff --git a/src/query/service/src/interpreters/interpreter_set_priority.rs b/src/query/service/src/interpreters/interpreter_set_priority.rs index 1bf830b24be01..eabff93f3d5fc 100644 --- a/src/query/service/src/interpreters/interpreter_set_priority.rs +++ b/src/query/service/src/interpreters/interpreter_set_priority.rs @@ -30,7 +30,7 @@ use crate::sessions::QueryContext; pub struct SetPriorityInterpreter { ctx: Arc, plan: SetPriorityPlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl SetPriorityInterpreter { @@ -38,7 +38,7 @@ impl SetPriorityInterpreter { Ok(SetPriorityInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -46,17 +46,17 @@ impl SetPriorityInterpreter { Ok(SetPriorityInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } #[async_backtrace::framed] - async fn set_cluster_priority(&self) -> Result { - let cluster = self.ctx.get_cluster(); + async fn set_warehouse_priority(&self) -> Result { + let warehouse = self.ctx.get_warehouse_cluster().await?; - let mut message = HashMap::with_capacity(cluster.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + let mut message = HashMap::with_capacity(warehouse.nodes.len()); + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } @@ -67,7 +67,7 @@ impl SetPriorityInterpreter { retry_times: 
settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - let res = cluster + let res = warehouse .do_action::<_, bool>(SET_PRIORITY, message, flight_params) .await?; @@ -96,8 +96,8 @@ impl Interpreter for SetPriorityInterpreter { async fn execute2(&self) -> Result { let id = &self.plan.id; match self.ctx.get_session_by_id(id) { - None => match self.proxy_to_cluster { - true => self.set_cluster_priority().await, + None => match self.proxy_to_warehouse { + true => self.set_warehouse_priority().await, false => Err(ErrorCode::UnknownSession(format!( "Not found session id {}", id diff --git a/src/query/service/src/interpreters/interpreter_system_action.rs b/src/query/service/src/interpreters/interpreter_system_action.rs index ca7e3ffb50c7d..e0b2b192a3a70 100644 --- a/src/query/service/src/interpreters/interpreter_system_action.rs +++ b/src/query/service/src/interpreters/interpreter_system_action.rs @@ -31,7 +31,7 @@ use crate::sessions::QueryContext; pub struct SystemActionInterpreter { ctx: Arc, plan: SystemPlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl SystemActionInterpreter { @@ -39,7 +39,7 @@ impl SystemActionInterpreter { Ok(SystemActionInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -47,7 +47,7 @@ impl SystemActionInterpreter { Ok(SystemActionInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } } @@ -65,11 +65,12 @@ impl Interpreter for SystemActionInterpreter { #[async_backtrace::framed] #[fastrace::trace] async fn execute2(&self) -> Result { - if self.proxy_to_cluster { - let cluster = self.ctx.get_cluster(); - let mut message = HashMap::with_capacity(cluster.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + if self.proxy_to_warehouse { + let warehouse = self.ctx.get_warehouse_cluster().await?; + + let mut message = HashMap::with_capacity(warehouse.nodes.len()); + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } @@ -80,7 +81,7 @@ impl Interpreter for SystemActionInterpreter { retry_times: settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - cluster + warehouse .do_action::<_, ()>(SYSTEM_ACTION, message, flight_params) .await?; } diff --git a/src/query/service/src/interpreters/interpreter_table_truncate.rs b/src/query/service/src/interpreters/interpreter_table_truncate.rs index 70afa08e38660..1c704dcc3247e 100644 --- a/src/query/service/src/interpreters/interpreter_table_truncate.rs +++ b/src/query/service/src/interpreters/interpreter_table_truncate.rs @@ -32,7 +32,7 @@ pub struct TruncateTableInterpreter { ctx: Arc, plan: TruncateTablePlan, - proxy_to_cluster: bool, + proxy_to_warehouse: bool, } impl TruncateTableInterpreter { @@ -40,7 +40,7 @@ impl TruncateTableInterpreter { Ok(TruncateTableInterpreter { ctx, plan, - proxy_to_cluster: true, + proxy_to_warehouse: true, }) } @@ -48,7 +48,7 @@ impl TruncateTableInterpreter { Ok(TruncateTableInterpreter { ctx, plan, - proxy_to_cluster: false, + proxy_to_warehouse: false, }) } } @@ -85,12 +85,12 @@ impl Interpreter for TruncateTableInterpreter { // check mutability table.check_mutable()?; - if self.proxy_to_cluster && table.broadcast_truncate_to_cluster() { - let cluster = self.ctx.get_cluster(); + if self.proxy_to_warehouse && table.broadcast_truncate_to_warehouse() { + let warehouse = self.ctx.get_warehouse_cluster().await?; - 
let mut message = HashMap::with_capacity(cluster.nodes.len()); - for node_info in &cluster.nodes { - if node_info.id != cluster.local_id { + let mut message = HashMap::with_capacity(warehouse.nodes.len()); + for node_info in &warehouse.nodes { + if node_info.id != warehouse.local_id { message.insert(node_info.id.clone(), self.plan.clone()); } } @@ -101,7 +101,7 @@ impl Interpreter for TruncateTableInterpreter { retry_times: settings.get_flight_max_retry_times()?, retry_interval: settings.get_flight_retry_interval()?, }; - cluster + warehouse .do_action::<_, ()>(TRUNCATE_TABLE, message, flight_params) .await?; } diff --git a/src/query/service/src/schedulers/scheduler.rs b/src/query/service/src/schedulers/scheduler.rs index 726f380237aad..5bee2a3567800 100644 --- a/src/query/service/src/schedulers/scheduler.rs +++ b/src/query/service/src/schedulers/scheduler.rs @@ -65,6 +65,10 @@ pub async fn build_query_pipeline_without_render_result_set( let build_res = if !plan.is_distributed_plan() { build_local_pipeline(ctx, plan).await } else { + if plan.is_warehouse_distributed_plan() { + ctx.set_cluster(ctx.get_warehouse_cluster().await?); + } + build_distributed_pipeline(ctx, plan).await }?; Ok(build_res) diff --git a/src/query/service/src/servers/admin/v1/tenant_tables.rs b/src/query/service/src/servers/admin/v1/tenant_tables.rs index 098609e887057..7be7b059b713c 100644 --- a/src/query/service/src/servers/admin/v1/tenant_tables.rs +++ b/src/query/service/src/servers/admin/v1/tenant_tables.rs @@ -16,6 +16,7 @@ use chrono::DateTime; use chrono::Utc; use databend_common_ast::parser::Dialect; use databend_common_catalog::catalog::CatalogManager; +use databend_common_catalog::table::DistributionLevel; use databend_common_config::GlobalConfig; use databend_common_exception::Result; use databend_common_meta_app::tenant::Tenant; @@ -111,7 +112,7 @@ async fn load_tenant_tables(tenant: &Tenant) -> Result { engine: table.engine().to_string(), created_on: table.get_table_info().meta.created_on, updated_on: table.get_table_info().meta.updated_on, - is_local: table.is_local(), + is_local: matches!(table.distribution_level(), DistributionLevel::Local), is_external: table.get_table_info().meta.storage_params.is_some(), rows: stats.number_of_rows, data_bytes: stats.data_bytes, diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index 9c55fba8f5b37..b06d45b0f0841 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -976,6 +976,10 @@ impl TableContext for QueryContext { self.shared.get_cluster() } + fn set_cluster(&self, cluster: Arc) { + self.shared.set_cluster(cluster) + } + // Get all the processes list info. 
fn get_processes_info(&self) -> Vec { SessionManager::instance().processes_info() @@ -1140,10 +1144,9 @@ impl TableContext for QueryContext { if actual_batch_limit != max_batch_size { return Err(ErrorCode::StorageUnsupported( format!( - "Within the same transaction, the batch size for a stream must remain consistent {:?} {:?}", + "Within the same transaction, the batch size for a stream must remain consistent {:?} {:?}", actual_batch_limit, max_batch_size ) - )); } } else if max_batch_size.is_some() { @@ -1714,6 +1717,10 @@ impl TableContext for QueryContext { } Ok(streams_meta) } + + async fn get_warehouse_cluster(&self) -> Result> { + self.shared.get_warehouse_clusters().await + } } impl TrySpawn for QueryContext { diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index c31fe083316b2..af2324b508239 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -36,6 +36,7 @@ use databend_common_catalog::runtime_filter_info::RuntimeFilterReady; use databend_common_catalog::statistics::data_cache_statistics::DataCacheMetrics; use databend_common_catalog::table_context::ContextError; use databend_common_catalog::table_context::StageAttachment; +use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_meta_app::principal::OnErrorMode; @@ -59,6 +60,7 @@ use parking_lot::RwLock; use uuid::Uuid; use crate::clusters::Cluster; +use crate::clusters::ClusterDiscovery; use crate::pipelines::executor::PipelineExecutor; use crate::sessions::query_affect::QueryAffect; use crate::sessions::Session; @@ -91,7 +93,8 @@ pub struct QueryContextShared { pub(in crate::sessions) session: Arc, pub(in crate::sessions) runtime: Arc>>>, pub(in crate::sessions) init_query_id: Arc>, - pub(in crate::sessions) cluster_cache: Arc, + pub(in crate::sessions) cluster_cache: Arc>>, + pub(in crate::sessions) warehouse_cache: Arc>>>, pub(in crate::sessions) running_query: Arc>>, pub(in crate::sessions) running_query_kind: Arc>>, pub(in crate::sessions) running_query_text_hash: Arc>>, @@ -158,7 +161,7 @@ impl QueryContextShared { query_settings: Settings::create(session.get_current_tenant()), catalog_manager: CatalogManager::instance(), session, - cluster_cache, + cluster_cache: Arc::new(RwLock::new(cluster_cache)), data_operator: DataOperator::instance(), init_query_id: Arc::new(RwLock::new(Uuid::new_v4().to_string())), total_scan_values: Arc::new(Progress::create()), @@ -206,6 +209,7 @@ impl QueryContextShared { cluster_spill_progress: Default::default(), spilled_files: Default::default(), + warehouse_cache: Arc::new(RwLock::new(None)), })) } @@ -263,8 +267,31 @@ impl QueryContextShared { // TODO: Wait for the query to be processed (write out the last error) } + pub fn set_cluster(&self, cluster: Arc) { + let mut cluster_cache = self.cluster_cache.write(); + *cluster_cache = cluster; + } + pub fn get_cluster(&self) -> Arc { - self.cluster_cache.clone() + self.cluster_cache.read().clone() + } + + pub async fn get_warehouse_clusters(&self) -> Result> { + if let Some(warehouse) = self.warehouse_cache.read().as_ref() { + return Ok(warehouse.clone()); + } + + let config = GlobalConfig::instance(); + let discovery = ClusterDiscovery::instance(); + let warehouse = discovery.discover_warehouse_nodes(&config).await?; + + let mut write_guard = self.warehouse_cache.write(); + + if write_guard.is_none() { + *write_guard = 
Some(warehouse.clone()); + } + + Ok(write_guard.as_ref().cloned().expect("expect cluster.")) } pub fn get_current_catalog(&self) -> String { diff --git a/src/query/service/src/table_functions/async_crash_me.rs b/src/query/service/src/table_functions/async_crash_me.rs index 5ad3d3f1e3dcc..14c54c37538c9 100644 --- a/src/query/service/src/table_functions/async_crash_me.rs +++ b/src/query/service/src/table_functions/async_crash_me.rs @@ -90,10 +90,6 @@ impl AsyncCrashMeTable { #[async_trait::async_trait] impl Table for AsyncCrashMeTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/cloud/task_dependents.rs b/src/query/service/src/table_functions/cloud/task_dependents.rs index 997ffc91f4cdf..b01285495871c 100644 --- a/src/query/service/src/table_functions/cloud/task_dependents.rs +++ b/src/query/service/src/table_functions/cloud/task_dependents.rs @@ -110,10 +110,6 @@ impl TaskDependentsTable { #[async_trait::async_trait] impl Table for TaskDependentsTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/cloud/task_dependents_enable.rs b/src/query/service/src/table_functions/cloud/task_dependents_enable.rs index 734006a5498e2..85413edb1f05b 100644 --- a/src/query/service/src/table_functions/cloud/task_dependents_enable.rs +++ b/src/query/service/src/table_functions/cloud/task_dependents_enable.rs @@ -83,10 +83,6 @@ impl TaskDependentsEnableTable { #[async_trait::async_trait] impl Table for TaskDependentsEnableTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/cloud/task_history.rs b/src/query/service/src/table_functions/cloud/task_history.rs index a84d331d17baf..a2ce415103283 100644 --- a/src/query/service/src/table_functions/cloud/task_history.rs +++ b/src/query/service/src/table_functions/cloud/task_history.rs @@ -87,10 +87,6 @@ impl TaskHistoryTable { #[async_trait::async_trait] impl Table for TaskHistoryTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/numbers/numbers_table.rs b/src/query/service/src/table_functions/numbers/numbers_table.rs index 09b017a961f73..50a57ef3cd745 100644 --- a/src/query/service/src/table_functions/numbers/numbers_table.rs +++ b/src/query/service/src/table_functions/numbers/numbers_table.rs @@ -22,6 +22,7 @@ use databend_common_catalog::plan::PartInfoPtr; use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_args::TableArgs; use databend_common_exception::Result; @@ -111,8 +112,11 @@ impl NumbersTable { #[async_trait::async_trait] impl Table for NumbersTable { - fn is_local(&self) -> bool { - self.name() == "numbers_local" + fn distribution_level(&self) -> DistributionLevel { + match self.name() { + "numbers_local" => DistributionLevel::Local, + _ => DistributionLevel::Cluster, + } } fn as_any(&self) -> &dyn Any { diff --git a/src/query/service/src/table_functions/others/execute_background_job.rs b/src/query/service/src/table_functions/others/execute_background_job.rs index c1681625b76f5..7bfdaa2e42653 100644 --- 
a/src/query/service/src/table_functions/others/execute_background_job.rs +++ b/src/query/service/src/table_functions/others/execute_background_job.rs @@ -78,10 +78,6 @@ impl ExecuteBackgroundJobTable { #[async_trait::async_trait] impl Table for ExecuteBackgroundJobTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/license_info.rs b/src/query/service/src/table_functions/others/license_info.rs index b4a986ad11ecc..6423a6d245888 100644 --- a/src/query/service/src/table_functions/others/license_info.rs +++ b/src/query/service/src/table_functions/others/license_info.rs @@ -99,10 +99,6 @@ impl LicenseInfoTable { #[async_trait::async_trait] impl Table for LicenseInfoTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/suggested_background_tasks.rs b/src/query/service/src/table_functions/others/suggested_background_tasks.rs index 637b99b5fe591..039142a0cea5c 100644 --- a/src/query/service/src/table_functions/others/suggested_background_tasks.rs +++ b/src/query/service/src/table_functions/others/suggested_background_tasks.rs @@ -97,10 +97,6 @@ impl SuggestedBackgroundTasksTable { #[async_trait::async_trait] impl Table for SuggestedBackgroundTasksTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/tenant_quota.rs b/src/query/service/src/table_functions/others/tenant_quota.rs index 9e79ebb5b7b60..99f33e2e5fa30 100644 --- a/src/query/service/src/table_functions/others/tenant_quota.rs +++ b/src/query/service/src/table_functions/others/tenant_quota.rs @@ -109,10 +109,6 @@ impl TenantQuotaTable { #[async_trait::async_trait] impl Table for TenantQuotaTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/others/udf.rs b/src/query/service/src/table_functions/others/udf.rs index 96a8c15873bd6..622d7b83ab8d0 100644 --- a/src/query/service/src/table_functions/others/udf.rs +++ b/src/query/service/src/table_functions/others/udf.rs @@ -122,10 +122,6 @@ impl UdfEchoTable { #[async_trait::async_trait] impl Table for UdfEchoTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/src/table_functions/sync_crash_me.rs b/src/query/service/src/table_functions/sync_crash_me.rs index 5cd36268abcbb..ae9e97cee4750 100644 --- a/src/query/service/src/table_functions/sync_crash_me.rs +++ b/src/query/service/src/table_functions/sync_crash_me.rs @@ -90,10 +90,6 @@ impl SyncCrashMeTable { #[async_trait::async_trait] impl Table for SyncCrashMeTable { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs index 228f0753b4378..f2b9f92f9a4d5 100644 --- a/src/query/service/tests/it/sql/exec/get_table_bind_test.rs +++ b/src/query/service/tests/it/sql/exec/get_table_bind_test.rs @@ -1006,6 +1006,14 @@ impl TableContext for CtxDelegation { async fn drop_m_cte_temp_table(&self) -> Result<()> { todo!() } + + fn set_cluster(&self, _: Arc) { + todo!() + } + + async fn get_warehouse_cluster(&self) -> Result> { + todo!() + } } #[tokio::test(flavor = "multi_thread")] diff --git a/src/query/service/tests/it/storages/fuse/operations/commit.rs 
b/src/query/service/tests/it/storages/fuse/operations/commit.rs index 884615af0a3dd..6a713e3d8804e 100644 --- a/src/query/service/tests/it/storages/fuse/operations/commit.rs +++ b/src/query/service/tests/it/storages/fuse/operations/commit.rs @@ -881,6 +881,14 @@ impl TableContext for CtxDelegation { async fn drop_m_cte_temp_table(&self) -> Result<()> { todo!() } + + fn set_cluster(&self, _: Arc) { + todo!() + } + + async fn get_warehouse_cluster(&self) -> Result> { + todo!() + } } #[derive(Clone, Debug)] diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 292438a290e06..bb7240f586abd 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use databend_common_catalog::plan::DataSourceInfo; use databend_common_catalog::plan::DataSourcePlan; +use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_exception::Result; use databend_common_expression::DataSchemaRef; use databend_common_functions::BUILTIN_FUNCTIONS; @@ -710,6 +711,12 @@ impl PhysicalPlan { ) } + pub fn is_warehouse_distributed_plan(&self) -> bool { + self.children() + .any(|child| child.is_warehouse_distributed_plan()) + || matches!(self, Self::TableScan(v) if v.source.parts.kind == PartitionsShuffleKind::BroadcastWarehouse) + } + pub fn get_desc(&self) -> Result { Ok(match self { PhysicalPlan::TableScan(v) => format!( diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 9529adb43ef1c..03257f9738030 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -41,6 +41,7 @@ use crate::optimizer::join::SingleToInnerOptimizer; use crate::optimizer::rule::TransformResult; use crate::optimizer::statistics::CollectStatisticsOptimizer; use crate::optimizer::util::contains_local_table_scan; +use crate::optimizer::util::contains_warehouse_table_scan; use crate::optimizer::RuleFactory; use crate::optimizer::RuleID; use crate::optimizer::SExpr; @@ -374,6 +375,13 @@ pub async fn optimize_query(opt_ctx: &mut OptimizerContext, mut s_expr: SExpr) - if contains_local_table_scan(&s_expr, &opt_ctx.metadata) { opt_ctx.enable_distributed_optimization = false; info!("Disable distributed optimization due to local table scan."); + } else if contains_warehouse_table_scan(&s_expr, &opt_ctx.metadata) { + let warehouse = opt_ctx.table_ctx.get_warehouse_cluster().await?; + + if !warehouse.is_empty() { + opt_ctx.enable_distributed_optimization = true; + info!("Enable distributed optimization due to warehouse table scan."); + } } // Decorrelate subqueries, after this step, there should be no subquery in the expression. @@ -461,6 +469,13 @@ async fn get_optimized_memo(opt_ctx: &mut OptimizerContext, mut s_expr: SExpr) - if contains_local_table_scan(&s_expr, &opt_ctx.metadata) { opt_ctx.enable_distributed_optimization = false; info!("Disable distributed optimization due to local table scan."); + } else if contains_warehouse_table_scan(&s_expr, &opt_ctx.metadata) { + let warehouse = opt_ctx.table_ctx.get_warehouse_cluster().await?; + + if !warehouse.is_empty() { + opt_ctx.enable_distributed_optimization = true; + info!("Enable distributed optimization due to warehouse table scan."); + } } // Decorrelate subqueries, after this step, there should be no subquery in the expression. 
diff --git a/src/query/sql/src/planner/optimizer/util.rs b/src/query/sql/src/planner/optimizer/util.rs index 8120893a8cec2..1e8641dc6da4f 100644 --- a/src/query/sql/src/planner/optimizer/util.rs +++ b/src/query/sql/src/planner/optimizer/util.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use databend_common_catalog::table::DistributionLevel; + use super::SExpr; use crate::plans::RelOperator; use crate::MetadataRef; @@ -22,9 +24,38 @@ pub fn contains_local_table_scan(s_expr: &SExpr, metadata: &MetadataRef) -> bool .children() .any(|s_expr| contains_local_table_scan(s_expr, metadata)) || if let RelOperator::Scan(get) = s_expr.plan() { - metadata.read().table(get.table_index).table().is_local() + matches!( + metadata + .read() + .table(get.table_index) + .table() + .distribution_level(), + DistributionLevel::Local + ) } else { false } || matches!(s_expr.plan(), RelOperator::RecursiveCteScan { .. }) } + +pub fn contains_warehouse_table_scan(s_expr: &SExpr, metadata: &MetadataRef) -> bool { + if s_expr + .children() + .any(|s_expr| contains_warehouse_table_scan(s_expr, metadata)) + { + return true; + } + + if let RelOperator::Scan(scan) = s_expr.plan() { + return matches!( + metadata + .read() + .table(scan.table_index) + .table() + .distribution_level(), + DistributionLevel::Warehouse, + ); + } + + false +} diff --git a/src/query/storages/delta/src/table.rs b/src/query/storages/delta/src/table.rs index 2bff4c9b76d22..64e0d36f67ced 100644 --- a/src/query/storages/delta/src/table.rs +++ b/src/query/storages/delta/src/table.rs @@ -26,6 +26,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_args::TableArgs; use databend_common_catalog::table_context::TableContext; @@ -367,8 +368,8 @@ impl Table for DeltaTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index ce04e7587e14d..bb252fde36204 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -39,6 +39,7 @@ use databend_common_catalog::table::Bound; use databend_common_catalog::table::ColumnRange; use databend_common_catalog::table::ColumnStatisticsProvider; use databend_common_catalog::table::CompactionLimits; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::NavigationDescriptor; use databend_common_catalog::table::TimeNavigation; use databend_common_catalog::table_context::AbortChecker; @@ -605,8 +606,8 @@ impl FuseTable { #[async_trait::async_trait] impl Table for FuseTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &dyn Any { diff --git a/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs b/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs index 310b0f5e31dd2..3dd86628d731b 100644 --- a/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs +++ 
b/src/query/storages/fuse/src/table_functions/function_template/simple_arg_func.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use databend_common_catalog::plan::DataSourcePlan; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table_args::TableArgs; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -29,9 +30,10 @@ pub trait SimpleArgFunc { type Args; fn schema() -> TableSchemaRef; - fn is_local_func() -> bool { - true + fn distribution_level() -> DistributionLevel { + DistributionLevel::Local } + async fn apply( ctx: &Arc, args: &Self::Args, @@ -63,8 +65,8 @@ where T::schema() } - fn is_local_func(&self) -> bool { - T::is_local_func() + fn distribution_level(&self) -> DistributionLevel { + T::distribution_level() } async fn apply( diff --git a/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs b/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs index 3ce61497255bc..49985d3ba24ef 100644 --- a/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs +++ b/src/query/storages/fuse/src/table_functions/function_template/simple_func_template.rs @@ -21,6 +21,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_expression::TableSchemaRef; @@ -64,8 +65,8 @@ pub trait SimpleTableFunc: Send + Sync + 'static { "table_func_template".to_owned() } - fn is_local_func(&self) -> bool { - true + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Local } fn table_args(&self) -> Option; @@ -129,8 +130,8 @@ impl Table for TableFunctionTemplate { &self.table_info } - fn is_local(&self) -> bool { - self.inner.is_local_func() + fn distribution_level(&self) -> DistributionLevel { + self.inner.distribution_level() } #[async_backtrace::framed] @@ -140,18 +141,24 @@ impl Table for TableFunctionTemplate { _push_downs: Option, _dry_run: bool, ) -> Result<(PartStatistics, Partitions)> { - match self.inner.is_local_func() { - true => Ok(( + match self.inner.distribution_level() { + DistributionLevel::Local => Ok(( PartStatistics::default(), Partitions::create(PartitionsShuffleKind::Seq, vec![Arc::new(Box::new( PlaceHolder, ))]), )), - false => Ok(( + DistributionLevel::Cluster => Ok(( PartStatistics::default(), - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - PlaceHolder, - ))]), + Partitions::create(PartitionsShuffleKind::BroadcastCluster, vec![Arc::new( + Box::new(PlaceHolder), + )]), + )), + DistributionLevel::Warehouse => Ok(( + PartStatistics::default(), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(PlaceHolder), + )]), )), } } diff --git a/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs b/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs index 05e704c4e0455..9c1cfc6bc9e13 100644 --- a/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs +++ b/src/query/storages/fuse/src/table_functions/set_cache_capacity.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use databend_common_catalog::plan::DataSourcePlan; +use databend_common_catalog::table::DistributionLevel; use 
databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_expression::types::StringType; @@ -62,9 +63,8 @@ impl SimpleTableFunc for SetCacheCapacity { ]) } - fn is_local_func(&self) -> bool { - // cache operation needs to be broadcast to all nodes - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Warehouse } async fn apply( diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs index b2de36a1e7bc4..dc24507edd764 100644 --- a/src/query/storages/hive/hive/src/hive_table.rs +++ b/src/query/storages/hive/hive/src/hive_table.rs @@ -25,6 +25,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::NavigationPoint; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; @@ -396,8 +397,8 @@ impl HiveTable { #[async_trait::async_trait] impl Table for HiveTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &(dyn std::any::Any + 'static) { diff --git a/src/query/storages/iceberg/src/table.rs b/src/query/storages/iceberg/src/table.rs index b41ccaf7aaacd..47fbacb5b79de 100644 --- a/src/query/storages/iceberg/src/table.rs +++ b/src/query/storages/iceberg/src/table.rs @@ -28,6 +28,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_args::TableArgs; @@ -292,8 +293,8 @@ impl Table for IcebergTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/memory/src/memory_table.rs b/src/query/storages/memory/src/memory_table.rs index 2ec25250b6c79..915e057b7a160 100644 --- a/src/query/storages/memory/src/memory_table.rs +++ b/src/query/storages/memory/src/memory_table.rs @@ -26,6 +26,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -139,8 +140,8 @@ impl Table for MemoryTable { /// MemoryTable could be distributed table, yet we only insert data in one node per query /// Because commit_insert did not support distributed transaction - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn support_column_projection(&self) -> bool { @@ -207,7 +208,7 @@ impl Table for MemoryTable { let parts = vec![MemoryPartInfo::create()]; return Ok(( statistics, - Partitions::create(PartitionsShuffleKind::Broadcast, parts), + Partitions::create(PartitionsShuffleKind::BroadcastCluster, parts), 
)); } diff --git a/src/query/storages/null/src/null_table.rs b/src/query/storages/null/src/null_table.rs index 5bb625429694f..c7c73210b091f 100644 --- a/src/query/storages/null/src/null_table.rs +++ b/src/query/storages/null/src/null_table.rs @@ -20,6 +20,7 @@ use databend_common_catalog::plan::DataSourcePlan; use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -62,8 +63,8 @@ impl Table for NullTable { } /// Null do not keep data, it's safe to make it non-local. - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } #[async_backtrace::framed] diff --git a/src/query/storages/orc/src/table.rs b/src/query/storages/orc/src/table.rs index d3da0a13008ab..8b38930df7d8f 100644 --- a/src/query/storages/orc/src/table.rs +++ b/src/query/storages/orc/src/table.rs @@ -24,6 +24,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::StageTableInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -122,8 +123,8 @@ impl Table for OrcTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index 286f39498ec18..9bbc66eb79860 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -29,6 +29,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::query_kind::QueryKind; use databend_common_catalog::table::ColumnStatisticsProvider; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::DummyColumnStatisticsProvider; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; @@ -188,8 +189,8 @@ impl Table for ParquetRSTable { self } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/result_cache/src/table_function/table.rs b/src/query/storages/result_cache/src/table_function/table.rs index 74dcb3a0373d1..cd4a55757fce0 100644 --- a/src/query/storages/result_cache/src/table_function/table.rs +++ b/src/query/storages/result_cache/src/table_function/table.rs @@ -95,10 +95,6 @@ impl ResultScan { #[async_trait::async_trait] impl Table for ResultScan { - fn is_local(&self) -> bool { - true - } - fn as_any(&self) -> &dyn Any { self } diff --git a/src/query/storages/stage/src/stage_table.rs b/src/query/storages/stage/src/stage_table.rs index 2675f4e0b1f8d..d24ed83cc9040 100644 --- a/src/query/storages/stage/src/stage_table.rs +++ b/src/query/storages/stage/src/stage_table.rs @@ -23,6 +23,7 @@ use 
databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::StageTableInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -163,8 +164,8 @@ impl Table for StageTable { } } - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn read_data( diff --git a/src/query/storages/stream/src/stream_table.rs b/src/query/storages/stream/src/stream_table.rs index 6c7f052c71a7d..80b0f849912a2 100644 --- a/src/query/storages/stream/src/stream_table.rs +++ b/src/query/storages/stream/src/stream_table.rs @@ -24,6 +24,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::StreamColumn; use databend_common_catalog::table::ColumnStatisticsProvider; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; @@ -335,8 +336,8 @@ impl StreamTable { #[async_trait::async_trait] impl Table for StreamTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &dyn Any { diff --git a/src/query/storages/system/src/backtrace_table.rs b/src/query/storages/system/src/backtrace_table.rs index 0d995cc4bc345..3291434771e16 100644 --- a/src/query/storages/system/src/backtrace_table.rs +++ b/src/query/storages/system/src/backtrace_table.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use databend_common_base::get_all_tasks; use databend_common_base::runtime::Runtime; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -44,7 +45,7 @@ impl SyncSystemTable for BacktraceTable { const NAME: &'static str = "system.backtrace"; // Allow distributed query. - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/caches_table.rs b/src/query/storages/system/src/caches_table.rs index 04c411675dfa1..f2be151ef77f8 100644 --- a/src/query/storages/system/src/caches_table.rs +++ b/src/query/storages/system/src/caches_table.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_exception::Result; use databend_common_expression::types::NumberDataType; @@ -62,7 +63,7 @@ impl SyncSystemTable for CachesTable { const NAME: &'static str = "system.caches"; // Allow distributed query. 
- const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/log_queue.rs b/src/query/storages/system/src/log_queue.rs index 59cea8cecaac0..af1b1a4f0b9e4 100644 --- a/src/query/storages/system/src/log_queue.rs +++ b/src/query/storages/system/src/log_queue.rs @@ -23,6 +23,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -150,8 +151,8 @@ impl SystemLogTable { #[async_trait::async_trait] impl Table for SystemLogTable { - fn is_local(&self) -> bool { - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Warehouse } fn as_any(&self) -> &dyn Any { @@ -172,9 +173,9 @@ impl Table for SystemLogTable { Ok(( PartStatistics::default(), // Make the table in distributed. - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - SystemTablePart, - ))]), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(SystemTablePart), + )]), )) } diff --git a/src/query/storages/system/src/malloc_stats_totals_table.rs b/src/query/storages/system/src/malloc_stats_totals_table.rs index 4942edaaa69c5..9fa57054fb59d 100644 --- a/src/query/storages/system/src/malloc_stats_totals_table.rs +++ b/src/query/storages/system/src/malloc_stats_totals_table.rs @@ -15,6 +15,7 @@ use std::default::Default; use std::sync::Arc; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -53,7 +54,7 @@ pub struct MallocStatsTotalsTable { impl SyncSystemTable for MallocStatsTotalsTable { const NAME: &'static str = "system.malloc_stats_totals"; - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/metrics_table.rs b/src/query/storages/system/src/metrics_table.rs index b8811aecb297a..4921929002785 100644 --- a/src/query/storages/system/src/metrics_table.rs +++ b/src/query/storages/system/src/metrics_table.rs @@ -19,6 +19,7 @@ use databend_common_base::runtime::metrics::MetricSample; use databend_common_base::runtime::metrics::MetricValue; use databend_common_base::runtime::metrics::GLOBAL_METRICS_REGISTRY; use databend_common_base::runtime::GLOBAL_MEM_STAT; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -44,7 +45,7 @@ pub struct MetricsTable { impl SyncSystemTable for MetricsTable { const NAME: &'static str = "system.metrics"; // Allow distributed query. 
- const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; const BROADCAST_TRUNCATE: bool = true; fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/system/src/processes_table.rs b/src/query/storages/system/src/processes_table.rs index e864af9f29a7b..d34f90d242368 100644 --- a/src/query/storages/system/src/processes_table.rs +++ b/src/query/storages/system/src/processes_table.rs @@ -17,6 +17,7 @@ use std::time::Duration; use chrono::DateTime; use chrono::Utc; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -46,7 +47,7 @@ pub struct ProcessesTable { impl SyncSystemTable for ProcessesTable { const NAME: &'static str = "system.processes"; - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/queries_profiling.rs b/src/query/storages/system/src/queries_profiling.rs index f2f638aefa4bf..ab1914be835e5 100644 --- a/src/query/storages/system/src/queries_profiling.rs +++ b/src/query/storages/system/src/queries_profiling.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::sync::Arc; use databend_common_base::runtime::profile::ProfileStatisticsName; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -47,7 +48,7 @@ pub struct QueriesProfilingTable { impl SyncSystemTable for QueriesProfilingTable { const NAME: &'static str = "system.queries_profiling"; - const DATA_IN_LOCAL: bool = false; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Warehouse; fn get_table_info(&self) -> &TableInfo { &self.table_info diff --git a/src/query/storages/system/src/table.rs b/src/query/storages/system/src/table.rs index 5ff2b253d96fa..f1c12c009e406 100644 --- a/src/query/storages/system/src/table.rs +++ b/src/query/storages/system/src/table.rs @@ -21,6 +21,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -57,7 +58,7 @@ impl PartInfo for SystemTablePart { pub trait SyncSystemTable: Send + Sync { const NAME: &'static str; - const DATA_IN_LOCAL: bool = true; + const DISTRIBUTION_LEVEL: DistributionLevel = DistributionLevel::Local; const BROADCAST_TRUNCATE: bool = false; fn get_table_info(&self) -> &TableInfo; @@ -68,18 +69,24 @@ pub trait SyncSystemTable: Send + Sync { _ctx: Arc, _push_downs: Option, ) -> Result<(PartStatistics, Partitions)> { - match Self::DATA_IN_LOCAL { - true => Ok(( + match Self::DISTRIBUTION_LEVEL { + DistributionLevel::Local => Ok(( PartStatistics::default(), Partitions::create(PartitionsShuffleKind::Seq, vec![Arc::new(Box::new( SystemTablePart, ))]), )), - false => Ok(( + DistributionLevel::Cluster => Ok(( PartStatistics::default(), - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - SystemTablePart, - ))]), + 
Partitions::create(PartitionsShuffleKind::BroadcastCluster, vec![Arc::new( + Box::new(SystemTablePart), + )]), + )), + DistributionLevel::Warehouse => Ok(( + PartStatistics::default(), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(SystemTablePart), + )]), )), } } @@ -109,9 +116,14 @@ impl Table for SyncOneBlockSystemTable bool { + fn distribution_level(&self) -> DistributionLevel { // When querying a memory table, we send the partition to one node for execution. The other nodes send empty partitions. - false + // For system tables, they are always non-local, which ensures that system tables can be JOIN or UNION operation with any other table. + match TTable::DISTRIBUTION_LEVEL { + DistributionLevel::Local => DistributionLevel::Cluster, + DistributionLevel::Cluster => DistributionLevel::Cluster, + DistributionLevel::Warehouse => DistributionLevel::Warehouse, + } } fn get_table_info(&self) -> &TableInfo { @@ -155,7 +167,7 @@ impl Table for SyncOneBlockSystemTable bool { + fn broadcast_truncate_to_warehouse(&self) -> bool { TTable::BROADCAST_TRUNCATE } } @@ -198,7 +210,7 @@ impl SyncSource for SystemTableSyncSource &TableInfo; async fn get_full_data( @@ -213,18 +225,24 @@ pub trait AsyncSystemTable: Send + Sync { _ctx: Arc, _push_downs: Option, ) -> Result<(PartStatistics, Partitions)> { - match Self::DATA_IN_LOCAL { - true => Ok(( + match Self::DISTRIBUTION_LEVEL { + DistributionLevel::Local => Ok(( PartStatistics::default(), Partitions::create(PartitionsShuffleKind::Seq, vec![Arc::new(Box::new( SystemTablePart, ))]), )), - false => Ok(( + DistributionLevel::Cluster => Ok(( PartStatistics::default(), - Partitions::create(PartitionsShuffleKind::Broadcast, vec![Arc::new(Box::new( - SystemTablePart, - ))]), + Partitions::create(PartitionsShuffleKind::BroadcastCluster, vec![Arc::new( + Box::new(SystemTablePart), + )]), + )), + DistributionLevel::Warehouse => Ok(( + PartStatistics::default(), + Partitions::create(PartitionsShuffleKind::BroadcastWarehouse, vec![Arc::new( + Box::new(SystemTablePart), + )]), )), } } @@ -250,9 +268,14 @@ impl Table for AsyncOneBlockSystemTable bool { + fn distribution_level(&self) -> DistributionLevel { // When querying a memory table, we send the partition to one node for execution. The other nodes send empty partitions. - false + // For system tables, they are always non-local, which ensures that system tables can be JOIN or UNION operation with any other table. 
+ match TTable::DISTRIBUTION_LEVEL { + DistributionLevel::Local => DistributionLevel::Cluster, + DistributionLevel::Cluster => DistributionLevel::Cluster, + DistributionLevel::Warehouse => DistributionLevel::Warehouse, + } } fn get_table_info(&self) -> &TableInfo { diff --git a/src/query/storages/system/src/temp_files_table.rs b/src/query/storages/system/src/temp_files_table.rs index f4dae9e7f4ead..c0ee1678d93f7 100644 --- a/src/query/storages/system/src/temp_files_table.rs +++ b/src/query/storages/system/src/temp_files_table.rs @@ -21,6 +21,7 @@ use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; @@ -64,9 +65,8 @@ pub struct TempFilesTable { #[async_trait::async_trait] impl Table for TempFilesTable { - fn is_local(&self) -> bool { - // Follow the practice of `SyncOneBlockSystemTable::is_local` - false + fn distribution_level(&self) -> DistributionLevel { + DistributionLevel::Cluster } fn as_any(&self) -> &dyn Any { From 5faf4a6cfe171c1f1733cf3713fbfbe1ac801b69 Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 15:54:05 +0800 Subject: [PATCH 08/33] chore(ci): fix release bump version (#17277) --- .github/scripts/bump_version.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/scripts/bump_version.js b/.github/scripts/bump_version.js index 6cfd59c4033d8..619fbe197b3a4 100644 --- a/.github/scripts/bump_version.js +++ b/.github/scripts/bump_version.js @@ -7,15 +7,15 @@ module.exports = async ({ github, context, core }) => { const { TYPE, TAG } = process.env; - const RE_TAG_STABLE = /^v(\d+)\.(\d+)\.(\d+)$/g; - const RE_TAG_NIGHTLY = /^v(\d+)\.(\d+)\.(\d+)-nightly$/g; - const RE_TAG_PATCH = /^v(\d+)\.(\d+)\.(\d+)-p(\d+)$/g; + const RE_TAG_STABLE = /^v(\d+)\.(\d+)\.(\d+)$/; + const RE_TAG_NIGHTLY = /^v(\d+)\.(\d+)\.(\d+)-nightly$/; + const RE_TAG_PATCH = /^v(\d+)\.(\d+)\.(\d+)-p(\d+)$/; switch (TYPE) { case "": case "nightly": { core.setOutput("sha", context.sha); - core.info(`Nightly release triggered by ${TAG} (${context.sha})`); + core.info(`Nightly release triggered by (${context.sha})`); let previous = null; const releases = await github.rest.repos.listReleases({ @@ -23,8 +23,8 @@ module.exports = async ({ github, context, core }) => { repo: context.repo.repo, }); for (const release of releases.data) { - const result = RE_TAG_NIGHTLY.exec(release.tag_name); - if (result) { + const ret = RE_TAG_NIGHTLY.exec(release.tag_name); + if (ret) { previous = release.tag_name; break; } From c8b8a28dba0b6c9cb7918e95421bf7ce49ec177a Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 16:28:41 +0800 Subject: [PATCH 09/33] chore(ci): fix release patch version (#17278) --- .github/scripts/bump_version.js | 198 +++++++++++++++++++------------- 1 file changed, 119 insertions(+), 79 deletions(-) diff --git a/.github/scripts/bump_version.js b/.github/scripts/bump_version.js index 619fbe197b3a4..415eea0808ff3 100644 --- a/.github/scripts/bump_version.js +++ b/.github/scripts/bump_version.js @@ -11,23 +11,118 @@ module.exports = async ({ github, context, core }) => { const RE_TAG_NIGHTLY = /^v(\d+)\.(\d+)\.(\d+)-nightly$/; const RE_TAG_PATCH = /^v(\d+)\.(\d+)\.(\d+)-p(\d+)$/; - switch (TYPE) { - case "": - 
case "nightly": { - core.setOutput("sha", context.sha); - core.info(`Nightly release triggered by (${context.sha})`); + async function getPreviousNightlyRelease(github, context) { + const releases = await github.rest.repos.listReleases({ + owner: context.repo.owner, + repo: context.repo.repo, + }); + for (const release of releases.data) { + const ret = RE_TAG_NIGHTLY.exec(release.tag_name); + if (ret) { + return release.tag_name; + } + } + } + + function getNextNightlyRelease(previous) { + const nightly = RE_TAG_NIGHTLY.exec(previous); + if (nightly) { + const major = nightly[1]; + const minor = nightly[2]; + const patch = parseInt(nightly[3]) + 1; + return `v${major}.${minor}.${patch}-nightly`; + } + } - let previous = null; + async function getPreviousStableRelease(github, context) { + let page = 1; + while (true) { const releases = await github.rest.repos.listReleases({ owner: context.repo.owner, repo: context.repo.repo, + page, }); + if (releases.data.length === 0) { + break; + } + page++; for (const release of releases.data) { - const ret = RE_TAG_NIGHTLY.exec(release.tag_name); + const ret = RE_TAG_STABLE.exec(release.tag_name); if (ret) { - previous = release.tag_name; - break; + return release.tag_name; + } + } + } + } + + function getNextStableRelease() { + const nightly = RE_TAG_NIGHTLY.exec(TAG); + if (nightly) { + const major = nightly[1]; + const minor = nightly[2]; + const patch = nightly[3]; + return `v${major}.${minor}.${patch}`; + } + } + + async function getPreviousPatchRelease(github, context) { + let page = 1; + while (true) { + const releases = await github.rest.repos.listReleases({ + owner: context.repo.owner, + repo: context.repo.repo, + page, + }); + if (releases.data.length === 0) { + break; + } + page++; + for (const release of releases.data) { + if (!release.tag_name.startsWith(TAG)) { + continue; + } + if (release.tag_name === TAG) { + // no previous patch release, use the previous stable release + return release.tag_name; + } + const ret = RE_TAG_PATCH.exec(release.tag_name); + if (!ret) { + core.warning(`Ignore invalid patch release ${release.tag_name}`); + continue; } + return release.tag_name; + } + } + } + + function getNextPatchRelease(previous) { + const stable = RE_TAG_STABLE.exec(previous); + if (stable) { + const major = stable[1]; + const minor = stable[2]; + const patch = stable[3]; + return `v${major}.${minor}.${patch}-p1`; + } + const patch = RE_TAG_PATCH.exec(previous); + if (patch) { + const major = patch[1]; + const minor = patch[2]; + const patch = patch[3]; + const pv = parseInt(patch[4]); + return `v${major}.${minor}.${patch}-p${pv + 1}`; + } + } + + switch (TYPE) { + case "": + case "nightly": { + core.setOutput("sha", context.sha); + core.info(`Nightly release triggered by (${context.sha})`); + + const previous = await getPreviousNightlyRelease(github, context); + if (!previous) { + core.setFailed(`No previous nightly release found, ignoring`); + return; } core.setOutput("previous", previous); core.info(`Nightly release with previous release: ${previous}`); @@ -37,15 +132,11 @@ module.exports = async ({ github, context, core }) => { core.info(`Release create manually with tag ${TAG}`); return; } - const result = RE_TAG_NIGHTLY.exec(previous); - if (!result) { - core.setFailed(`The previous tag ${previous} is invalid.`); + const nextTag = getNextNightlyRelease(previous); + if (!nextTag) { + core.setFailed(`No next nightly release from ${previous}`); return; } - const major = result[1]; - const minor = result[2]; - const patch = 
(parseInt(result[3]) + 1).toString(); - const nextTag = `v${major}.${minor}.${patch}-nightly`; core.setOutput("tag", nextTag); core.info(`Release create new nightly ${nextTag}`); return; @@ -58,38 +149,15 @@ module.exports = async ({ github, context, core }) => { return; } core.info(`Stable release triggered by ${TAG} (${context.sha})`); - const result = RE_TAG_NIGHTLY.exec(TAG); - if (!result) { - core.setFailed(`The tag ${TAG} is invalid, ignoring`); + const nextTag = getNextStableRelease(); + if (!nextTag) { + core.setFailed(`No stable release from ${TAG}`); return; } - const major = result[1]; - const minor = result[2]; - const patch = result[3]; - const nextTag = `v${major}.${minor}.${patch}`; core.setOutput("tag", nextTag); core.info(`Stable release ${nextTag} from ${TAG}`); - let previous = null; - let page = 1; - while (true) { - const releases = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - page, - }); - if (releases.data.length === 0) { - break; - } - page++; - for (const release of releases.data) { - const ret = RE_TAG_STABLE.exec(release.tag_name); - if (ret) { - previous = release.tag_name; - break; - } - } - } + const previous = await getPreviousStableRelease(github, context); if (!previous) { core.setFailed(`No previous stable release found, ignoring`); return; @@ -120,49 +188,21 @@ module.exports = async ({ github, context, core }) => { `Patch release triggered by ${TAG} (${branch.data.commit.sha})` ); - let pv = 1; - let previous = null; - let page = 1; - while (true) { - const releases = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - page, - }); - if (releases.data.length === 0) { - break; - } - page++; - for (const release of releases.data) { - if (!release.tag_name.startsWith(TAG)) { - continue; - } - if (release.tag_name === TAG) { - previous = release.tag_name; - break; - } - const ret = RE_TAG_PATCH.exec(release.tag_name); - if (!ret) { - core.warning(`Ignore previous release ${release.tag_name}`); - continue; - } - pv = parseInt(result[4]) + 1; - previous = release.tag_name; - } - } + const previous = await getPreviousPatchRelease(github, context); if (!previous) { - core.setFailed(`No previous stable release found, ignoring`); + core.setFailed(`No previous patch release found, ignoring`); return; } core.setOutput("previous", previous); core.info(`Patch release with previous release: ${previous}`); - const major = result[1]; - const minor = result[2]; - const patch = result[3]; - const nextTag = `v${major}.${minor}.${patch}-p${pv}`; + const nextTag = getNextPatchRelease(previous); + if (!nextTag) { + core.setFailed(`No next patch release from ${previous}`); + return; + } core.setOutput("tag", nextTag); - core.info(`Patch release ${nextTag} from ${TAG}`); + core.info(`Patch release ${nextTag} from ${previous}`); return; } From d1a4975b4f7a471e202621545709490e68e5d54e Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 16:48:25 +0800 Subject: [PATCH 10/33] chore(ci): fix gen next patch release (#17281) --- .github/scripts/bump_version.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/scripts/bump_version.js b/.github/scripts/bump_version.js index 415eea0808ff3..a77b8817b92c7 100644 --- a/.github/scripts/bump_version.js +++ b/.github/scripts/bump_version.js @@ -103,13 +103,13 @@ module.exports = async ({ github, context, core }) => { const patch = stable[3]; return `v${major}.${minor}.${patch}-p1`; } - const patch = 
RE_TAG_PATCH.exec(previous); - if (patch) { - const major = patch[1]; - const minor = patch[2]; - const patch = patch[3]; - const pv = parseInt(patch[4]); - return `v${major}.${minor}.${patch}-p${pv + 1}`; + const version = RE_TAG_PATCH.exec(previous); + if (version) { + const major = version[1]; + const minor = version[2]; + const patch = version[3]; + const pv = parseInt(version[4]) + 1; + return `v${major}.${minor}.${patch}-p${pv}`; } } From 6bcfe0957dcf8ed5eaec92be826e2314daa7fdff Mon Sep 17 00:00:00 2001 From: everpcpc Date: Tue, 14 Jan 2025 17:27:25 +0800 Subject: [PATCH 11/33] chore(ci): unit test with 16c64g (#17283) --- .github/workflows/reuse.linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reuse.linux.yml b/.github/workflows/reuse.linux.yml index dd5a2652a2fda..30bcd3d662fb9 100644 --- a/.github/workflows/reuse.linux.yml +++ b/.github/workflows/reuse.linux.yml @@ -95,7 +95,7 @@ jobs: # artifacts: query test_unit: - runs-on: [self-hosted, X64, Linux, 8c32g, "${{ inputs.runner_provider }}"] + runs-on: [self-hosted, X64, Linux, 16c64g, "${{ inputs.runner_provider }}"] steps: - uses: actions/checkout@v4 with: From 2743ff1e26bbaf7b3362887a1e86da4d2a0b905a Mon Sep 17 00:00:00 2001 From: Bohu Date: Tue, 14 Jan 2025 17:29:18 +0800 Subject: [PATCH 12/33] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d308b1026cb6a..cf10f1d3b80b6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

Databend: The Next-Gen Cloud [Data+AI] Analytics

-Zero Disk Architecture
+The open-source, on-premise alternative to Snowflake

From f71635266c0f3383612fb8b2cf68e62f097a0977 Mon Sep 17 00:00:00 2001 From: Bohu Date: Tue, 14 Jan 2025 17:41:11 +0800 Subject: [PATCH 13/33] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index cf10f1d3b80b6..bf1243ebfd776 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,10 @@ **Databend**, built in Rust, is an open-source cloud data warehouse that serves as a cost-effective [alternative to Snowflake](https://github.com/datafuselabs/databend/issues/13059). With its focus on fast query execution and data ingestion, it's designed for complex analysis of the world's largest datasets. +**Production-Proven Scale:** +- 🤝 **Enterprise Adoption**: Trusted by over **50 organizations** processing more than **100 million queries daily** +- 🗄️ **Massive Scale**: Successfully managing over **800 petabytes** of analytical data + ## ⚡ Performance
From 8a47dce8bf8681ff96781aa0391fa938071a4c53 Mon Sep 17 00:00:00 2001 From: baishen Date: Tue, 14 Jan 2025 19:23:47 +0800 Subject: [PATCH 14/33] fix(query): window function support params (#17282) --- src/query/ast/src/parser/expr.rs | 30 ++++- src/query/ast/tests/it/parser.rs | 1 + src/query/ast/tests/it/testdata/expr.txt | 114 ++++++++++++++++++ .../query/window_function/window_basic.test | 29 +++++ 4 files changed, 169 insertions(+), 5 deletions(-) diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index ec3ffe28e109f..854923757dc22 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1083,18 +1083,36 @@ pub fn expr_element(i: Input) -> IResult> { }, }, ); + let function_call_with_params_window = map( + rule! { + #function_name + ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" + ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" + ~ #window_function + }, + |(name, _, params, _, _, opt_distinct, opt_args, _, window)| ExprElement::FunctionCall { + func: FunctionCall { + distinct: opt_distinct.is_some(), + name, + args: opt_args.unwrap_or_default(), + params, + window: Some(window), + lambda: None, + }, + }, + ); let function_call_with_params = map( rule! { #function_name - ~ ("(" ~ #comma_separated_list1(subexpr(0)) ~ ")")? + ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" }, - |(name, params, _, opt_distinct, opt_args, _)| ExprElement::FunctionCall { + |(name, _, params, _, _, opt_distinct, opt_args, _)| ExprElement::FunctionCall { func: FunctionCall { distinct: opt_distinct.is_some(), name, args: opt_args.unwrap_or_default(), - params: params.map(|(_, x, _)| x).unwrap_or_default(), + params, window: None, lambda: None, }, @@ -1376,7 +1394,6 @@ pub fn expr_element(i: Input) -> IResult> { | #interval_expr : "`INTERVAL `" | #extract : "`EXTRACT((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK) FROM ...)`" | #date_part : "`DATE_PART((YEAR | QUARTER | MONTH | DAY | HOUR | MINUTE | SECOND | WEEK), ...)`" - ), rule!( #substring : "`SUBSTRING(... [FROM ...] [FOR ...])`" @@ -1388,9 +1405,12 @@ pub fn expr_element(i: Input) -> IResult> { | #count_all_with_window : "`COUNT(*) OVER ...`" | #function_call_with_lambda : "`function(..., x -> ...)`" | #function_call_with_window : "`function(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" + | #function_call_with_params_window : "`function(...)(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" | #function_call_with_params : "`function(...)(...)`" | #function_call : "`function(...)`" - | #case : "`CASE ... END`" + ), + rule!( + #case : "`CASE ... 
END`" | #tuple : "`( [, ...])`" | #subquery : "`(SELECT ...)`" | #column_ref : "" diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index 1d8d789906695..052648e507d76 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -1251,6 +1251,7 @@ fn test_expr() { r#"COUNT() OVER (ORDER BY hire_date ROWS UNBOUNDED PRECEDING)"#, r#"COUNT() OVER (ORDER BY hire_date ROWS CURRENT ROW)"#, r#"COUNT() OVER (ORDER BY hire_date ROWS 3 PRECEDING)"#, + r#"QUANTILE_CONT(0.5)(salary) OVER (PARTITION BY department ORDER BY hire_date)"#, r#"ARRAY_APPLY([1,2,3], x -> x + 1)"#, r#"ARRAY_FILTER(col, y -> y % 2 = 0)"#, r#"(current_timestamp, current_timestamp(), now())"#, diff --git a/src/query/ast/tests/it/testdata/expr.txt b/src/query/ast/tests/it/testdata/expr.txt index eb279496999c7..af6d394434354 100644 --- a/src/query/ast/tests/it/testdata/expr.txt +++ b/src/query/ast/tests/it/testdata/expr.txt @@ -4229,6 +4229,120 @@ FunctionCall { } +---------- Input ---------- +QUANTILE_CONT(0.5)(salary) OVER (PARTITION BY department ORDER BY hire_date) +---------- Output --------- +QUANTILE_CONT(0.5)(salary) OVER (PARTITION BY department ORDER BY hire_date) +---------- AST ------------ +FunctionCall { + span: Some( + 0..76, + ), + func: FunctionCall { + distinct: false, + name: Identifier { + span: Some( + 0..13, + ), + name: "QUANTILE_CONT", + quote: None, + ident_type: None, + }, + args: [ + ColumnRef { + span: Some( + 19..25, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 19..25, + ), + name: "salary", + quote: None, + ident_type: None, + }, + ), + }, + }, + ], + params: [ + Literal { + span: Some( + 14..17, + ), + value: Decimal256 { + value: 5, + precision: 76, + scale: 1, + }, + }, + ], + window: Some( + WindowDesc { + ignore_nulls: None, + window: WindowSpec( + WindowSpec { + existing_window_name: None, + partition_by: [ + ColumnRef { + span: Some( + 46..56, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 46..56, + ), + name: "department", + quote: None, + ident_type: None, + }, + ), + }, + }, + ], + order_by: [ + OrderByExpr { + expr: ColumnRef { + span: Some( + 66..75, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 66..75, + ), + name: "hire_date", + quote: None, + ident_type: None, + }, + ), + }, + }, + asc: None, + nulls_first: None, + }, + ], + window_frame: None, + }, + ), + }, + ), + lambda: None, + }, +} + + ---------- Input ---------- ARRAY_APPLY([1,2,3], x -> x + 1) ---------- Output --------- diff --git a/tests/sqllogictests/suites/query/window_function/window_basic.test b/tests/sqllogictests/suites/query/window_function/window_basic.test index 75110235c9de6..694e675219bce 100644 --- a/tests/sqllogictests/suites/query/window_function/window_basic.test +++ b/tests/sqllogictests/suites/query/window_function/window_basic.test @@ -606,6 +606,34 @@ select grouping(salary), grouping(depname), sum(grouping(salary)) over (partitio 1 0 3 1 1 1 +query TII +SELECT depname, empno, quantile_cont(salary) OVER (PARTITION BY depname ORDER BY empno) FROM empsalary ORDER BY depname, empno +---- +develop 7 4200.0 +develop 8 5100.0 +develop 9 4500.0 +develop 10 4850.0 +develop 11 5200.0 +personnel 2 3900.0 +personnel 5 3700.0 +sales 1 5000.0 +sales 3 4900.0 +sales 4 4800.0 + +query TII +SELECT depname, empno, quantile_cont(0.8)(salary) OVER (PARTITION BY depname ORDER BY empno) FROM 
empsalary ORDER BY depname, empno +---- +develop 7 4200.0 +develop 8 5640.0 +develop 9 5400.0 +develop 10 5520.0 +develop 11 5360.0 +personnel 2 3900.0 +personnel 5 3820.0 +sales 1 5000.0 +sales 3 4960.0 +sales 4 4920.0 + # Window func in subquery query I SELECT * FROM (SELECT row_number() OVER (PARTITION BY depname ORDER BY salary) rn FROM empsalary ORDER BY depname, rn) order by 1; @@ -846,3 +874,4 @@ Product B 1200 NULL statement ok DROP DATABASE test_window_basic; + From 13d1dcdc319006f058757a7c5042aa6b19f6de69 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Tue, 14 Jan 2025 23:58:20 +0800 Subject: [PATCH 15/33] chore: try fix flaky ci (#17265) --- tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh b/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh index e571c4f9b1ec0..39cc7201197c0 100755 --- a/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh +++ b/tests/suites/5_ee/01_vacuum/01_0001_ee_vacuum_kill_randomly.sh @@ -21,7 +21,8 @@ pid=$! # kill query randomly sleep_time=$(expr $RANDOM % 5 + 5) sleep $sleep_time -kill $pid +disown %1 +kill -9 $pid > /dev/null 2>&1 # restart query echo "will restart query" From ce8a1c9ec7037c0f74849f804f609ad74843d5ae Mon Sep 17 00:00:00 2001 From: everpcpc Date: Wed, 15 Jan 2025 17:22:20 +0800 Subject: [PATCH 16/33] chore(ci): add bendsql binary to release (#17289) --- .github/actions/pack_binaries/action.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/actions/pack_binaries/action.yml b/.github/actions/pack_binaries/action.yml index 64d3283b129b9..bc54eedb02e41 100644 --- a/.github/actions/pack_binaries/action.yml +++ b/.github/actions/pack_binaries/action.yml @@ -23,6 +23,14 @@ runs: category: ${{ inputs.category }} path: distro/bin artifacts: metactl,meta,query,query.debug + - name: Download BendSQL + shell: bash + env: + GH_TOKEN: ${{ github.token }} + run: | + verison=$(gh release list --repo databendlabs/bendsql | head -n 1 | awk '{print $1}') + curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${verison}-${{ inputs.target }}.tar.gz + tar -xzvf /tmp/bendsql.tar.gz -C distro/bin - name: Pack Binaries id: pack_binaries shell: bash From 71f00b8d2b7af28b7d1e95f7b0b20ab48e63310b Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Wed, 15 Jan 2025 21:19:58 +0800 Subject: [PATCH 17/33] chore: databend-test print more stderr (#17288) --- tests/databend-test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/databend-test b/tests/databend-test index b4b5dc41688b6..f439858093e5d 100755 --- a/tests/databend-test +++ b/tests/databend-test @@ -326,7 +326,7 @@ def run_tests_array(all_tests_with_params): if os.path.isfile(stdout_file): status += ", result:\n\n" status += '\n'.join( - open(stdout_file).read().split('\n')[:100]) + open(stdout_file).read().split('\n')[:300]) status += '\n' elif stderr: @@ -336,7 +336,7 @@ def run_tests_array(all_tests_with_params): status += MSG_FAIL status += print_test_time(total_time) status += " - having stderror:\n{}\n".format( - '\n'.join(stderr.split('\n')[:100])) + '\n'.join(stderr.split('\n')[:300])) elif 'Exception' in stdout: failures += 1 failure_cases.append(case_file_full_path) @@ -344,7 +344,7 @@ def run_tests_array(all_tests_with_params): status += MSG_FAIL status += 
print_test_time(total_time) status += " - having exception:\n{}\n".format( - '\n'.join(stdout.split('\n')[:100])) + '\n'.join(stdout.split('\n')[:300])) elif not os.path.isfile(result_file): status += MSG_UNKNOWN status += print_test_time(total_time) From e9fc0a1867bcfe91208a0da70fac8c95e2a8dabf Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 16 Jan 2025 08:43:05 +0800 Subject: [PATCH 18/33] refactor(query): only call once finish callback (#17293) * fix(query): only call once finish callback * fix(query): only call once finish callback * fix(query): fix --- src/query/service/src/pipelines/pipeline_builder.rs | 12 +++++++----- src/query/service/src/sessions/query_ctx.rs | 9 +++++++++ src/query/service/src/sessions/query_ctx_shared.rs | 2 ++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 6508dcca83b6c..e796d2cb4d705 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -94,11 +94,13 @@ impl PipelineBuilder { } // unload spill metas - self.main_pipeline - .set_on_finished(always_callback(move |_info: &ExecutionInfo| { - self.ctx.unload_spill_meta(); - Ok(()) - })); + if !self.ctx.mark_unload_callbacked() { + self.main_pipeline + .set_on_finished(always_callback(move |_info: &ExecutionInfo| { + self.ctx.unload_spill_meta(); + Ok(()) + })); + } Ok(PipelineBuildResult { main_pipeline: self.main_pipeline, diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index b06d45b0f0841..cfb5ce834c46d 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -322,6 +322,9 @@ impl QueryContext { pub fn update_init_query_id(&self, id: String) { self.shared.spilled_files.write().clear(); + self.shared + .unload_callbacked + .store(false, Ordering::Release); self.shared.cluster_spill_progress.write().clear(); *self.shared.init_query_id.write() = id; } @@ -471,6 +474,12 @@ impl QueryContext { Ok(table) } + pub fn mark_unload_callbacked(&self) -> bool { + self.shared + .unload_callbacked + .fetch_or(true, Ordering::SeqCst) + } + pub fn unload_spill_meta(&self) { const SPILL_META_SUFFIX: &str = ".list"; let r = self.shared.spilled_files.read(); diff --git a/src/query/service/src/sessions/query_ctx_shared.rs b/src/query/service/src/sessions/query_ctx_shared.rs index af2324b508239..50341c7f1eea4 100644 --- a/src/query/service/src/sessions/query_ctx_shared.rs +++ b/src/query/service/src/sessions/query_ctx_shared.rs @@ -150,6 +150,7 @@ pub struct QueryContextShared { pub(in crate::sessions) cluster_spill_progress: Arc>>, pub(in crate::sessions) spilled_files: Arc>>, + pub(in crate::sessions) unload_callbacked: AtomicBool, } impl QueryContextShared { @@ -209,6 +210,7 @@ impl QueryContextShared { cluster_spill_progress: Default::default(), spilled_files: Default::default(), + unload_callbacked: AtomicBool::new(false), warehouse_cache: Arc::new(RwLock::new(None)), })) } From ab0d1a81d16d10be9c0b12e012f77180a25b7f43 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 16 Jan 2025 10:36:31 +0800 Subject: [PATCH 19/33] feat: orc support fill missing tuple field (#17247) * tmp orc speed up * missing field of tuple as null. * feat: deal will array of tuple which missing fields * fix: cluster key use wrong index. 
* cleanup * fill missing tuple field only for orc * add test for cluster by * fix clippy --- Cargo.lock | 2 +- Cargo.toml | 2 +- .../sql/src/planner/binder/copy_into_table.rs | 13 +- .../sql/src/planner/expression_parser.rs | 25 ++- .../stage/src/read/columnar/projection.rs | 193 ++++++++++++++++-- .../src/copy_into_table/processors/decoder.rs | 59 ++++++ .../orc/src/copy_into_table/projection.rs | 2 + .../src/parquet_rs/copy_into_table/reader.rs | 2 + .../05_ddl/05_0000_ddl_create_tables.test | 6 + 9 files changed, 273 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9ea8284d9192e..edd9af42f8e99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10532,7 +10532,7 @@ dependencies = [ [[package]] name = "orc-rust" version = "0.5.0" -source = "git+https://github.com/datafusion-contrib/orc-rust?rev=dfb1ede#dfb1ede8f875566372568346ad47b359e1a9c92a" +source = "git+https://github.com/youngsofun/orc-rust?rev=6c5ac57#6c5ac578d8790e3f79bde4764ccea7e5e76a0f0d" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 07383209a6470..1ecee31663e6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -639,7 +639,7 @@ deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "3038c145" } ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" } openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.7" } -orc-rust = { git = "https://github.com/datafusion-contrib/orc-rust", rev = "dfb1ede" } +orc-rust = { git = "https://github.com/youngsofun/orc-rust", rev = "6c5ac57" } recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "6af35a1" } sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1" } tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "7502370" } diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index ec1a80ea860a8..c59d9baede093 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -182,15 +182,12 @@ impl Binder { files: stmt.files.clone(), pattern, }; - let required_values_schema: DataSchemaRef = Arc::new( - match &stmt.dst_columns { - Some(cols) => self.schema_project(&table.schema(), cols)?, - None => self.schema_project(&table.schema(), &[])?, - } - .into(), - ); + let stage_schema = match &stmt.dst_columns { + Some(cols) => self.schema_project(&table.schema(), cols)?, + None => self.schema_project(&table.schema(), &[])?, + }; - let stage_schema = infer_table_schema(&required_values_schema)?; + let required_values_schema: DataSchemaRef = Arc::new(stage_schema.clone().into()); let default_values = if stage_info.file_format_params.need_field_default() { Some( diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index dce84a0d74425..ad8c6e93eeef1 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -395,7 +395,30 @@ pub fn parse_cluster_keys( table_meta: Arc, ast_exprs: Vec, ) -> Result> { - let exprs = parse_ast_exprs(ctx, table_meta, ast_exprs)?; + let schema = table_meta.schema(); + let (mut bind_context, metadata) = bind_table(table_meta)?; + let settings = ctx.get_settings(); + let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; + let mut type_checker = 
TypeChecker::try_create( + &mut bind_context, + ctx, + &name_resolution_ctx, + metadata, + &[], + false, + )?; + + let exprs: Vec = ast_exprs + .iter() + .map(|ast| { + let (scalar, _) = *type_checker.resolve(ast)?; + let expr = scalar + .as_expr()? + .project_column_ref(|col| schema.index_of(&col.column_name).unwrap()); + Ok(expr) + }) + .collect::>()?; + let mut res = Vec::with_capacity(exprs.len()); for expr in exprs { let inner_type = expr.data_type().remove_nullable(); diff --git a/src/query/storages/common/stage/src/read/columnar/projection.rs b/src/query/storages/common/stage/src/read/columnar/projection.rs index eb67312c50bd6..224ed263fcfdf 100644 --- a/src/query/storages/common/stage/src/read/columnar/projection.rs +++ b/src/query/storages/common/stage/src/read/columnar/projection.rs @@ -14,12 +14,18 @@ use databend_common_exception::ErrorCode; use databend_common_expression::type_check::check_cast; +use databend_common_expression::type_check::check_function; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberScalar; use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_expression::Scalar; +use databend_common_expression::TableDataType; +use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_meta_app::principal::NullAs; +use databend_common_meta_app::principal::StageFileFormatType; use crate::read::cast::load_can_auto_cast_to; @@ -30,10 +36,11 @@ use crate::read::cast::load_can_auto_cast_to; pub fn project_columnar( input_schema: &TableSchemaRef, output_schema: &TableSchemaRef, - null_as: &NullAs, + missing_as: &NullAs, default_values: &Option>, location: &str, case_sensitive: bool, + fmt: StageFileFormatType, ) -> databend_common_exception::Result<(Vec, Vec)> { let mut pushdown_columns = vec![]; let mut output_projection = vec![]; @@ -68,32 +75,96 @@ pub fn project_columnar( if from_field.data_type == to_field.data_type { expr - } else { - // note: tuple field name is dropped here, matched by pos here - if load_can_auto_cast_to( - &from_field.data_type().into(), + } else if load_can_auto_cast_to( + &from_field.data_type().into(), + &to_field.data_type().into(), + ) { + check_cast( + None, + false, + expr, &to_field.data_type().into(), + &BUILTIN_FUNCTIONS, + )? + } else if fmt == StageFileFormatType::Orc && !matches!(missing_as, NullAs::Error) { + // special cast for tuple type, fill in default values for the missing fields. 
+ match ( + from_field.data_type.remove_nullable(), + to_field.data_type.remove_nullable(), ) { - check_cast( - None, - false, + ( + TableDataType::Array(box TableDataType::Nullable( + box TableDataType::Tuple { + fields_name: from_fields_name, + fields_type: _from_fields_type, + }, + )), + TableDataType::Array(box TableDataType::Nullable( + box TableDataType::Tuple { + fields_name: to_fields_name, + fields_type: _to_fields_type, + }, + )), + ) => { + let mut v = vec![]; + for to in to_fields_name.iter() { + match from_fields_name.iter().position(|k| k == to) { + Some(p) => v.push(p as i32), + None => v.push(-1), + }; + } + let name = v + .iter() + .map(|v| v.to_string()) + .collect::>() + .join(","); + Expr::ColumnRef { + span: None, + id: pos, + data_type: from_field.data_type().into(), + display_name: format!("#!{name}",), + } + } + ( + TableDataType::Tuple { + fields_name: from_fields_name, + fields_type: from_fields_type, + }, + TableDataType::Tuple { + fields_name: to_fields_name, + fields_type: to_fields_type, + }, + ) => project_tuple( expr, - &to_field.data_type().into(), - &BUILTIN_FUNCTIONS, - )? - } else { - return Err(ErrorCode::BadDataValueType(format!( - "fail to load file {}: Cannot cast column {} from {:?} to {:?}", - location, - field_name, - from_field.data_type(), - to_field.data_type() - ))); + from_field, + to_field, + &from_fields_name, + &from_fields_type, + &to_fields_name, + &to_fields_type, + )?, + (_, _) => { + return Err(ErrorCode::BadDataValueType(format!( + "fail to load file {}: Cannot cast column {} from {:?} to {:?}", + location, + field_name, + from_field.data_type(), + to_field.data_type() + ))); + } } + } else { + return Err(ErrorCode::BadDataValueType(format!( + "fail to load file {}: Cannot cast column {} from {:?} to {:?}", + location, + field_name, + from_field.data_type(), + to_field.data_type() + ))); } } 0 => { - match null_as { + match missing_as { // default NullAs::Error => { return Err(ErrorCode::BadDataValueType(format!( @@ -139,3 +210,85 @@ pub fn project_columnar( } Ok((output_projection, pushdown_columns)) } + +fn project_tuple( + expr: Expr, + from_field: &TableField, + to_field: &TableField, + from_fields_name: &[String], + from_fields_type: &[TableDataType], + to_fields_name: &[String], + to_fields_type: &[TableDataType], +) -> databend_common_exception::Result { + let mut inner_columns = Vec::with_capacity(to_fields_name.len()); + + for (to_field_name, to_field_type) in to_fields_name.iter().zip(to_fields_type.iter()) { + let inner_column = match from_fields_name.iter().position(|k| k == to_field_name) { + Some(idx) => { + let from_field_type = from_fields_type.get(idx).unwrap(); + let tuple_idx = Scalar::Number(NumberScalar::Int64((idx + 1) as i64)); + let inner_column = check_function( + None, + "get", + &[tuple_idx], + &[expr.clone()], + &BUILTIN_FUNCTIONS, + )?; + if from_field_type != to_field_type { + check_cast( + None, + false, + inner_column, + &to_field_type.into(), + &BUILTIN_FUNCTIONS, + )? + } else { + inner_column + } + } + None => { + // if inner field not exists, fill default value. 
+ let data_type: DataType = to_field_type.into(); + let scalar = Scalar::default_value(&data_type); + Expr::Constant { + span: None, + scalar, + data_type, + } + } + }; + inner_columns.push(inner_column); + } + let tuple_column = check_function(None, "tuple", &[], &inner_columns, &BUILTIN_FUNCTIONS)?; + let tuple_column = if from_field.data_type != to_field.data_type { + let dest_ty: DataType = (&to_field.data_type).into(); + check_cast(None, false, tuple_column, &dest_ty, &BUILTIN_FUNCTIONS)? + } else { + tuple_column + }; + + if from_field.data_type.is_nullable() && to_field.data_type.is_nullable() { + // add if function to cast null value + let is_not_null = check_function( + None, + "is_not_null", + &[], + &[expr.clone()], + &BUILTIN_FUNCTIONS, + )?; + let null_scalar = Expr::Constant { + span: None, + scalar: Scalar::Null, + data_type: DataType::Null, + }; + check_function( + None, + "if", + &[], + &[is_not_null, tuple_column, null_scalar], + &BUILTIN_FUNCTIONS, + ) + } else { + Ok(tuple_column) + } +} diff --git a/src/query/storages/orc/src/copy_into_table/processors/decoder.rs b/src/query/storages/orc/src/copy_into_table/processors/decoder.rs index adf83c641e74c..b9e46e9113e01 100644 --- a/src/query/storages/orc/src/copy_into_table/processors/decoder.rs +++ b/src/query/storages/orc/src/copy_into_table/processors/decoder.rs @@ -22,13 +22,19 @@ use arrow_array::RecordBatch; use databend_common_catalog::query_kind::QueryKind; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::types::ArrayColumn; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NullableColumn; use databend_common_expression::BlockEntry; use databend_common_expression::BlockMetaInfoDowncast; +use databend_common_expression::Column; +use databend_common_expression::ColumnBuilder; use databend_common_expression::DataBlock; use databend_common_expression::DataSchemaRef; use databend_common_expression::Evaluator; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; +use databend_common_expression::Value; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; @@ -96,6 +102,59 @@ impl StripeDecoderForCopy { let evaluator = Evaluator::new(&block, &self.func_ctx, &BUILTIN_FUNCTIONS); let mut columns = Vec::with_capacity(projection.len()); for (field, expr) in self.output_schema.fields().iter().zip(projection.iter()) { + if let Expr::ColumnRef { + display_name, id, .. + } = expr + { + if let Some(display_name) = display_name.strip_prefix("#!") { + let typs = match field.data_type() { + DataType::Nullable(box DataType::Array(box DataType::Nullable( + box DataType::Tuple(v), + ))) => v, + _ => { + log::error!("expect array of tuple, got {:?}", field); + unreachable!("expect value: array of tuple") + } + }; + let positions = display_name + .split(',') + .map(|s| s.parse::().unwrap()) + .collect::>(); + let mut e = block.columns()[*id].clone(); + match e.value { + Value::Column(Column::Nullable(box NullableColumn { + column: + Column::Array(box ArrayColumn { + values: + Column::Nullable(box NullableColumn { + column: Column::Tuple(ref mut v), + .. + }), + .. + }), + .. 
+ })) => { + let len = v[0].len(); + let mut v2 = vec![]; + for (i, p) in positions.iter().enumerate() { + if *p < 0 { + v2.push(ColumnBuilder::repeat_default(&typs[i], len).build()); + } else { + v2.push(v[*p as usize].clone()); + } + } + *v = v2 + } + _ => { + log::error!("expect array of tuple, got {:?} {:?}", field, e.value); + unreachable!("expect value: array of tuple") + } + } + let column = BlockEntry::new(field.data_type().clone(), e.value); + columns.push(column); + continue; + } + } let value = evaluator.run(expr)?; let column = BlockEntry::new(field.data_type().clone(), value); columns.push(column); diff --git a/src/query/storages/orc/src/copy_into_table/projection.rs b/src/query/storages/orc/src/copy_into_table/projection.rs index 0ac410e09dcf0..caa9feeca5dca 100644 --- a/src/query/storages/orc/src/copy_into_table/projection.rs +++ b/src/query/storages/orc/src/copy_into_table/projection.rs @@ -19,6 +19,7 @@ use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_meta_app::principal::NullAs; +use databend_common_meta_app::principal::StageFileFormatType; use databend_storages_common_stage::project_columnar; use crate::hashable_schema::HashableSchema; @@ -56,6 +57,7 @@ impl ProjectionFactory { &self.default_values, location, false, + StageFileFormatType::Orc, )? .0; self.projections.insert(schema.clone(), v.clone()); diff --git a/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs b/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs index 564882a26fcac..d5706c93e2d72 100644 --- a/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs +++ b/src/query/storages/parquet/src/parquet_rs/copy_into_table/reader.rs @@ -23,6 +23,7 @@ use databend_common_expression::Expr; use databend_common_expression::RemoteExpr; use databend_common_expression::TableSchemaRef; use databend_common_meta_app::principal::NullAs; +use databend_common_meta_app::principal::StageFileFormatType; use databend_common_storage::parquet_rs::infer_schema_with_extension; use databend_storages_common_stage::project_columnar; use opendal::Operator; @@ -88,6 +89,7 @@ impl RowGroupReaderForCopy { &default_values, location, case_sensitive, + StageFileFormatType::Parquet, )?; pushdown_columns.sort(); let mapping = pushdown_columns diff --git a/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test b/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test index 49b3ff94ca675..69383c6d6dd69 100644 --- a/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test +++ b/tests/sqllogictests/suites/base/05_ddl/05_0000_ddl_create_tables.test @@ -579,6 +579,12 @@ create table t(a int) cluster by (a+rand()) statement error 1081.*is not deterministic create table t(a string) cluster by (a+uuid()) +statement ok +create table tt(a tuple(x int, y int), b string, c int) cluster by (b); + +statement ok +insert into tt(b,c) values ('1',2); + ################################################## # table option `data_retention_period_in_hours` # ################################################## From 6850110dd6335dc611fa2adcdea358915e45c3a1 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 16 Jan 2025 11:16:44 +0800 Subject: [PATCH 20/33] chore: Bump opendal to include a fix for gcs (#17290) Signed-off-by: Xuanwo --- Cargo.lock | 6 ++---- Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
edd9af42f8e99..f74eab595aa12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10227,8 +10227,7 @@ dependencies = [ [[package]] name = "object_store_opendal" version = "0.49.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d47750d91b6d9fa245b3dff15d582dff8462d826cf6c2d5f2e91035d9a317f9" +source = "git+https://github.com/apache/opendal?rev=78b6a9f#78b6a9f26dbeb5118990d3140f1c636d8e54e719" dependencies = [ "async-trait", "bytes", @@ -10309,8 +10308,7 @@ dependencies = [ [[package]] name = "opendal" version = "0.51.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9dcfa7a3615e3c60eb662ed6b46b6f244cf2658098f593c0c0915430b3a268" +source = "git+https://github.com/apache/opendal?rev=78b6a9f#78b6a9f26dbeb5118990d3140f1c636d8e54e719" dependencies = [ "anyhow", "async-backtrace", diff --git a/Cargo.toml b/Cargo.toml index 1ecee31663e6b..e09248212d74a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -369,10 +369,10 @@ num-derive = "0.3.3" num-traits = "0.2.19" num_cpus = "1.13.1" object = "0.36.5" -object_store_opendal = { version = "0.49.0" } +object_store_opendal = { version = "0.49.0", package = "object_store_opendal", git = "https://github.com/apache/opendal", rev = "78b6a9f" } once_cell = "1.15.0" openai_api_rust = "0.1" -opendal = { version = "0.51.1", features = [ +opendal = { version = "0.51.1", package = "opendal", git = "https://github.com/apache/opendal", rev = "78b6a9f", features = [ "layers-fastrace", "layers-prometheus-client", "layers-async-backtrace", From 9b38e65589ee816cf89216627c68e22ea5b5b729 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Thu, 16 Jan 2025 14:05:48 +0800 Subject: [PATCH 21/33] fix: copy into table collect files twice sometimes. (#17300) * fix: copy into table collect files twice sometimes.
* add log for infer schema --- src/query/sql/src/planner/binder/copy_into_table.rs | 2 ++ src/query/sql/src/planner/plans/copy_into_table.rs | 6 ++++++ .../storages/parquet/src/parquet_rs/parquet_table/table.rs | 5 ++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index c59d9baede093..fd1a0d6fb6655 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -223,6 +223,7 @@ impl Binder { write_mode: CopyIntoTableMode::Copy, query: None, enable_distributed: false, + files_collected: false, }) } @@ -400,6 +401,7 @@ impl Binder { enable_distributed: false, is_transform: false, + files_collected: true, }; self.bind_copy_into_table_from_location(bind_context, plan) diff --git a/src/query/sql/src/planner/plans/copy_into_table.rs b/src/query/sql/src/planner/plans/copy_into_table.rs index e5a43486e85ea..92325edab46c1 100644 --- a/src/query/sql/src/planner/plans/copy_into_table.rs +++ b/src/query/sql/src/planner/plans/copy_into_table.rs @@ -137,10 +137,16 @@ pub struct CopyIntoTablePlan { pub is_transform: bool, pub enable_distributed: bool, + + pub files_collected: bool, } impl CopyIntoTablePlan { pub async fn collect_files(&mut self, ctx: &dyn TableContext) -> Result<()> { + if self.files_collected { + return Ok(()); + } + self.files_collected = true; ctx.set_status_info("begin to list files"); let start = Instant::now(); diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index 9bbc66eb79860..fd0cae9405d7b 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -49,6 +49,7 @@ use databend_common_storage::parquet_rs::read_metadata_async; use databend_common_storage::StageFileInfo; use databend_common_storage::StageFilesInfo; use databend_storages_common_table_meta::table::ChangeType; +use log::info; use opendal::Operator; use parquet::file::metadata::ParquetMetaData; use parquet::schema::types::SchemaDescPtr; @@ -174,7 +175,9 @@ impl ParquetRSTable { // Infer schema from the first parquet file. // Assume all parquet files have the same schema. // If not, throw error during reading. 
- let size = operator.stat(path).await?.content_length(); + let stat = operator.stat(path).await?; + let size = stat.content_length(); + info!("infer schema from file {}, with stat {:?}", path, stat); let first_meta = read_metadata_async(path, &operator, Some(size)).await?; let arrow_schema = infer_schema_with_extension(first_meta.file_metadata())?; let compression_ratio = get_compression_ratio(&first_meta); From c89e4a322a5d262b307128df131891ac1cffc41b Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Thu, 16 Jan 2025 15:42:08 +0800 Subject: [PATCH 22/33] refactor(query): record state_offsets to reduce the memory leak of agg (#17285) * agg leak * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): record state_offsets to reduce the memory leak of agg * fix(query): revert to StackHashSet --- .../src/dictionary_string_hashtable.rs | 10 ++-- .../hashtable/src/hashjoin_hashtable.rs | 4 +- .../src/hashjoin_string_hashtable.rs | 4 +- src/common/hashtable/src/hashtable.rs | 4 +- src/common/hashtable/src/lookup_hashtable.rs | 10 ++-- .../hashtable/src/short_string_hashtable.rs | 4 +- src/common/hashtable/src/stack_hashtable.rs | 4 +- src/common/hashtable/src/string_hashtable.rs | 4 +- src/query/expression/src/aggregate/payload.rs | 46 ++++++++++++++++--- .../aggregates/aggregate_distinct_state.rs | 7 +-- 10 files changed, 68 insertions(+), 29 deletions(-) diff --git a/src/common/hashtable/src/dictionary_string_hashtable.rs b/src/common/hashtable/src/dictionary_string_hashtable.rs index 5ae808a11b8f8..cfde4dc66c2b0 100644 --- a/src/common/hashtable/src/dictionary_string_hashtable.rs +++ b/src/common/hashtable/src/dictionary_string_hashtable.rs @@ -20,7 +20,7 @@ use std::ptr::NonNull; use std::sync::Arc; use bumpalo::Bump; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use crate::container::Container; use crate::container::HeapContainer; @@ -102,7 +102,7 @@ pub struct DictionaryStringHashTable { arena: Arc, dict_keys: usize, entries_len: usize, - pub(crate) entries: HeapContainer, MmapAllocator>, + pub(crate) entries: HeapContainer, DefaultAllocator>, pub dictionary_hashset: StringHashSet<[u8]>, } @@ -116,7 +116,7 @@ impl DictionaryStringHashTable { arena: bump.clone(), dict_keys, entries_len: 0, - entries: unsafe { HeapContainer::new_zeroed(256, MmapAllocator::default()) }, + entries: unsafe { HeapContainer::new_zeroed(256, DefaultAllocator {}) }, dictionary_hashset: StringHashSet::new(bump), } } @@ -452,7 +452,7 @@ impl HashtableLike for DictionaryStringHashTable { } } - self.entries = HeapContainer::new_zeroed(0, MmapAllocator::default()); + self.entries = HeapContainer::new_zeroed(0, DefaultAllocator {}); } self.dictionary_hashset.clear(); @@ -615,7 +615,7 @@ impl<'a, V> Iterator for DictionaryTableMutIter<'a, V> { } pub struct DictionarySlotIter<'a> { - empty: Option<&'a TableEmpty<(), MmapAllocator>>, + empty: Option<&'a TableEmpty<(), DefaultAllocator>>, entities_slice: &'a [Entry], i: usize, } diff --git a/src/common/hashtable/src/hashjoin_hashtable.rs b/src/common/hashtable/src/hashjoin_hashtable.rs index 12a4c1ac20c9c..0fec13dc8a4d2 100644 --- a/src/common/hashtable/src/hashjoin_hashtable.rs +++ b/src/common/hashtable/src/hashjoin_hashtable.rs @@ -17,7 +17,7 @@ use std::marker::PhantomData; use std::sync::atomic::AtomicU64; use 
std::sync::atomic::Ordering; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use databend_common_column::bitmap::Bitmap; use super::traits::HashJoinHashtableLike; @@ -101,7 +101,7 @@ pub fn hash_bits() -> u32 { } } -pub struct HashJoinHashTable { +pub struct HashJoinHashTable { pub(crate) pointers: Box<[u64], A>, pub(crate) atomic_pointers: *mut AtomicU64, pub(crate) hash_shift: usize, diff --git a/src/common/hashtable/src/hashjoin_string_hashtable.rs b/src/common/hashtable/src/hashjoin_string_hashtable.rs index 8fdc7aa13208f..2779bb08f9964 100644 --- a/src/common/hashtable/src/hashjoin_string_hashtable.rs +++ b/src/common/hashtable/src/hashjoin_string_hashtable.rs @@ -16,7 +16,7 @@ use std::alloc::Allocator; use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use databend_common_column::bitmap::Bitmap; use super::traits::HashJoinHashtableLike; @@ -37,7 +37,7 @@ pub struct StringRawEntry { pub next: u64, } -pub struct HashJoinStringHashTable { +pub struct HashJoinStringHashTable { pub(crate) pointers: Box<[u64], A>, pub(crate) atomic_pointers: *mut AtomicU64, pub(crate) hash_shift: usize, diff --git a/src/common/hashtable/src/hashtable.rs b/src/common/hashtable/src/hashtable.rs index 3f5f3055bbf63..6591217a2447c 100644 --- a/src/common/hashtable/src/hashtable.rs +++ b/src/common/hashtable/src/hashtable.rs @@ -17,7 +17,7 @@ use std::intrinsics::unlikely; use std::iter::TrustedLen; use std::mem::MaybeUninit; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::HeapContainer; use super::table0::Entry; @@ -29,7 +29,7 @@ use super::traits::Keyable; use super::utils::ZeroEntry; use crate::FastHash; -pub struct Hashtable +pub struct Hashtable where K: Keyable, A: Allocator + Clone, diff --git a/src/common/hashtable/src/lookup_hashtable.rs b/src/common/hashtable/src/lookup_hashtable.rs index bcaf6f44ded4a..19be8634c7983 100644 --- a/src/common/hashtable/src/lookup_hashtable.rs +++ b/src/common/hashtable/src/lookup_hashtable.rs @@ -17,13 +17,17 @@ use std::iter::TrustedLen; use std::mem; use std::mem::MaybeUninit; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use crate::table0::Entry; use crate::HashtableLike; -pub struct LookupHashtable -{ +pub struct LookupHashtable< + K: Sized, + const CAPACITY: usize, + V, + A: Allocator + Clone = DefaultAllocator, +> { flags: Box<[bool; CAPACITY], A>, data: Box<[Entry; CAPACITY], A>, len: usize, diff --git a/src/common/hashtable/src/short_string_hashtable.rs b/src/common/hashtable/src/short_string_hashtable.rs index 9dacd1d424d7a..ab2b95b756c8c 100644 --- a/src/common/hashtable/src/short_string_hashtable.rs +++ b/src/common/hashtable/src/short_string_hashtable.rs @@ -21,7 +21,7 @@ use std::ptr::NonNull; use std::sync::Arc; use bumpalo::Bump; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::HeapContainer; use super::table0::Entry; @@ -39,7 +39,7 @@ use crate::table_empty::TableEmpty; use crate::table_empty::TableEmptyIter; use crate::table_empty::TableEmptyIterMut; -pub struct ShortStringHashtable +pub struct ShortStringHashtable where K: UnsizedKeyable + ?Sized, A: Allocator + Clone, diff --git 
a/src/common/hashtable/src/stack_hashtable.rs b/src/common/hashtable/src/stack_hashtable.rs index 36dc1d070ae25..5be8ebbb649b0 100644 --- a/src/common/hashtable/src/stack_hashtable.rs +++ b/src/common/hashtable/src/stack_hashtable.rs @@ -16,7 +16,7 @@ use std::alloc::Allocator; use std::intrinsics::unlikely; use std::mem::MaybeUninit; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::StackContainer; use super::table0::Entry; @@ -25,7 +25,7 @@ use super::table0::Table0Iter; use super::traits::Keyable; use super::utils::ZeroEntry; -pub struct StackHashtable +pub struct StackHashtable where K: Keyable, A: Allocator + Clone, diff --git a/src/common/hashtable/src/string_hashtable.rs b/src/common/hashtable/src/string_hashtable.rs index 076c700bfbb32..83a86106f500c 100644 --- a/src/common/hashtable/src/string_hashtable.rs +++ b/src/common/hashtable/src/string_hashtable.rs @@ -19,7 +19,7 @@ use std::mem::MaybeUninit; use std::sync::Arc; use bumpalo::Bump; -use databend_common_base::mem_allocator::MmapAllocator; +use databend_common_base::mem_allocator::DefaultAllocator; use super::container::HeapContainer; use super::table0::Entry; @@ -38,7 +38,7 @@ use crate::table_empty::TableEmptyIterMut; /// Simple unsized hashtable is used for storing unsized keys in arena. It can be worked with HashMethodSerializer. /// Different from `ShortStringHashTable`, it doesn't use adaptive sub hashtable to store key values via key size. /// It can be considered as a minimal hashtable implementation of ShortStringHashTable -pub struct StringHashtable +pub struct StringHashtable where K: UnsizedKeyable + ?Sized, A: Allocator + Clone, diff --git a/src/query/expression/src/aggregate/payload.rs b/src/query/expression/src/aggregate/payload.rs index f08e61280f5cc..ad76f6dfe5927 100644 --- a/src/query/expression/src/aggregate/payload.rs +++ b/src/query/expression/src/aggregate/payload.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use bumpalo::Bump; use databend_common_base::runtime::drop_guard; use itertools::Itertools; +use log::info; use strength_reduce::StrengthReducedU64; use super::payload_row::rowformat_size; @@ -78,10 +79,18 @@ unsafe impl Sync for Payload {} pub struct Page { pub(crate) data: Vec>, pub(crate) rows: usize, - pub(crate) state_rows: usize, + // state_offset = state_rows * agg_len + // which mark that the offset to clean the agg states + pub(crate) state_offsets: usize, pub(crate) capacity: usize, } +impl Page { + pub fn is_partial_state(&self, agg_len: usize) -> bool { + self.rows * agg_len != self.state_offsets + } +} + pub type Pages = Vec; // TODO FIXME @@ -179,7 +188,7 @@ impl Payload { self.pages.push(Page { data, rows: 0, - state_rows: 0, + state_offsets: 0, capacity: self.row_per_page, }); } @@ -300,10 +309,18 @@ impl Payload { } let place = StateAddr::from(place); + let page = &mut self.pages[page_index[idx]]; for (aggr, offset) in self.aggrs.iter().zip(self.state_addr_offsets.iter()) { aggr.init_state(place.next(*offset)); + page.state_offsets += 1; + } + } + + #[cfg(debug_assertions)] + { + for page in self.pages.iter() { + assert_eq!(page.rows * self.aggrs.len(), page.state_offsets); } - self.pages[page_index[idx]].state_rows += 1; } } @@ -338,6 +355,7 @@ impl Payload { address: &[*const u8], ) { let tuple_size = self.tuple_size; + let agg_len = self.aggrs.len(); let (mut page, _) = self.writable_page(); for i in 0..row_count { let index = select_vector[i]; @@ -350,7 +368,7 @@ impl Payload { ) } page.rows += 
1; - page.state_rows += 1; + page.state_offsets += agg_len; if page.rows == page.capacity { (page, _) = self.writable_page(); @@ -421,10 +439,26 @@ impl Drop for Payload { drop_guard(move || { // drop states if !self.state_move_out { - for (aggr, addr_offset) in self.aggrs.iter().zip(self.state_addr_offsets.iter()) { + 'FOR: for (idx, (aggr, addr_offset)) in self + .aggrs + .iter() + .zip(self.state_addr_offsets.iter()) + .enumerate() + { if aggr.need_manual_drop_state() { for page in self.pages.iter() { - for row in 0..page.state_rows { + let is_partial_state = page.is_partial_state(self.aggrs.len()); + + if is_partial_state && idx == 0 { + info!("Cleaning partial page, state_offsets: {}, row: {}, agg length: {}", page.state_offsets, page.rows, self.aggrs.len()); + } + for row in 0..page.state_offsets.div_ceil(self.aggrs.len()) { + // When OOM, some states are not initialized, we don't need to destroy them + if is_partial_state + && row * self.aggrs.len() + idx >= page.state_offsets + { + continue 'FOR; + } let ptr = self.data_ptr(page, row); unsafe { let state_addr = diff --git a/src/query/functions/src/aggregates/aggregate_distinct_state.rs b/src/query/functions/src/aggregates/aggregate_distinct_state.rs index 2c825d2bcc258..f15b4200749e8 100644 --- a/src/query/functions/src/aggregates/aggregate_distinct_state.rs +++ b/src/query/functions/src/aggregates/aggregate_distinct_state.rs @@ -41,6 +41,7 @@ use databend_common_hashtable::HashSet as CommonHashSet; use databend_common_hashtable::HashtableKeyable; use databend_common_hashtable::HashtableLike; use databend_common_hashtable::ShortStringHashSet; +use databend_common_hashtable::StackHashSet; use databend_common_io::prelude::*; use siphasher::sip128::Hasher128; use siphasher::sip128::SipHasher24; @@ -318,13 +319,13 @@ where T: Number + BorshSerialize + BorshDeserialize + HashtableKeyable // For count(distinct string) and uniq(string) pub struct AggregateUniqStringState { - set: CommonHashSet, + set: StackHashSet, } impl DistinctStateFunc for AggregateUniqStringState { fn new() -> Self { AggregateUniqStringState { - set: CommonHashSet::new(), + set: StackHashSet::new(), } } @@ -338,7 +339,7 @@ impl DistinctStateFunc for AggregateUniqStringState { fn deserialize(reader: &mut &[u8]) -> Result { let size = reader.read_uvarint()?; - let mut set = CommonHashSet::with_capacity(size as usize); + let mut set = StackHashSet::with_capacity(size as usize); for _ in 0..size { let e = borsh_deserialize_state(reader)?; let _ = set.set_insert(e).is_ok(); From 94c96f91331d77195fb9548326f51d122a257cc5 Mon Sep 17 00:00:00 2001 From: Sky Fan <3374614481@qq.com> Date: Thu, 16 Jan 2025 19:20:51 +0800 Subject: [PATCH 23/33] chore: make error message of conflicting transactions more user-friendly (#17307) --- src/query/catalog/src/catalog/interface.rs | 26 +++++++++++++--------- src/query/sql/src/planner/binder/binder.rs | 10 +++++---- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/query/catalog/src/catalog/interface.rs b/src/query/catalog/src/catalog/interface.rs index ea36dde14df0d..b5d4ca067dc1e 100644 --- a/src/query/catalog/src/catalog/interface.rs +++ b/src/query/catalog/src/catalog/interface.rs @@ -110,6 +110,7 @@ use databend_common_meta_types::SeqV; use databend_storages_common_session::SessionState; use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; use dyn_clone::DynClone; +use log::info; use crate::database::Database; use crate::table::Table; @@ -388,16 +389,21 @@ pub trait Catalog: DynClone + Send + Sync + 
Debug { &self, req: UpdateMultiTableMetaReq, ) -> Result { - self.retryable_update_multi_table_meta(req) - .await? - .map_err(|e| { - ErrorCode::TableVersionMismatched(format!( - "Fail to update table metas, conflict tables: {:?}", - e.iter() - .map(|(tid, seq, meta)| (tid, seq, &meta.engine)) - .collect::>() - )) - }) + let result = self.retryable_update_multi_table_meta(req).await?; + match result { + Ok(reply) => Ok(reply), + Err(failed_tables) => { + let err_msg = format!( + "Due to concurrent transactions, transaction commit failed. Conflicting table IDs: {:?}", + failed_tables.iter().map(|(tid, _, _)| tid).collect::>() + ); + info!( + "Due to concurrent transactions, transaction commit failed. Conflicting tables: {:?}", + failed_tables + ); + Err(ErrorCode::TableVersionMismatched(err_msg)) + } + } } // update stream metas, currently used by "copy into location form stream" diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index ede8956c0f3fd..a995023cceb07 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -1059,10 +1059,12 @@ pub async fn execute_commit_statement(ctx: Arc) -> Result<()> } Err(e) => { let err_msg = format!( - "COMMIT: Table versions mismatched in multi statement transaction, conflict tables: {:?}", - e.iter() - .map(|(tid, seq, meta)| (tid, seq, &meta.engine)) - .collect::>() + "Due to concurrent transactions, explicit transaction commit failed. Conflicting table IDs: {:?}", + e.iter().map(|(tid, _, _)| tid).collect::>() + ); + info!( + "Due to concurrent transactions, explicit transaction commit failed. Conflicting table IDs: {:?}", + e ); return Err(ErrorCode::TableVersionMismatched(err_msg)); } From a37a79018d006a9acdfbd8b1f7258776908d421a Mon Sep 17 00:00:00 2001 From: Liuqing Yue Date: Fri, 17 Jan 2025 09:12:09 +0800 Subject: [PATCH 24/33] chore: add enable_prune_cache setting (#17308) * chore: add enable_prune_cache setting * chore: add enable_prune_cache setting --- src/query/settings/src/settings_default.rs | 8 +++ .../settings/src/settings_getter_setter.rs | 4 ++ .../fuse/src/operations/read_partitions.rs | 69 ++++++++++--------- .../pruning_pipeline/send_part_info_sink.rs | 8 ++- 4 files changed, 56 insertions(+), 33 deletions(-) diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index ea88aa5a1506a..aa3d2da50b073 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -1172,6 +1172,14 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::Numeric(1..=u64::MAX)), }), + ("enable_prune_cache", DefaultSettingValue { + value: UserSettingValue::UInt64(1), + desc: "Enable to cache the pruning result", + mode: SettingMode::Both, + scope: SettingScope::Both, + range: Some(SettingRange::Numeric(0..=1)), + }), + ]); Ok(Arc::new(DefaultSettings { diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index 9b9608fcc79b3..a7524758608c3 100644 --- a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -829,6 +829,10 @@ impl Settings { Ok(self.try_get_u64("enable_prune_pipeline")? == 1) } + pub fn get_enable_prune_cache(&self) -> Result { + Ok(self.try_get_u64("enable_prune_cache")? == 1) + } + pub fn get_enable_distributed_pruning(&self) -> Result { Ok(self.try_get_u64("enable_distributed_pruning")? 
== 1) } diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index e0a6d017d65e5..aeaeb7fac99d1 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -206,34 +206,37 @@ impl FuseTable { ) }); - if let Some((_stat, part)) = Self::check_prune_cache(&derterministic_cache_key) { - let sender = part_info_tx.clone(); - info!("prune pipeline: get prune result from cache"); - source_pipeline.set_on_init(move || { - // We cannot use the runtime associated with the query to avoid increasing its lifetime. - GlobalIORuntime::instance().spawn(async move { - // avoid block global io runtime - let runtime = Runtime::with_worker_threads(2, Some("send-parts".to_string()))?; - - let join_handler = runtime.spawn(async move { - for part in part.partitions { - // the sql may be killed or early stop, ignore the error - if let Err(_e) = sender.send(Ok(part)).await { - break; + if ctx.get_settings().get_enable_prune_cache()? { + if let Some((_stat, part)) = Self::check_prune_cache(&derterministic_cache_key) { + let sender = part_info_tx.clone(); + info!("prune pipeline: get prune result from cache"); + source_pipeline.set_on_init(move || { + // We cannot use the runtime associated with the query to avoid increasing its lifetime. + GlobalIORuntime::instance().spawn(async move { + // avoid block global io runtime + let runtime = + Runtime::with_worker_threads(2, Some("send-parts".to_string()))?; + + let join_handler = runtime.spawn(async move { + for part in part.partitions { + // the sql may be killed or early stop, ignore the error + if let Err(_e) = sender.send(Ok(part)).await { + break; + } } + }); + + if let Err(cause) = join_handler.await { + log::warn!("Join error while in prune pipeline, cause: {:?}", cause); } - }); - if let Err(cause) = join_handler.await { - log::warn!("Join error while in prune pipeline, cause: {:?}", cause); - } + Result::Ok(()) + }); - Result::Ok(()) + Ok(()) }); - - Ok(()) - }); - return Ok(None); + return Ok(None); + } } let mut prune_pipeline = Pipeline::create(); @@ -426,7 +429,7 @@ impl FuseTable { .as_ref() .filter(|p| p.order_by.is_empty() && p.filters.is_none()) .and_then(|p| p.limit); - + let enable_prune_cache = ctx.get_settings().get_enable_prune_cache()?; let send_part_state = Arc::new(SendPartState::create( derterministic_cache_key, limit, @@ -441,16 +444,20 @@ impl FuseTable { top_k.clone(), pruner.table_schema.clone(), send_part_state.clone(), + enable_prune_cache, ) })?; - prune_pipeline.set_on_finished(move |info: &ExecutionInfo| { - if let Ok(()) = info.res { - // only populating cache when the pipeline is finished successfully - send_part_state.populating_cache(); - } - Ok(()) - }); + if enable_prune_cache { + info!("prune pipeline: enable prune cache"); + prune_pipeline.set_on_finished(move |info: &ExecutionInfo| { + if let Ok(()) = info.res { + // only populating cache when the pipeline is finished successfully + send_part_state.populating_cache(); + } + Ok(()) + }); + } Ok(()) } diff --git a/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs b/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs index c0a97d821dabf..dfbaa569d1c00 100644 --- a/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs +++ b/src/query/storages/fuse/src/pruning_pipeline/send_part_info_sink.rs @@ -125,6 +125,7 @@ pub struct SendPartInfoSink { partitions: Partitions, statistics: 
PartStatistics, send_part_state: Arc, + enable_cache: bool, } impl SendPartInfoSink { @@ -135,6 +136,7 @@ impl SendPartInfoSink { top_k: Option<(TopK, Scalar)>, schema: TableSchemaRef, send_part_state: Arc, + enable_cache: bool, ) -> Result { let partitions = Partitions::default(); let statistics = PartStatistics::default(); @@ -148,6 +150,7 @@ impl SendPartInfoSink { partitions, statistics, send_part_state, + enable_cache, }, ))) } @@ -184,8 +187,9 @@ impl AsyncSink for SendPartInfoSink { ), }, }; - - self.partitions.partitions.extend(info_ptr.clone()); + if self.enable_cache { + self.partitions.partitions.extend(info_ptr.clone()); + } for info in info_ptr { if let Some(sender) = &self.sender { From 31c243bbfb5c049d5f31836ed44a6a087f19a50c Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 17 Jan 2025 09:18:02 +0800 Subject: [PATCH 25/33] fix: set query_kind earlier to ensure it takes effect. (#17302) maybe related to the performance regression reported in issue 17284. --- src/query/sql/src/planner/planner.rs | 13 ++++++++----- .../src/parquet_rs/parquet_table/table.rs | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/query/sql/src/planner/planner.rs b/src/query/sql/src/planner/planner.rs index 62126bca834b7..5eeb3c9455b35 100644 --- a/src/query/sql/src/planner/planner.rs +++ b/src/query/sql/src/planner/planner.rs @@ -223,6 +223,7 @@ impl Planner { #[fastrace::trace] pub async fn plan_stmt(&mut self, stmt: &Statement, attach_query: bool) -> Result { let start = Instant::now(); + let query_kind = get_query_kind(stmt); let settings = self.ctx.get_settings(); // Step 3: Bind AST with catalog, and generate a pure logical SExpr let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; @@ -243,8 +244,7 @@ impl Planner { info!("logical plan from cache, time used: {:?}", start.elapsed()); if attach_query { // update for clickhouse handler - self.ctx - .attach_query_str(get_query_kind(stmt), stmt.to_mask_sql()); + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); } return Ok(plan.plan); } @@ -260,11 +260,14 @@ impl Planner { ) .with_subquery_executor(self.query_executor.clone()); - // Indicate binder there is no need to collect column statistics for the binding table. + // must attach before bind, because ParquetRSTable::create used it. 
+ if attach_query { + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); + } let plan = binder.bind(stmt).await?; + // attach again to avoid the query kind is overwritten by the subquery if attach_query { - self.ctx - .attach_query_str(get_query_kind(stmt), stmt.to_mask_sql()); + self.ctx.attach_query_str(query_kind, stmt.to_mask_sql()); } // Step 4: Optimize the SExpr with optimizers, and generate optimized physical SExpr diff --git a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs index fd0cae9405d7b..f06a765f02b22 100644 --- a/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_rs/parquet_table/table.rs @@ -34,6 +34,7 @@ use databend_common_catalog::table::DummyColumnStatisticsProvider; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableStatistics; use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::TableField; use databend_common_expression::TableSchema; @@ -141,10 +142,17 @@ impl ParquetRSTable { // If the query is `COPY`, we don't need to collect column statistics. // It's because the only transform could be contained in `COPY` command is projection. - let need_stats_provider = !matches!( - query_kind, - QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation - ); + let need_stats_provider = match query_kind { + QueryKind::CopyIntoTable | QueryKind::CopyIntoLocation => true, + QueryKind::Unknown => { + // add this branch to ensure query_kind is set + return Err(ErrorCode::Internal( + "Unexpected QueryKind::Unknown: query_kind was not properly set before calling ParquetRSTable::create.", + )); + } + _ => false, + }; + let max_threads = settings.get_max_threads()? as usize; let max_memory_usage = settings.get_max_memory_usage()?; From e6e3aeccc523650e996f72525bde6720768fd9d8 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 17 Jan 2025 09:26:55 +0800 Subject: [PATCH 26/33] fix: use full path as cache key for parquet meta. 
(#17313) --- .../storages/parquet/src/parquet_rs/meta.rs | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/query/storages/parquet/src/parquet_rs/meta.rs b/src/query/storages/parquet/src/parquet_rs/meta.rs index 52e575291fe99..c57b7283ef81a 100644 --- a/src/query/storages/parquet/src/parquet_rs/meta.rs +++ b/src/query/storages/parquet/src/parquet_rs/meta.rs @@ -37,9 +37,11 @@ pub async fn read_metadata_async_cached( operator: &Operator, file_size: Option, ) -> Result> { - let reader = MetaReader::meta_data_reader(operator.clone()); + let info = operator.info(); + let location = format!("{}/{}/{}", info.name(), info.root(), path); + let reader = MetaReader::meta_data_reader(operator.clone(), location.len() - path.len()); let load_params = LoadParams { - location: path.to_owned(), + location, len_hint: file_size, ver: 0, put_cache: true, @@ -246,15 +248,15 @@ fn check_memory_usage(max_memory_usage: u64) -> Result<()> { Ok(()) } -pub struct LoaderWrapper(T); +pub struct LoaderWrapper(T, usize); pub type ParquetMetaReader = InMemoryItemCacheReader>; pub struct MetaReader; impl MetaReader { - pub fn meta_data_reader(dal: Operator) -> ParquetMetaReader { + pub fn meta_data_reader(dal: Operator, prefix_len: usize) -> ParquetMetaReader { ParquetMetaReader::new( CacheManager::instance().get_parquet_meta_data_cache(), - LoaderWrapper(dal), + LoaderWrapper(dal, prefix_len), ) } } @@ -263,15 +265,12 @@ impl MetaReader { impl Loader for LoaderWrapper { #[async_backtrace::framed] async fn load(&self, params: &LoadParams) -> Result { + let location = ¶ms.location[self.1..]; let size = match params.len_hint { Some(v) => v, - None => self.0.stat(¶ms.location).await?.content_length(), + None => self.0.stat(location).await?.content_length(), }; - databend_common_storage::parquet_rs::read_metadata_async( - ¶ms.location, - &self.0, - Some(size), - ) - .await + databend_common_storage::parquet_rs::read_metadata_async(location, &self.0, Some(size)) + .await } } From 37d96b2fd3a0dae957b68fd24a21b5dce9194f20 Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Fri, 17 Jan 2025 14:57:31 +0800 Subject: [PATCH 27/33] chore(query): improve fuse row fetch, use accumulating (#17309) * fix(query): improve fuse row fetch, use accumulating * fix(query): improve fuse row fetch, use accumulating * fix(query): improve fuse row fetch, use accumulating * fix(query): add extra log --- .../transform_accumulating_async.rs | 16 +++++++ .../src/operations/read/fuse_rows_fetcher.rs | 47 ++++++++++++------- .../operations/read/native_rows_fetcher.rs | 5 -- .../operations/read/parquet_rows_fetcher.rs | 5 -- 4 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs b/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs index d4e4455595b9e..ad8c53fdb3f62 100644 --- a/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs +++ b/src/query/pipeline/transforms/src/processors/transforms/transform_accumulating_async.rs @@ -26,6 +26,10 @@ use databend_common_pipeline_core::processors::Processor; pub trait AsyncAccumulatingTransform: Send { const NAME: &'static str; + async fn on_start(&mut self) -> Result<()> { + Ok(()) + } + async fn transform(&mut self, data: DataBlock) -> Result>; async fn on_finish(&mut self, _output: bool) -> Result> { @@ -38,6 +42,7 @@ pub struct AsyncAccumulatingTransformer input: Arc, output: Arc, + 
called_on_start: bool, called_on_finish: bool, input_data: Option, output_data: Option, @@ -51,6 +56,7 @@ impl AsyncAccumulatingTransformer { output, input_data: None, output_data: None, + called_on_start: false, called_on_finish: false, }) } @@ -67,6 +73,10 @@ impl Processor for AsyncAccumulatingTra } fn event(&mut self) -> Result { + if !self.called_on_start { + return Ok(Event::Async); + } + if self.output.is_finished() { if !self.called_on_finish { return Ok(Event::Async); @@ -111,6 +121,12 @@ impl Processor for AsyncAccumulatingTra #[async_backtrace::framed] async fn async_process(&mut self) -> Result<()> { + if !self.called_on_start { + self.called_on_start = true; + self.inner.on_start().await?; + return Ok(()); + } + if let Some(data_block) = self.input_data.take() { self.output_data = self.inner.transform(data_block).await?; return Ok(()); diff --git a/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs b/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs index 97b32725f5284..e0ec3424b6161 100644 --- a/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs +++ b/src/query/storages/fuse/src/operations/read/fuse_rows_fetcher.rs @@ -25,15 +25,13 @@ use databend_common_expression::types::DataType; use databend_common_expression::types::NumberDataType; use databend_common_expression::BlockEntry; use databend_common_expression::Column; -use databend_common_expression::ColumnBuilder; use databend_common_expression::DataBlock; -use databend_common_expression::DataSchema; use databend_common_expression::Value; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::ProcessorPtr; -use databend_common_pipeline_transforms::processors::AsyncTransform; -use databend_common_pipeline_transforms::processors::AsyncTransformer; +use databend_common_pipeline_transforms::AsyncAccumulatingTransform; +use databend_common_pipeline_transforms::AsyncAccumulatingTransformer; use databend_storages_common_io::ReadSettings; use super::native_rows_fetcher::NativeRowsFetcher; @@ -132,17 +130,17 @@ pub fn row_fetch_processor( pub trait RowsFetcher { async fn on_start(&mut self) -> Result<()>; async fn fetch(&mut self, row_ids: &[u64]) -> Result; - fn schema(&self) -> DataSchema; } pub struct TransformRowsFetcher { row_id_col_offset: usize, fetcher: F, need_wrap_nullable: bool, + blocks: Vec, } #[async_trait::async_trait] -impl AsyncTransform for TransformRowsFetcher +impl AsyncAccumulatingTransform for TransformRowsFetcher where F: RowsFetcher + Send + Sync + 'static { const NAME: &'static str = "TransformRowsFetcher"; @@ -152,18 +150,25 @@ where F: RowsFetcher + Send + Sync + 'static self.fetcher.on_start().await } + async fn transform(&mut self, data: DataBlock) -> Result> { + self.blocks.push(data); + Ok(None) + } + #[async_backtrace::framed] - async fn transform(&mut self, mut data: DataBlock) -> Result { + async fn on_finish(&mut self, _output: bool) -> Result> { + if self.blocks.is_empty() { + return Ok(None); + } + + let start_time = std::time::Instant::now(); + let num_blocks = self.blocks.len(); + let mut data = DataBlock::concat(&self.blocks)?; + self.blocks.clear(); + let num_rows = data.num_rows(); if num_rows == 0 { - // Although the data block is empty, we need to add empty columns to align the schema. 
- let fetched_schema = self.fetcher.schema(); - for f in fetched_schema.fields().iter() { - let builder = ColumnBuilder::with_capacity(f.data_type(), 0); - let col = builder.build(); - data.add_column(BlockEntry::new(f.data_type().clone(), Value::Column(col))); - } - return Ok(data); + return Ok(None); } let entry = &data.columns()[self.row_id_col_offset]; @@ -189,7 +194,14 @@ where F: RowsFetcher + Send + Sync + 'static } } - Ok(data) + log::info!( + "TransformRowsFetcher on_finish: num_rows: {}, input blocks: {} in {} milliseconds", + num_rows, + num_blocks, + start_time.elapsed().as_millis() + ); + + Ok(Some(data)) } } @@ -203,10 +215,11 @@ where F: RowsFetcher + Send + Sync + 'static fetcher: F, need_wrap_nullable: bool, ) -> ProcessorPtr { - ProcessorPtr::create(AsyncTransformer::create(input, output, Self { + ProcessorPtr::create(AsyncAccumulatingTransformer::create(input, output, Self { row_id_col_offset, fetcher, need_wrap_nullable, + blocks: vec![], })) } } diff --git a/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs b/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs index b7e1b16a0e3b4..ca45afc996279 100644 --- a/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs +++ b/src/query/storages/fuse/src/operations/read/native_rows_fetcher.rs @@ -26,7 +26,6 @@ use databend_common_catalog::plan::Projection; use databend_common_catalog::table::Table; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_expression::DataSchema; use databend_common_expression::TableSchemaRef; use databend_common_storage::ColumnNodes; use databend_storages_common_cache::LoadParams; @@ -148,10 +147,6 @@ impl RowsFetcher for NativeRowsFetcher { Ok(DataBlock::take_blocks(&blocks, &indices, num_rows)) } - - fn schema(&self) -> DataSchema { - self.reader.data_schema() - } } impl NativeRowsFetcher { diff --git a/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs b/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs index 260fe64a36dae..35b8a47ed9aaf 100644 --- a/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs +++ b/src/query/storages/fuse/src/operations/read/parquet_rows_fetcher.rs @@ -26,7 +26,6 @@ use databend_common_catalog::table::Table; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_expression::DataSchema; use databend_common_expression::TableSchemaRef; use databend_common_storage::ColumnNodes; use databend_storages_common_cache::LoadParams; @@ -158,10 +157,6 @@ impl RowsFetcher for ParquetRowsFetcher { Ok(DataBlock::take_blocks(&blocks, &indices, num_rows)) } - - fn schema(&self) -> DataSchema { - self.reader.data_schema() - } } impl ParquetRowsFetcher { From 248ec52b75179c4d6a3350fd031a49ad961d762c Mon Sep 17 00:00:00 2001 From: coldWater Date: Fri, 17 Jan 2025 20:38:25 +0800 Subject: [PATCH 28/33] refactor(query): introduce udf runtime pool (#17304) * udf runtime pool * fix * fix * fix * test --- .../src/pipelines/builders/builder_udf.rs | 9 +- .../processors/transforms/aggregator/mod.rs | 1 + .../transforms/aggregator/udaf_script.rs | 172 +++++----- .../pipelines/processors/transforms/mod.rs | 14 +- .../processors/transforms/runtime_pool.rs | 54 +++ .../transforms/transform_udf_script.rs | 322 +++++++++--------- .../20_0023_udf_script_parallelism.result | 3 + .../20_0023_udf_script_parallelism.sh | 19 ++ ...able.result => 07_0000_amend_table.result} | 0 
..._amend_table.sh => 07_0000_amend_table.sh} | 0 .../08_udf_python/08_0000_parallelism.result | 3 + .../5_ee/08_udf_python/08_0000_parallelism.sh | 19 ++ 12 files changed, 356 insertions(+), 260 deletions(-) create mode 100644 src/query/service/src/pipelines/processors/transforms/runtime_pool.rs create mode 100755 tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result create mode 100755 tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh rename tests/suites/5_ee/07_failsafe/{08_0000_amend_table.result => 07_0000_amend_table.result} (100%) rename tests/suites/5_ee/07_failsafe/{08_0000_amend_table.sh => 07_0000_amend_table.sh} (100%) create mode 100755 tests/suites/5_ee/08_udf_python/08_0000_parallelism.result create mode 100755 tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh diff --git a/src/query/service/src/pipelines/builders/builder_udf.rs b/src/query/service/src/pipelines/builders/builder_udf.rs index 7045374042eaf..da9b35d5559bf 100644 --- a/src/query/service/src/pipelines/builders/builder_udf.rs +++ b/src/query/service/src/pipelines/builders/builder_udf.rs @@ -12,13 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::atomic::AtomicUsize; -use std::sync::Arc; - use databend_common_exception::Result; use databend_common_pipeline_transforms::processors::TransformPipelineHelper; use databend_common_sql::executor::physical_plans::Udf; -use databend_common_storages_fuse::TableContext; use crate::pipelines::processors::transforms::TransformUdfScript; use crate::pipelines::processors::transforms::TransformUdfServer; @@ -29,15 +25,12 @@ impl PipelineBuilder { self.build_pipeline(&udf.input)?; if udf.script_udf { - let index_seq = Arc::new(AtomicUsize::new(0)); - let runtime_num = self.ctx.get_settings().get_max_threads()? 
as usize; - let runtimes = TransformUdfScript::init_runtime(&udf.udf_funcs, runtime_num)?; + let runtimes = TransformUdfScript::init_runtime(&udf.udf_funcs)?; self.main_pipeline.try_add_transformer(|| { Ok(TransformUdfScript::new( self.func_ctx.clone(), udf.udf_funcs.clone(), runtimes.clone(), - index_seq.clone(), )) }) } else { diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs index a4dd56da1e1c3..89eaede89b97a 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/mod.rs @@ -37,3 +37,4 @@ pub use udaf_script::*; pub use utils::*; pub use self::serde::*; +use super::runtime_pool; diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs index 9cef5d96932dc..0b324b6c1ef63 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/udaf_script.rs @@ -17,7 +17,6 @@ use std::fmt; use std::io::BufRead; use std::io::Cursor; use std::sync::Arc; -use std::sync::Mutex; use arrow_array::Array; use arrow_array::RecordBatch; @@ -40,8 +39,8 @@ use databend_common_functions::aggregates::AggregateFunction; use databend_common_sql::plans::UDFLanguage; use databend_common_sql::plans::UDFScriptCode; -#[cfg(feature = "python-udf")] -use super::super::python_udf::GLOBAL_PYTHON_RUNTIME; +use super::runtime_pool::Pool; +use super::runtime_pool::RuntimeBuilder; pub struct AggregateUdfScript { display_name: String, @@ -138,6 +137,15 @@ impl AggregateFunction for AggregateUdfScript { builder.append_column(&result); Ok(()) } + + fn need_manual_drop_state(&self) -> bool { + true + } + + unsafe fn drop_state(&self, place: StateAddr) { + let state = place.get::(); + std::ptr::drop_in_place(state); + } } impl fmt::Display for AggregateUdfScript { @@ -244,10 +252,10 @@ pub fn create_udaf_script_function( let UDFScriptCode { language, code, .. 
} = code; let runtime = match language { UDFLanguage::JavaScript => { - let pool = JsRuntimePool::new( + let builder = JsRuntimeBuilder { name, - String::from_utf8(code.to_vec())?, - ArrowType::Struct( + code: String::from_utf8(code.to_vec())?, + state_type: ArrowType::Struct( state_fields .iter() .map(|f| f.into()) @@ -255,8 +263,8 @@ pub fn create_udaf_script_function( .into(), ), output_type, - ); - UDAFRuntime::JavaScript(pool) + }; + UDAFRuntime::JavaScript(JsRuntimePool::new(builder)) } UDFLanguage::WebAssembly => unimplemented!(), #[cfg(not(feature = "python-udf"))] @@ -267,22 +275,19 @@ pub fn create_udaf_script_function( } #[cfg(feature = "python-udf")] UDFLanguage::Python => { - let mut runtime = GLOBAL_PYTHON_RUNTIME.write(); - let code = String::from_utf8(code.to_vec())?; - runtime.add_aggregate( - &name, - ArrowType::Struct( + let builder = python_pool::PyRuntimeBuilder { + name, + code: String::from_utf8(code.to_vec())?, + state_type: ArrowType::Struct( state_fields .iter() .map(|f| f.into()) .collect::>() .into(), ), - ArrowType::from(&output_type), - arrow_udf_python::CallMode::CalledOnNullInput, - &code, - )?; - UDAFRuntime::Python(PythonInfo { name, output_type }) + output_type, + }; + UDAFRuntime::Python(Pool::new(builder)) } }; let init_state = runtime @@ -297,27 +302,17 @@ pub fn create_udaf_script_function( })) } -struct JsRuntimePool { +struct JsRuntimeBuilder { name: String, code: String, state_type: ArrowType, output_type: DataType, - - runtimes: Mutex>, } -impl JsRuntimePool { - fn new(name: String, code: String, state_type: ArrowType, output_type: DataType) -> Self { - Self { - name, - code, - state_type, - output_type, - runtimes: Mutex::new(vec![]), - } - } +impl RuntimeBuilder for JsRuntimeBuilder { + type Error = ErrorCode; - fn create(&self) -> Result { + fn build(&self) -> std::result::Result { let mut runtime = match arrow_udf_js::Runtime::new() { Ok(runtime) => runtime, Err(e) => { @@ -331,12 +326,14 @@ impl JsRuntimePool { converter.set_arrow_extension_key(EXTENSION_KEY); converter.set_json_extension_name(ARROW_EXT_TYPE_VARIANT); - let output_type: ArrowType = (&self.output_type).into(); runtime .add_aggregate( &self.name, self.state_type.clone(), - output_type, + // we pass the field instead of the data type because arrow-udf-js + // now takes the field as an argument here so that it can get any + // metadata associated with the field + arrow_field_from_data_type(&self.name, self.output_type.clone()), arrow_udf_js::CallMode::CalledOnNullInput, &self.code, ) @@ -344,23 +341,45 @@ impl JsRuntimePool { Ok(runtime) } +} - fn call(&self, op: F) -> anyhow::Result - where F: FnOnce(&arrow_udf_js::Runtime) -> anyhow::Result { - let mut runtimes = self.runtimes.lock().unwrap(); - let runtime = match runtimes.pop() { - Some(runtime) => runtime, - None => self.create()?, - }; - drop(runtimes); +fn arrow_field_from_data_type(name: &str, dt: DataType) -> arrow_schema::Field { + let field = DataField::new(name, dt); + (&field).into() +} + +type JsRuntimePool = Pool; + +#[cfg(feature = "python-udf")] +mod python_pool { + use super::*; - let result = op(&runtime)?; + pub(super) struct PyRuntimeBuilder { + pub name: String, + pub code: String, + pub state_type: ArrowType, + pub output_type: DataType, + } - let mut runtimes = self.runtimes.lock().unwrap(); - runtimes.push(runtime); + impl RuntimeBuilder for PyRuntimeBuilder { + type Error = ErrorCode; - Ok(result) + fn build(&self) -> std::result::Result { + let mut runtime = arrow_udf_python::Builder::default() + 
.sandboxed(true) + .build()?; + runtime.add_aggregate( + &self.name, + self.state_type.clone(), + arrow_field_from_data_type(&self.name, self.output_type.clone()), + arrow_udf_python::CallMode::CalledOnNullInput, + &self.code, + )?; + Ok(runtime) + } } + + pub type PyRuntimePool = Pool; } enum UDAFRuntime { @@ -368,41 +387,36 @@ enum UDAFRuntime { #[expect(unused)] WebAssembly, #[cfg(feature = "python-udf")] - Python(PythonInfo), -} - -#[cfg(feature = "python-udf")] -struct PythonInfo { - name: String, - output_type: DataType, + Python(python_pool::PyRuntimePool), } impl UDAFRuntime { fn name(&self) -> &str { match self { - UDAFRuntime::JavaScript(pool) => &pool.name, + UDAFRuntime::JavaScript(pool) => &pool.builder.name, #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => &info.name, + UDAFRuntime::Python(info) => &info.builder.name, _ => unimplemented!(), } } fn return_type(&self) -> DataType { match self { - UDAFRuntime::JavaScript(pool) => pool.output_type.clone(), + UDAFRuntime::JavaScript(pool) => pool.builder.output_type.clone(), #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => info.output_type.clone(), + UDAFRuntime::Python(info) => info.builder.output_type.clone(), _ => unimplemented!(), } } fn create_state(&self) -> anyhow::Result { let state = match self { - UDAFRuntime::JavaScript(pool) => pool.call(|runtime| runtime.create_state(&pool.name)), + UDAFRuntime::JavaScript(pool) => { + pool.call(|runtime| runtime.create_state(&pool.builder.name)) + } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.create_state(&info.name) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.create_state(&pool.builder.name)) } _ => unimplemented!(), }?; @@ -412,12 +426,11 @@ impl UDAFRuntime { fn accumulate(&self, state: &UdfAggState, input: &RecordBatch) -> anyhow::Result { let state = match self { UDAFRuntime::JavaScript(pool) => { - pool.call(|runtime| runtime.accumulate(&pool.name, &state.0, input)) + pool.call(|runtime| runtime.accumulate(&pool.builder.name, &state.0, input)) } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.accumulate(&info.name, &state.0, input) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.accumulate(&pool.builder.name, &state.0, input)) } _ => unimplemented!(), }?; @@ -426,11 +439,12 @@ impl UDAFRuntime { fn merge(&self, states: &Arc) -> anyhow::Result { let state = match self { - UDAFRuntime::JavaScript(pool) => pool.call(|runtime| runtime.merge(&pool.name, states)), + UDAFRuntime::JavaScript(pool) => { + pool.call(|runtime| runtime.merge(&pool.builder.name, states)) + } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.merge(&info.name, states) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.merge(&pool.builder.name, states)) } _ => unimplemented!(), }?; @@ -440,12 +454,11 @@ impl UDAFRuntime { fn finish(&self, state: &UdfAggState) -> anyhow::Result> { match self { UDAFRuntime::JavaScript(pool) => { - pool.call(|runtime| runtime.finish(&pool.name, &state.0)) + pool.call(|runtime| runtime.finish(&pool.builder.name, &state.0)) } #[cfg(feature = "python-udf")] - UDAFRuntime::Python(info) => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.finish(&info.name, &state.0) + UDAFRuntime::Python(pool) => { + pool.call(|runtime| runtime.finish(&pool.builder.name, &state.0)) } _ => unimplemented!(), } 
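// Review note: every UDAFRuntime method above now routes through `pool.call(..)`,
// which checks an idle runtime out of the pool (building one lazily via
// `RuntimeBuilder::build` when none is free) and pushes it back after the closure
// returns. This replaces the former GLOBAL_PYTHON_RUNTIME read/write lock, so
// concurrent aggregations no longer serialize on one shared runtime; the Python
// runtime is also built with `.sandboxed(true)`.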
@@ -495,9 +508,9 @@ mod tests { Field::new("sum", ArrowType::Int64, false), Field::new("weight", ArrowType::Int64, false), ]; - let pool = JsRuntimePool::new( - agg_name.clone(), - r#" + let builder = JsRuntimeBuilder { + name: agg_name.clone(), + code: r#" export function create_state() { return {sum: 0, weight: 0}; } @@ -521,9 +534,10 @@ export function finish(state) { } "# .to_string(), - ArrowType::Struct(fields.clone().into()), - Float32Type::data_type(), - ); + state_type: ArrowType::Struct(fields.clone().into()), + output_type: Float32Type::data_type(), + }; + let pool = JsRuntimePool::new(builder); let state = pool.call(|runtime| runtime.create_state(&agg_name))?; diff --git a/src/query/service/src/pipelines/processors/transforms/mod.rs b/src/query/service/src/pipelines/processors/transforms/mod.rs index 55078470553b9..2e76f346a0e40 100644 --- a/src/query/service/src/pipelines/processors/transforms/mod.rs +++ b/src/query/service/src/pipelines/processors/transforms/mod.rs @@ -15,6 +15,7 @@ pub mod aggregator; mod hash_join; pub(crate) mod range_join; +mod runtime_pool; mod transform_add_computed_columns; mod transform_add_const_columns; mod transform_add_internal_columns; @@ -66,16 +67,3 @@ pub use transform_stream_sort_spill::*; pub use transform_udf_script::TransformUdfScript; pub use transform_udf_server::TransformUdfServer; pub use window::*; - -#[cfg(feature = "python-udf")] -mod python_udf { - use std::sync::Arc; - use std::sync::LazyLock; - - use arrow_udf_python::Runtime; - use parking_lot::RwLock; - - /// python runtime should be only initialized once by gil lock, see: https://github.com/python/cpython/blob/main/Python/pystate.c - pub static GLOBAL_PYTHON_RUNTIME: LazyLock>> = - LazyLock::new(|| Arc::new(RwLock::new(Runtime::new().unwrap()))); -} diff --git a/src/query/service/src/pipelines/processors/transforms/runtime_pool.rs b/src/query/service/src/pipelines/processors/transforms/runtime_pool.rs new file mode 100644 index 0000000000000..5a3fc8437b3a4 --- /dev/null +++ b/src/query/service/src/pipelines/processors/transforms/runtime_pool.rs @@ -0,0 +1,54 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
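// Review note (illustrative sketch, not part of the patch): `RuntimeBuilder` only
// describes how to construct a single runtime instance; `Pool` keeps the idle
// instances in a mutex-guarded Vec and hands one out per call. Assuming the trait
// and struct defined in this file, a hypothetical builder could be exercised like:
//
//     use std::convert::Infallible;
//
//     struct EchoBuilder;
//     impl RuntimeBuilder<String> for EchoBuilder {
//         type Error = Infallible;
//         fn build(&self) -> Result<String, Infallible> { Ok("runtime".to_string()) }
//     }
//
//     let pool = Pool::new(EchoBuilder);
//     // The first call builds a runtime; later calls reuse the instance pushed back.
//     let len = pool.call(|rt: &String| Ok::<_, Infallible>(rt.len())).unwrap();
//     assert_eq!(len, "runtime".len());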
+ +use std::sync::Mutex; + +pub trait RuntimeBuilder { + type Error; + fn build(&self) -> Result; +} + +pub struct Pool> { + pub builder: B, + runtimes: Mutex>, +} + +impl> Pool { + pub fn new(builder: B) -> Self { + Self { + builder, + runtimes: Mutex::new(vec![]), + } + } + + pub fn call(&self, op: F) -> Result + where + F: FnOnce(&R) -> Result, + E: From, + { + let mut runtimes = self.runtimes.lock().unwrap(); + let runtime = match runtimes.pop() { + Some(runtime) => runtime, + None => self.builder.build()?, + }; + drop(runtimes); + + let result = op(&runtime); + + let mut runtimes = self.runtimes.lock().unwrap(); + runtimes.push(runtime); + + result + } +} diff --git a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs index 9c499496806f8..1447e95897693 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_udf_script.rs @@ -15,15 +15,14 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; use std::sync::Arc; use arrow_array::RecordBatch; -use arrow_schema::Schema; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::converts::arrow::ARROW_EXT_TYPE_VARIANT; use databend_common_expression::converts::arrow::EXTENSION_KEY; +use databend_common_expression::types::DataType; use databend_common_expression::variant_transform::contains_variant; use databend_common_expression::variant_transform::transform_variant; use databend_common_expression::BlockEntry; @@ -36,47 +35,58 @@ use databend_common_sql::executor::physical_plans::UdfFunctionDesc; use databend_common_sql::plans::UDFLanguage; use databend_common_sql::plans::UDFScriptCode; use databend_common_sql::plans::UDFType; -use parking_lot::RwLock; -#[cfg(feature = "python-udf")] -use super::python_udf::GLOBAL_PYTHON_RUNTIME; +use super::runtime_pool::Pool; +use super::runtime_pool::RuntimeBuilder; pub enum ScriptRuntime { - JavaScript(Vec>>), - WebAssembly(Arc>), + JavaScript(JsRuntimePool), + WebAssembly(arrow_udf_wasm::Runtime), #[cfg(feature = "python-udf")] - Python, + Python(python_pool::PyRuntimePool), } impl ScriptRuntime { - pub fn try_create(lang: UDFLanguage, code: Option<&[u8]>, runtime_num: usize) -> Result { - match lang { + pub fn try_create(func: &UdfFunctionDesc) -> Result { + let UDFType::Script(UDFScriptCode { language, code, .. }) = &func.udf_type else { + unreachable!() + }; + match language { UDFLanguage::JavaScript => { - // Create multiple runtimes to execute in parallel to avoid blocking caused by js udf runtime locks. 
- let runtimes = (0..runtime_num) - .map(|_| { - arrow_udf_js::Runtime::new() - .map(|mut runtime| { - runtime - .converter_mut() - .set_arrow_extension_key(EXTENSION_KEY); - runtime - .converter_mut() - .set_json_extension_name(ARROW_EXT_TYPE_VARIANT); - Arc::new(RwLock::new(runtime)) - }) - .map_err(|err| { - ErrorCode::UDFRuntimeError(format!( - "Cannot create js runtime: {err}", - )) - }) - }) - .collect::>>>>()?; - Ok(Self::JavaScript(runtimes)) + let builder = JsRuntimeBuilder { + name: func.name.clone(), + handler: func.func_name.clone(), + code: String::from_utf8(code.to_vec())?, + output_type: func.data_type.as_ref().clone(), + counter: Default::default(), + }; + Ok(Self::JavaScript(JsRuntimePool::new(builder))) + } + UDFLanguage::WebAssembly => { + let start = std::time::Instant::now(); + let runtime = arrow_udf_wasm::Runtime::new(code).map_err(|err| { + ErrorCode::UDFRuntimeError(format!( + "Failed to create WASM runtime for module: {err}" + )) + })?; + log::info!( + "Init WebAssembly UDF runtime for {:?} took: {:?}", + func.name, + start.elapsed() + ); + Ok(ScriptRuntime::WebAssembly(runtime)) } - UDFLanguage::WebAssembly => Self::create_wasm_runtime(code), #[cfg(feature = "python-udf")] - UDFLanguage::Python => Ok(Self::Python), + UDFLanguage::Python => { + let builder = PyRuntimeBuilder { + name: func.name.clone(), + handler: func.func_name.clone(), + code: String::from_utf8(code.to_vec())?, + output_type: func.data_type.as_ref().clone(), + counter: Default::default(), + }; + Ok(Self::Python(python_pool::PyRuntimePool::new(builder))) + } #[cfg(not(feature = "python-udf"))] UDFLanguage::Python => Err(ErrorCode::EnterpriseFeatureNotEnable( "Failed to create python script udf", @@ -84,89 +94,30 @@ impl ScriptRuntime { } } - fn create_wasm_runtime(code_blob: Option<&[u8]>) -> Result { - let decoded_code_blob = code_blob - .ok_or_else(|| ErrorCode::UDFDataError("WASM module not provided".to_string()))?; - - let runtime = arrow_udf_wasm::Runtime::new(decoded_code_blob).map_err(|err| { - ErrorCode::UDFRuntimeError(format!("Failed to create WASM runtime for module: {err}")) - })?; - - Ok(ScriptRuntime::WebAssembly(Arc::new(RwLock::new(runtime)))) - } - - pub fn add_function_with_handler(&self, func: &UdfFunctionDesc, code: &[u8]) -> Result<()> { - let tmp_schema = - DataSchema::new(vec![DataField::new("tmp", func.data_type.as_ref().clone())]); - let arrow_schema = Schema::from(&tmp_schema); - - match self { - ScriptRuntime::JavaScript(runtimes) => { - let code = std::str::from_utf8(code)?; - for runtime in runtimes { - let mut runtime = runtime.write(); - runtime.add_function_with_handler( - &func.name, - // we pass the field instead of the data type because arrow-udf-js - // now takes the field as an argument here so that it can get any - // metadata associated with the field - arrow_schema.field(0).clone(), - arrow_udf_js::CallMode::ReturnNullOnNullInput, - code, - &func.func_name, - )?; - } - } - #[cfg(feature = "python-udf")] - ScriptRuntime::Python => { - let code: &str = std::str::from_utf8(code)?; - let mut runtime = GLOBAL_PYTHON_RUNTIME.write(); - runtime.add_function_with_handler( - &func.name, - arrow_schema.field(0).data_type().clone(), - arrow_udf_python::CallMode::ReturnNullOnNullInput, - code, - &func.func_name, - )?; - } - // Ignore the execution for WASM context - ScriptRuntime::WebAssembly(_) => {} - } - - Ok(()) - } - pub fn handle_execution( &self, func: &UdfFunctionDesc, input_batch: &RecordBatch, - index: usize, ) -> Result { let result_batch = match self { - 
ScriptRuntime::JavaScript(runtimes) => { - // Choose a js runtime in order to avoid blocking - let idx = index % runtimes.len(); - let runtime = &runtimes[idx]; - let runtime = runtime.read(); - runtime.call(&func.name, input_batch).map_err(|err| { + ScriptRuntime::JavaScript(pool) => pool + .call(|runtime| runtime.call(&func.name, input_batch)) + .map_err(|err| { ErrorCode::UDFRuntimeError(format!( "JavaScript UDF {:?} execution failed: {err}", func.name )) - })? - } + })?, #[cfg(feature = "python-udf")] - ScriptRuntime::Python => { - let runtime = GLOBAL_PYTHON_RUNTIME.read(); - runtime.call(&func.name, input_batch).map_err(|err| { + ScriptRuntime::Python(pool) => pool + .call(|runtime| runtime.call(&func.name, input_batch)) + .map_err(|err| { ErrorCode::UDFRuntimeError(format!( "Python UDF {:?} execution failed: {err}", func.name )) - })? - } + })?, ScriptRuntime::WebAssembly(runtime) => { - let runtime = runtime.read(); runtime.call(&func.func_name, input_batch).map_err(|err| { ErrorCode::UDFRuntimeError(format!( "WASM UDF {:?} execution failed: {err}", @@ -179,25 +130,116 @@ impl ScriptRuntime { } } +pub struct JsRuntimeBuilder { + name: String, + handler: String, + code: String, + output_type: DataType, + + counter: AtomicUsize, +} + +impl RuntimeBuilder for JsRuntimeBuilder { + type Error = ErrorCode; + + fn build(&self) -> Result { + let start = std::time::Instant::now(); + let mut runtime = arrow_udf_js::Runtime::new() + .map_err(|e| ErrorCode::UDFDataError(format!("Cannot create js runtime: {e}")))?; + + let converter = runtime.converter_mut(); + converter.set_arrow_extension_key(EXTENSION_KEY); + converter.set_json_extension_name(ARROW_EXT_TYPE_VARIANT); + + runtime.add_function_with_handler( + &self.name, + // we pass the field instead of the data type because arrow-udf-js + // now takes the field as an argument here so that it can get any + // metadata associated with the field + arrow_field_from_data_type(&self.name, self.output_type.clone()), + arrow_udf_js::CallMode::ReturnNullOnNullInput, + &self.code, + &self.handler, + )?; + + log::info!( + "Init JavaScript UDF runtime for {:?} #{} took: {:?}", + self.name, + self.counter + .fetch_add(1, std::sync::atomic::Ordering::SeqCst), + start.elapsed() + ); + + Ok(runtime) + } +} + +fn arrow_field_from_data_type(name: &str, dt: DataType) -> arrow_schema::Field { + let field = DataField::new(name, dt); + (&field).into() +} + +type JsRuntimePool = Pool; + +#[cfg(feature = "python-udf")] +pub struct PyRuntimeBuilder { + name: String, + handler: String, + code: String, + output_type: DataType, + + counter: AtomicUsize, +} + +#[cfg(feature = "python-udf")] +mod python_pool { + use super::*; + + impl RuntimeBuilder for PyRuntimeBuilder { + type Error = ErrorCode; + + fn build(&self) -> Result { + let start = std::time::Instant::now(); + let mut runtime = arrow_udf_python::Builder::default() + .sandboxed(true) + .build()?; + runtime.add_function_with_handler( + &self.name, + arrow_field_from_data_type(&self.name, self.output_type.clone()), + arrow_udf_python::CallMode::CalledOnNullInput, + &self.code, + &self.handler, + )?; + + log::info!( + "Init Python UDF runtime for {:?} #{} took: {:?}", + self.name, + self.counter + .fetch_add(1, std::sync::atomic::Ordering::SeqCst), + start.elapsed() + ); + + Ok(runtime) + } + } + + pub type PyRuntimePool = Pool; +} + pub struct TransformUdfScript { funcs: Vec, script_runtimes: BTreeMap>, - index_seq: Arc, } -unsafe impl Send for TransformUdfScript {} - impl TransformUdfScript { pub fn new( 
_func_ctx: FunctionContext, funcs: Vec, script_runtimes: BTreeMap>, - index_seq: Arc, ) -> Self { Self { funcs, script_runtimes, - index_seq, } } } @@ -212,79 +254,39 @@ impl Transform for TransformUdfScript { return Ok(data_block); } - let index = self.index_seq.fetch_add(1, Ordering::SeqCst); for func in &self.funcs { let num_rows = data_block.num_rows(); let block_entries = self.prepare_block_entries(func, &data_block)?; let input_batch = self.create_input_batch(block_entries, num_rows)?; - let runtime_key = Self::get_runtime_key(func)?; - - if let Some(runtime) = self.script_runtimes.get(&runtime_key) { - let result_batch = runtime.handle_execution(func, &input_batch, index)?; - self.update_datablock(func, result_batch, &mut data_block)?; - } else { - return Err(ErrorCode::UDFDataError(format!( - "Failed to find runtime for function {:?} with key: {:?}", - func.name, runtime_key - ))); - } + let runtime = self.script_runtimes.get(&func.name).unwrap(); + let result_batch = runtime.handle_execution(func, &input_batch)?; + self.update_datablock(func, result_batch, &mut data_block)?; } Ok(data_block) } } impl TransformUdfScript { - fn get_runtime_key(func: &UdfFunctionDesc) -> Result { - let (lang, func_name) = match &func.udf_type { - UDFType::Script(UDFScriptCode { language: lang, .. }) => (lang, &func.func_name), - _ => { - return Err(ErrorCode::UDFDataError(format!( - "Unsupported UDFType variant for function {:?}", - func.name - ))); - } - }; - - let runtime_key = format!("{}-{}", lang, func_name.trim()); - Ok(runtime_key) - } - - pub fn init_runtime( - funcs: &[UdfFunctionDesc], - runtime_num: usize, - ) -> Result>> { + pub fn init_runtime(funcs: &[UdfFunctionDesc]) -> Result>> { let mut script_runtimes: BTreeMap> = BTreeMap::new(); - let start = std::time::Instant::now(); for func in funcs { - let (&lang, code_opt) = match &func.udf_type { - UDFType::Script(UDFScriptCode { language, code, .. }) => { - (language, Some(code.as_ref().as_ref())) - } + let code = match &func.udf_type { + UDFType::Script(code) => code, _ => continue, }; - let runtime_key = Self::get_runtime_key(func)?; - let runtime = match script_runtimes.entry(runtime_key.clone()) { - Entry::Occupied(entry) => entry.into_mut().clone(), - Entry::Vacant(entry) => { - let new_runtime = ScriptRuntime::try_create(lang, code_opt, runtime_num) - .map(Arc::new) - .map_err(|err| { - ErrorCode::UDFDataError(format!( - "Failed to create UDF runtime for language {lang:?} with error: {err}", - )) - })?; - entry.insert(new_runtime).clone() - } + if let Entry::Vacant(entry) = script_runtimes.entry(func.name.clone()) { + let runtime = ScriptRuntime::try_create(func).map_err(|err| { + ErrorCode::UDFDataError(format!( + "Failed to create UDF runtime for language {:?} with error: {err}", + code.language + )) + })?; + entry.insert(Arc::new(runtime)); }; - - if let UDFType::Script(UDFScriptCode { code, .. 
}) = &func.udf_type { - runtime.add_function_with_handler(func, code.as_ref().as_ref())?; - } } - log::info!("Init UDF runtimes took: {:?}", start.elapsed()); Ok(script_runtimes) } @@ -378,7 +380,7 @@ impl TransformUdfScript { if col.data_type != func.data_type.as_ref().clone() { return Err(ErrorCode::UDFDataError(format!( - "Function '{}' returned column with data type {:?} but expected {:?}", + "Function {:?} returned column with data type {:?} but expected {:?}", func.name, col.data_type, func.data_type ))); } diff --git a/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result new file mode 100755 index 0000000000000..26fa101a44a09 --- /dev/null +++ b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.result @@ -0,0 +1,3 @@ +1000000 +2000000 +100000000 diff --git a/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh new file mode 100755 index 0000000000000..a9303b2a6287d --- /dev/null +++ b/tests/suites/0_stateless/20+_others/20_0023_udf_script_parallelism.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +$BENDSQL_CLIENT_CONNECT --query=""" +CREATE OR REPLACE FUNCTION js_wait_test ( INT64 ) RETURNS INT64 LANGUAGE javascript HANDLER = 'wait' AS \$\$ +export function wait(n) { + let i = 0; + while (i < n * 1000000) { i++ } + return i; +} +\$\$; +""" + +$BENDSQL_CLIENT_CONNECT --query='select js_wait_test(100)' & +job_pid=$! +$BENDSQL_CLIENT_CONNECT --query='select js_wait_test(1)' +$BENDSQL_CLIENT_CONNECT --query='select js_wait_test(2)' +wait $job_pid diff --git a/tests/suites/5_ee/07_failsafe/08_0000_amend_table.result b/tests/suites/5_ee/07_failsafe/07_0000_amend_table.result similarity index 100% rename from tests/suites/5_ee/07_failsafe/08_0000_amend_table.result rename to tests/suites/5_ee/07_failsafe/07_0000_amend_table.result diff --git a/tests/suites/5_ee/07_failsafe/08_0000_amend_table.sh b/tests/suites/5_ee/07_failsafe/07_0000_amend_table.sh similarity index 100% rename from tests/suites/5_ee/07_failsafe/08_0000_amend_table.sh rename to tests/suites/5_ee/07_failsafe/07_0000_amend_table.sh diff --git a/tests/suites/5_ee/08_udf_python/08_0000_parallelism.result b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.result new file mode 100755 index 0000000000000..040e114b163a0 --- /dev/null +++ b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.result @@ -0,0 +1,3 @@ +1 +2 +100 diff --git a/tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh new file mode 100755 index 0000000000000..52121fd75b44f --- /dev/null +++ b/tests/suites/5_ee/08_udf_python/08_0000_parallelism.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../../../shell_env.sh + +$BENDSQL_CLIENT_CONNECT --query=""" +CREATE OR REPLACE FUNCTION python_sleep ( INT64 ) RETURNS INT64 LANGUAGE python HANDLER = 'sleep' AS \$\$ +import time + +def sleep(n): + time.sleep(n/100) + return n +\$\$; +""" + +$BENDSQL_CLIENT_CONNECT --query='select python_sleep(100)' & +job_pid=$! 
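# Review note: the long python_sleep(100) call above is started in the background;
# the expected .result lists 1 and 2 before 100, which only holds if the two short
# calls below can run while the long one is still in flight — each query getting its
# own pooled runtime instead of queueing behind a shared one. `wait $job_pid` then
# reaps the background query before the script exits.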
+$BENDSQL_CLIENT_CONNECT --query='select python_sleep(1)' +$BENDSQL_CLIENT_CONNECT --query='select python_sleep(2)' +wait $job_pid From 3a32c18063b0248ba860b74bda87c3cf89978a3b Mon Sep 17 00:00:00 2001 From: baishen Date: Sat, 18 Jan 2025 09:41:33 +0800 Subject: [PATCH 29/33] feat(sqlsmith): fuzz test support read Sqlite and Duckdb test (#17301) --- src/query/ast/src/parser/expr.rs | 22 +-- src/tests/sqlsmith/src/bin/main.rs | 5 + src/tests/sqlsmith/src/lib.rs | 1 + src/tests/sqlsmith/src/runner.rs | 215 ++++++++---------------- src/tests/sqlsmith/src/sql_gen/ddl.rs | 2 +- src/tests/sqlsmith/src/sql_gen/dml.rs | 74 +++++--- src/tests/sqlsmith/src/sql_gen/func.rs | 102 +++++++---- src/tests/sqlsmith/src/sql_gen/types.rs | 15 ++ src/tests/sqlsmith/src/util.rs | 140 +++++++++++++++ 9 files changed, 361 insertions(+), 215 deletions(-) create mode 100644 src/tests/sqlsmith/src/util.rs diff --git a/src/query/ast/src/parser/expr.rs b/src/query/ast/src/parser/expr.rs index 854923757dc22..551dae1108388 100644 --- a/src/query/ast/src/parser/expr.rs +++ b/src/query/ast/src/parser/expr.rs @@ -1088,7 +1088,7 @@ pub fn expr_element(i: Input) -> IResult> { #function_name ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" - ~ #window_function + ~ #window_function? }, |(name, _, params, _, _, opt_distinct, opt_args, _, window)| ExprElement::FunctionCall { func: FunctionCall { @@ -1096,24 +1096,7 @@ pub fn expr_element(i: Input) -> IResult> { name, args: opt_args.unwrap_or_default(), params, - window: Some(window), - lambda: None, - }, - }, - ); - let function_call_with_params = map( - rule! { - #function_name - ~ "(" ~ #comma_separated_list1(subexpr(0)) ~ ")" - ~ "(" ~ DISTINCT? ~ #comma_separated_list0(subexpr(0))? ~ ")" - }, - |(name, _, params, _, _, opt_distinct, opt_args, _)| ExprElement::FunctionCall { - func: FunctionCall { - distinct: opt_distinct.is_some(), - name, - args: opt_args.unwrap_or_default(), - params, - window: None, + window, lambda: None, }, }, @@ -1406,7 +1389,6 @@ pub fn expr_element(i: Input) -> IResult> { | #function_call_with_lambda : "`function(..., x -> ...)`" | #function_call_with_window : "`function(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" | #function_call_with_params_window : "`function(...)(...) OVER ([ PARTITION BY , ... ] [ ORDER BY , ... ] [ ])`" - | #function_call_with_params : "`function(...)(...)`" | #function_call : "`function(...)`" ), rule!( diff --git a/src/tests/sqlsmith/src/bin/main.rs b/src/tests/sqlsmith/src/bin/main.rs index 5d58a03d7dd1d..48b4978fdb9c2 100644 --- a/src/tests/sqlsmith/src/bin/main.rs +++ b/src/tests/sqlsmith/src/bin/main.rs @@ -52,6 +52,10 @@ pub struct Args { /// The fuzz query test file path. #[clap(long, default_value = "")] fuzz_path: String, + + /// The log path to write executed SQLs.. 
+ #[clap(long, default_value = ".databend/sqlsmithlog")] + log_path: String, } #[tokio::main(flavor = "multi_thread", worker_threads = 5)] @@ -69,6 +73,7 @@ async fn main() -> Result<()> { args.user.clone(), args.pass.clone(), args.db.clone(), + args.log_path.clone(), args.count, None, args.timeout, diff --git a/src/tests/sqlsmith/src/lib.rs b/src/tests/sqlsmith/src/lib.rs index 363cf6ddd7fcb..ff87a3b175e45 100644 --- a/src/tests/sqlsmith/src/lib.rs +++ b/src/tests/sqlsmith/src/lib.rs @@ -19,5 +19,6 @@ mod query_fuzzer; mod reducer; mod runner; mod sql_gen; +mod util; pub use runner::Runner; diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs index 3c37316611df2..1816162f6393f 100644 --- a/src/tests/sqlsmith/src/runner.rs +++ b/src/tests/sqlsmith/src/runner.rs @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::fs; +use std::fs::create_dir_all; +use std::fs::File; use std::future::Future; -use std::path::Path; -use std::path::PathBuf; +use std::io::BufWriter; +use std::io::Write; use std::time::Duration; -use databend_common_ast::ast::AlterTableAction; use databend_common_ast::ast::CreateTableSource; use databend_common_ast::ast::CreateTableStmt; use databend_common_ast::ast::DropTableStmt; @@ -33,15 +33,19 @@ use databend_common_expression::types::NumberDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRefExt; use databend_common_sql::resolve_type_name; +use jiff::fmt::strtime; +use jiff::Zoned; use rand::rngs::SmallRng; use rand::Rng; use rand::SeedableRng; use crate::http_client::HttpClient; -use crate::http_client::QueryResponse; use crate::query_fuzzer::QueryFuzzer; use crate::sql_gen::SqlGenerator; use crate::sql_gen::Table; +use crate::util::read_sql_from_test_dirs; + +const LOG_FILE_FORMAT: &str = "%Y-%m-%d-%H-%M-%S-%f"; const KNOWN_ERRORS: &[&str] = &[ // Errors caused by illegal parameters @@ -68,6 +72,8 @@ const KNOWN_ERRORS: &[&str] = &[ "attempt to shift right with overflow", "attempt to subtract with overflow", "attempt to add with overflow", + "attempt to multiply with overflow", + "map keys have to be unique", // Unsupported features "Row format is not yet support for", "to_decimal not support this DataType", @@ -93,6 +99,7 @@ pub struct Runner { pub(crate) client: HttpClient, db: String, timeout: u64, + file_buf: BufWriter, } impl Runner { @@ -101,26 +108,36 @@ impl Runner { username: String, password: String, db: String, + log_path: String, count: usize, seed: Option, timeout: u64, ) -> Result { let client = HttpClient::create(host, username, password).await?; + // Create SQL log directory and file. 
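        // Review note: run_sql below also appends every executed statement to this
        // file (one writeln! per SQL), so a failing fuzz run can be replayed from the
        // log; the file name embeds a timestamp rendered with LOG_FILE_FORMAT.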
+ create_dir_all(&log_path)?; + let now = Zoned::now(); + let time = strtime::format(LOG_FILE_FORMAT, &now).unwrap(); + let log_path = format!("{}/databend-sqlsmith.{}.sql", log_path, time); + let file = File::create(log_path)?; + let file_buf = BufWriter::new(file); + Ok(Self { count, seed, client, db, timeout, + file_buf, }) } pub async fn run(&mut self) -> Result<()> { let create_db_sql = format!("CREATE OR REPLACE database {}", self.db); - let _ = self.client.query(&create_db_sql).await?; + let _ = self.run_sql(create_db_sql, None).await; let use_db_sql = format!("USE {}", self.db); - let _ = self.client.query(&use_db_sql).await?; + let _ = self.run_sql(use_db_sql, None).await; let settings = self.get_settings().await?; @@ -132,74 +149,53 @@ impl Runner { let mut new_tables = tables.clone(); for (i, table) in tables.iter().enumerate() { - let insert_stmt = generator.gen_insert(table, row_count); - let insert_sql = insert_stmt.to_string(); - tracing::info!("insert_sql: {}", insert_sql); - Self::check_res(self.client.query(&insert_sql).await); - + let insert_stmts = generator.gen_insert(table, row_count); + for insert_stmt in insert_stmts.into_iter() { + let insert_sql = insert_stmt.to_string(); + let _ = self.run_sql(insert_sql, None).await; + } let alter_stmt_opt = generator.gen_alter(table, row_count); if let Some((alter_stmt, new_table, insert_stmt_opt)) = alter_stmt_opt { - if let AlterTableAction::RenameTable { ref new_table } = alter_stmt.action { - let drop_table_stmt = DropTableStmt { - if_exists: true, - catalog: None, - database: None, - table: new_table.clone(), - all: false, - }; - let drop_table_sql = drop_table_stmt.to_string(); - tracing::info!("drop_table_sql: {}", drop_table_sql); - Self::check_res(self.client.query(&drop_table_sql).await); - } let alter_sql = alter_stmt.to_string(); - tracing::info!("alter_sql: {}", alter_sql); - Self::check_res(self.client.query(&alter_sql).await); + let _ = self.run_sql(alter_sql, None).await; // save new table schema new_tables[i] = new_table; if let Some(insert_stmt) = insert_stmt_opt { let insert_sql = insert_stmt.to_string(); - tracing::info!("after alter insert_sql: {}", insert_sql); - Self::check_res(self.client.query(&insert_sql).await); + let _ = self.run_sql(insert_sql, None).await; } } } generator.tables = new_tables; let enable_merge = "set enable_experimental_merge_into = 1".to_string(); - Self::check_res(self.client.query(&enable_merge).await); - // generate merge, replace, update, delete - for _ in 0..20 { - let sql = match generator.rng.gen_range(0..=20) { - 0..=10 => generator.gen_merge().to_string(), - 11..=15 => generator.gen_replace().to_string(), - 16..=19 => generator.gen_update().to_string(), - 20 => generator.gen_delete().to_string(), - _ => unreachable!(), - }; - let mut timeout_err = None; - tracing::info!("dml sql: {}", sql); - Self::check_timeout( - async { Self::check_res(self.client.query(&sql).await) }, - self.timeout, - &mut timeout_err, - ) - .await; - if let Some(timeout_err) = timeout_err { - tracing::error!("sql timeout: {}", timeout_err); - } + let _ = self.run_sql(enable_merge, None).await; + let dml_stmts = generator.gen_dml_stmt(); + for dml_stmt in dml_stmts.into_iter() { + let dml_sql = dml_stmt.to_string(); + let _ = self.run_sql(dml_sql, None).await; } // generate query + let mut conn_error_cnt = 0; for _ in 0..self.count { let query = generator.gen_query(); let query_sql = query.to_string(); - self.run_sql(query_sql, Some(query)).await; + let is_conn_error = self.run_sql(query_sql, 
Some(query)).await; + if is_conn_error { + conn_error_cnt += 1; + } + // Query server panic, exist + if conn_error_cnt >= 3 { + break; + } } Ok(()) } pub async fn run_fuzz(&mut self, fuzz_path: &str) -> Result<()> { - let sqls = Self::read_sql_from_sqllogic_tests(fuzz_path)?; + let sqls = read_sql_from_test_dirs(fuzz_path)?; + let mut conn_error_cnt = 0; let mut query_fuzzer = QueryFuzzer::new(); for sql in sqls { tracing::info!("orig sql: {}", sql); @@ -213,7 +209,14 @@ impl Runner { let fuzz_sql = fuzz_stmt.to_string(); tracing::info!("fuzz sql: {}", fuzz_sql); if let Statement::Query(_) = fuzz_stmt { - self.run_sql(fuzz_sql, None).await; + let is_conn_error = self.run_sql(fuzz_sql, None).await; + if is_conn_error { + conn_error_cnt += 1; + } + // Query server panic, exist + if conn_error_cnt >= 3 { + break; + } } else if let Err(err) = self.client.query(&fuzz_sql).await { tracing::error!("fuzz sql err: {}", err); } @@ -222,9 +225,13 @@ impl Runner { Ok(()) } - async fn run_sql(&mut self, query_sql: String, query: Option) { + async fn run_sql(&mut self, query_sql: String, query: Option) -> bool { + // Write SQL to log file for replay. + writeln!(self.file_buf, "{};", query_sql).unwrap(); + let mut timeout_err = None; let mut is_error = false; + let mut is_conn_error = false; let mut try_reduce = false; let mut err_code = 0; let mut err_message = String::new(); @@ -252,6 +259,10 @@ impl Runner { } } Err(err) => { + // 1910 is ReqwestError, Query server may have panic, need exist + if err.code() == 1910 { + is_conn_error = true; + } is_error = true; err_message = format!("http err: {}", err); } @@ -263,18 +274,19 @@ impl Runner { .await; if let Some(timeout_err) = timeout_err { - tracing::info!("query_sql: {}", query_sql); + tracing::info!("sql: {}", query_sql); tracing::error!("sql timeout: {}", timeout_err); } else if is_error { - tracing::info!("query_sql: {}", query_sql); + tracing::info!("sql: {}", query_sql); if try_reduce { if let Some(query) = query { let reduced_query = self.try_reduce_query(err_code, query).await; - tracing::info!("reduced query_sql: {}", reduced_query.to_string()); + tracing::info!("reduced sql: {}", reduced_query.to_string()); } } tracing::error!(err_message); } + is_conn_error } fn generate_rng(seed: Option) -> impl Rng { @@ -292,11 +304,9 @@ impl Runner { let mut tables = Vec::with_capacity(table_stmts.len()); for (drop_table_stmt, create_table_stmt) in table_stmts { let drop_table_sql = drop_table_stmt.to_string(); - tracing::info!("drop_table_sql: {}", drop_table_sql); - Self::check_res(self.client.query(&drop_table_sql).await); + let _ = self.run_sql(drop_table_sql, None).await; let create_table_sql = create_table_stmt.to_string(); - tracing::info!("create_table_sql: {}", create_table_sql); - Self::check_res(self.client.query(&create_table_sql).await); + let _ = self.run_sql(create_table_sql, None).await; let db_name = create_table_stmt.database.clone(); let table_name = create_table_stmt.table.clone(); @@ -350,89 +360,4 @@ impl Runner { *timeout_err = Some(format!("{}", e)); } } - - fn check_res(responses: Result>) { - match responses { - Ok(responses) => { - if let Some(error) = &responses[0].error { - let value = error.as_object().unwrap(); - let code = value["code"].as_u64().unwrap(); - let message = value["message"].as_str().unwrap(); - if code == 1005 || code == 1065 { - return; - } - if KNOWN_ERRORS - .iter() - .any(|known_err| message.starts_with(known_err)) - { - return; - } - let err = format!("sql exec err code: {} message: {}", code, message); 
- tracing::error!(err); - } - } - Err(err) => { - let err = format!("http err: {}", err); - tracing::error!(err); - } - } - } - - fn collect_paths(path: &Path, paths: &mut Vec) -> Result<()> { - if path.is_dir() { - for entry in (path.read_dir()?).flatten() { - let sub_path = entry.path(); - Self::collect_paths(&sub_path, paths)?; - } - } else if path.is_file() && path.extension().is_some_and(|ext| ext == "test") { - paths.push(path.to_path_buf()); - } - Ok(()) - } - - // Walk through the sqllogic tests folder and read the test SQL in it as seed to generate fuzzed SQL. - fn read_sql_from_sqllogic_tests(fuzz_path: &str) -> Result> { - let path = Path::new(fuzz_path); - let mut paths = vec![]; - Self::collect_paths(path, &mut paths)?; - - let mut sqls = vec![]; - for path in paths.into_iter() { - let content = fs::read_to_string(path)?; - let lines: Vec<&str> = content.lines().collect(); - let mut in_sql = false; - let mut current_sql = String::new(); - - for line in lines { - let line = line.trim(); - if line.starts_with("statement") || line.starts_with("query") { - if !current_sql.is_empty() { - sqls.push(current_sql.clone()); - current_sql.clear(); - } - in_sql = !line.starts_with("statement error"); - } else if line.eq("----") { - // ignore result values - in_sql = false; - } else if !line.is_empty() - && !line.starts_with("--") - && !line.starts_with("#") - && !line.starts_with("include") - && !line.starts_with("onlyif") - && in_sql - { - if current_sql.is_empty() { - current_sql.push_str(line); - } else { - current_sql.push(' '); - current_sql.push_str(line); - } - } - } - if !current_sql.is_empty() { - sqls.push(current_sql); - } - } - Ok(sqls) - } } diff --git a/src/tests/sqlsmith/src/sql_gen/ddl.rs b/src/tests/sqlsmith/src/sql_gen/ddl.rs index cb34b73353d6e..b1b0b53be2367 100644 --- a/src/tests/sqlsmith/src/sql_gen/ddl.rs +++ b/src/tests/sqlsmith/src/sql_gen/ddl.rs @@ -87,7 +87,7 @@ impl SqlGenerator<'_, R> { }; let create_table = CreateTableStmt { - create_option: CreateOption::CreateIfNotExists, + create_option: CreateOption::CreateOrReplace, catalog: None, database: Some(Identifier::from_name(None, db_name)), table: Identifier::from_name(None, table_name), diff --git a/src/tests/sqlsmith/src/sql_gen/dml.rs b/src/tests/sqlsmith/src/sql_gen/dml.rs index b578177f4b625..59b5d73ae382b 100644 --- a/src/tests/sqlsmith/src/sql_gen/dml.rs +++ b/src/tests/sqlsmith/src/sql_gen/dml.rs @@ -33,6 +33,7 @@ use databend_common_ast::ast::MergeOption; use databend_common_ast::ast::MergeSource; use databend_common_ast::ast::MergeUpdateExpr; use databend_common_ast::ast::ReplaceStmt; +use databend_common_ast::ast::Statement; use databend_common_ast::ast::TableReference; use databend_common_ast::ast::UnmatchedClause; use databend_common_ast::ast::UpdateExpr; @@ -67,31 +68,66 @@ enum MutTableAction { } impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { - pub(crate) fn gen_insert(&mut self, table: &Table, row_count: usize) -> InsertStmt { + pub(crate) fn gen_insert(&mut self, table: &Table, row_count: usize) -> Vec { let data_types = table .schema .fields() .iter() .map(|f| (&f.data_type).into()) .collect::>(); - let source = self.gen_insert_source(&data_types, row_count); - InsertStmt { - // TODO - hints: None, - with: None, - catalog: None, - database: table.db_name.clone(), - table: table.name.clone(), - // TODO - columns: vec![], - source, - // TODO - overwrite: false, + let num = self.rng.gen_range(1..=5); + let mut insert_stmts = Vec::with_capacity(num); + for _ in 0..num { + let source = 
self.gen_insert_source(&data_types, row_count); + let insert_stmt = InsertStmt { + // TODO + hints: None, + with: None, + catalog: None, + database: table.db_name.clone(), + table: table.name.clone(), + // TODO + columns: vec![], + source, + // TODO + overwrite: false, + }; + insert_stmts.push(insert_stmt); + } + insert_stmts + } + + pub(crate) fn gen_dml_stmt(&mut self) -> Vec { + let num = self.rng.gen_range(1..=10); + let mut dml_stmts = Vec::with_capacity(num); + for _ in 0..num { + // generate merge, replace, update, delete + let dml_stmt = match self.rng.gen_range(0..=20) { + 0..=10 => { + let merge_stmt = self.gen_merge(); + Statement::MergeInto(merge_stmt) + } + 11..=15 => { + let replace_stmt = self.gen_replace(); + Statement::Replace(replace_stmt) + } + 16..=19 => { + let update_stmt = self.gen_update(); + Statement::Update(update_stmt) + } + 20 => { + let delete_stmt = self.gen_delete(); + Statement::Delete(delete_stmt) + } + _ => unreachable!(), + }; + dml_stmts.push(dml_stmt); } + dml_stmts } - pub(crate) fn gen_delete(&mut self) -> DeleteStmt { + fn gen_delete(&mut self) -> DeleteStmt { let hints = self.gen_hints(); let (_table, table_reference) = self.random_select_table(); let selection = Some(self.gen_expr(&DataType::Boolean)); @@ -104,7 +140,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { } } - pub(crate) fn gen_update(&mut self) -> UpdateStmt { + fn gen_update(&mut self) -> UpdateStmt { let hints = self.gen_hints(); let (table, table_reference) = self.random_select_table(); let selection = if self.rng.gen_bool(0.8) { @@ -130,7 +166,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { } } - pub(crate) fn gen_replace(&mut self) -> ReplaceStmt { + fn gen_replace(&mut self) -> ReplaceStmt { let hints = self.gen_hints(); let (table, _) = self.random_select_table(); let fields = self.random_select_fields(&table); @@ -160,7 +196,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { } } - pub(crate) fn gen_merge(&mut self) -> MergeIntoStmt { + fn gen_merge(&mut self) -> MergeIntoStmt { let hints = self.gen_hints(); self.cte_tables.clear(); self.bound_tables.clear(); @@ -388,7 +424,7 @@ impl<'a, R: Rng + 'a> SqlGenerator<'a, R> { table: &Table, row_count: usize, ) -> Option<(AlterTableStmt, Table, Option)> { - if self.rng.gen_bool(0.3) { + if self.rng.gen_bool(0.5) { return None; } let mut new_table = table.clone(); diff --git a/src/tests/sqlsmith/src/sql_gen/func.rs b/src/tests/sqlsmith/src/sql_gen/func.rs index 32bcefd81a536..91e1d077e58bf 100644 --- a/src/tests/sqlsmith/src/sql_gen/func.rs +++ b/src/tests/sqlsmith/src/sql_gen/func.rs @@ -63,8 +63,9 @@ impl SqlGenerator<'_, R> { } pub(crate) fn gen_factory_scalar_func(&mut self, ty: &DataType) -> Expr { + let by_ty = self.rng.gen_bool(0.6); let (name, params, args_type) = match ty.remove_nullable() { - DataType::String => { + DataType::String if by_ty => { let idx = self.rng.gen_range(0..=5); let name = match idx { 0 => "char".to_string(), @@ -151,7 +152,7 @@ impl SqlGenerator<'_, R> { }; (name, vec![], args_type) } - DataType::Boolean => { + DataType::Boolean if by_ty => { let idx = self.rng.gen_range(0..=3); let name = match idx { 0 => "and_filters".to_string(), @@ -185,7 +186,7 @@ impl SqlGenerator<'_, R> { }; (name, vec![], args_type) } - DataType::Number(_) => { + DataType::Number(_) if by_ty => { let idx = self.rng.gen_range(0..=4); let name = match idx { 0 => "point_in_ellipses".to_string(), @@ -282,13 +283,28 @@ impl SqlGenerator<'_, R> { (name, vec![], args_type) } - DataType::Array(box inner_ty) => { + DataType::Array(box 
inner_ty) if by_ty => { let name = "array".to_string(); let len = self.rng.gen_range(0..=4); let args_type = vec![inner_ty; len]; (name, vec![], args_type) } - DataType::Decimal(_) => { + DataType::Map(box DataType::Tuple(inner_tys)) if by_ty => { + let key_ty = inner_tys[0].clone(); + let name = if self.rng.gen_bool(0.5) { + "map_delete".to_string() + } else { + "map_pick".to_string() + }; + if self.rng.gen_bool(0.5) { + let len = self.rng.gen_range(1..=5); + let args_type = vec![key_ty; len]; + (name, vec![], args_type) + } else { + (name, vec![], vec![DataType::Array(Box::new(key_ty))]) + } + } + DataType::Decimal(_) if by_ty => { let decimal = ["to_float64", "to_float32", "to_decimal", "try_to_decimal"]; let name = decimal[self.rng.gen_range(0..=3)].to_string(); if name == "to_decimal" || name == "try_to_decimal" { @@ -312,11 +328,11 @@ impl SqlGenerator<'_, R> { (name, params, args_type) } } - DataType::Tuple(inner_tys) => { + DataType::Tuple(inner_tys) if by_ty => { let name = "tuple".to_string(); (name, vec![], inner_tys) } - DataType::Variant => { + DataType::Variant if by_ty => { if self.rng.gen_bool(0.5) { let json_func = ["json_array", "json_object", "json_object_keep_null"]; let name = json_func[self.rng.gen_range(0..=2)].to_string(); @@ -366,8 +382,9 @@ impl SqlGenerator<'_, R> { } pub(crate) fn gen_agg_func(&mut self, ty: &DataType) -> Expr { + let by_ty = self.rng.gen_bool(0.6); let (name, params, mut args_type) = match ty.remove_nullable() { - DataType::Number(NumberDataType::UInt8) => { + DataType::Number(NumberDataType::UInt8) if by_ty => { let name = "window_funnel".to_string(); let other_type = vec![DataType::Boolean; 6]; let mut args_type = Vec::with_capacity(7); @@ -382,7 +399,7 @@ impl SqlGenerator<'_, R> { let params = vec![Literal::UInt64(self.rng.gen_range(1..=10))]; (name, params, args_type) } - DataType::Number(NumberDataType::UInt64) => { + DataType::Number(NumberDataType::UInt64) if by_ty => { let idx = self.rng.gen_range(0..=7); let name = match idx { 0 => "approx_count_distinct".to_string(), @@ -422,7 +439,7 @@ impl SqlGenerator<'_, R> { }; (name, params, args_type) } - DataType::Array(_) => { + DataType::Array(_) if by_ty => { let idx = self.rng.gen_range(0..=2); let name = match idx { 0 => { @@ -465,14 +482,14 @@ impl SqlGenerator<'_, R> { }; (name, params, args_type) } - DataType::Decimal(_) => { + DataType::Decimal(_) if by_ty => { let name = "sum".to_string(); let params = vec![]; let args_type = vec![self.gen_decimal_data_type()]; (name, params, args_type) } - DataType::Number(NumberDataType::Float64) => { - let idx = self.rng.gen_range(0..=14); + DataType::Number(NumberDataType::Float64) if by_ty => { + let idx = self.rng.gen_range(0..=15); let name = match idx { 0 => "avg".to_string(), 1 => "covar_pop".to_string(), @@ -489,10 +506,11 @@ impl SqlGenerator<'_, R> { 12 => "quantile_cont".to_string(), 13 => "quantile_tdigest".to_string(), 14 => "quantile_disc".to_string(), + 15 => "quantile_tdigest_weighted".to_string(), _ => unreachable!(), }; - let args_type = if idx == 1 || idx == 2 { + let args_type = if idx == 1 || idx == 2 || idx == 15 { vec![ self.gen_all_number_data_type(), self.gen_all_number_data_type(), @@ -512,7 +530,7 @@ impl SqlGenerator<'_, R> { }; (name, params, args_type) } - DataType::Bitmap => { + DataType::Bitmap if by_ty => { let idx = self.rng.gen_range(0..=1); let name = match idx { 0 => "bitmap_intersect".to_string(), @@ -523,29 +541,52 @@ impl SqlGenerator<'_, R> { let args_type = vec![DataType::Bitmap]; (name, params, args_type) 
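            // Review note: the `by_ty` guard (rng.gen_bool(0.6)) added in this patch
            // means each type-specific arm is only taken ~60% of the time; otherwise
            // generation falls through to the generic arm below
            // (any/min/max/mode/arg_min/arg_max), so every return type keeps a
            // fallback and the generic aggregates still get coverage.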
} - DataType::String => { - let name = "string_agg".to_string(); - let args_type = if self.rng.gen_bool(0.6) { - vec![DataType::String] + DataType::String if by_ty => { + if self.rng.gen_bool(0.5) { + let name = "histogram".to_string(); + let arg_type = self.gen_simple_common_data_type(); + (name, vec![], vec![arg_type]) } else { - vec![DataType::String; 2] - }; - let params = vec![]; - (name, params, args_type) + let name = "string_agg".to_string(); + let args_type = if self.rng.gen_bool(0.6) { + vec![DataType::String] + } else { + vec![DataType::String; 2] + }; + let params = vec![]; + (name, params, args_type) + } + } + DataType::Variant if by_ty => { + if self.rng.gen_bool(0.5) { + let name = "json_array_agg".to_string(); + let arg_type = self.gen_simple_data_type(); + (name, vec![], vec![arg_type]) + } else { + let name = "json_object_agg".to_string(); + let key_type = DataType::String; + let val_type = self.gen_simple_data_type(); + (name, vec![], vec![key_type, val_type]) + } + } + DataType::Geometry if by_ty => { + let name = "st_collect".to_string(); + let arg_type = DataType::Geometry; + (name, vec![], vec![arg_type]) } _ => { - // TODO: other aggreate functions - let idx = self.rng.gen_range(0..=4); + let idx = self.rng.gen_range(0..=5); let name = match idx { 0 => "any".to_string(), 1 => "min".to_string(), 2 => "max".to_string(), - 3 => "arg_min".to_string(), - 4 => "arg_max".to_string(), + 3 => "mode".to_string(), + 4 => "arg_min".to_string(), + 5 => "arg_max".to_string(), _ => unreachable!(), }; let params = vec![]; - let args_type = if idx == 3 || idx == 4 { + let args_type = if idx == 4 || idx == 5 { vec![ty.clone(), self.gen_simple_data_type()] } else { vec![ty.clone()] @@ -579,10 +620,11 @@ impl SqlGenerator<'_, R> { } pub(crate) fn gen_window_func(&mut self, ty: &DataType) -> Expr { + let by_ty = self.rng.gen_bool(0.6); let window = self.gen_window(); let ty = ty.clone(); match ty { - DataType::Number(NumberDataType::UInt64) => { + DataType::Number(NumberDataType::UInt64) if by_ty => { let number = ["row_number", "rank", "dense_rank", "ntile"]; let name = number[self.rng.gen_range(0..=2)]; let args_type = if name == "ntile" { @@ -592,7 +634,7 @@ impl SqlGenerator<'_, R> { }; self.gen_func(name.to_string(), vec![], args_type, window, None) } - DataType::Number(NumberDataType::Float64) => { + DataType::Number(NumberDataType::Float64) if by_ty => { let float = ["percent_rank", "cume_dist"]; let name = float[self.rng.gen_range(0..=1)].to_string(); self.gen_func(name, vec![], vec![], window, None) diff --git a/src/tests/sqlsmith/src/sql_gen/types.rs b/src/tests/sqlsmith/src/sql_gen/types.rs index 42e2a1681f40a..5587d3294d00c 100644 --- a/src/tests/sqlsmith/src/sql_gen/types.rs +++ b/src/tests/sqlsmith/src/sql_gen/types.rs @@ -64,6 +64,21 @@ impl SqlGenerator<'_, R> { } } + pub(crate) fn gen_simple_common_data_type(&mut self) -> DataType { + let ty = match self.rng.gen_range(0..=7) { + 0 => DataType::String, + 1..=5 => self.gen_all_number_data_type(), + 6 => DataType::Timestamp, + 7 => DataType::Date, + _ => unreachable!(), + }; + if self.rng.gen_bool(0.5) { + ty + } else { + DataType::Nullable(Box::new(ty)) + } + } + pub(crate) fn gen_simple_data_type(&mut self) -> DataType { match self.rng.gen_range(0..=13) { 0 => DataType::Null, diff --git a/src/tests/sqlsmith/src/util.rs b/src/tests/sqlsmith/src/util.rs new file mode 100644 index 0000000000000..69c636245f2d8 --- /dev/null +++ b/src/tests/sqlsmith/src/util.rs @@ -0,0 +1,140 @@ +// Copyright 2021 Datafuse Labs +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fs::read_to_string; +use std::path::Path; +use std::path::PathBuf; + +use databend_common_exception::Result; + +// The fuzz test type +#[derive(Default)] +enum TestType { + #[default] + Databend, + Duckdb, + Sqlite, +} + +// Walk through the sqllogic tests folder and read the test SQL in it as seed to generate fuzzed SQL. +pub(crate) fn read_sql_from_test_dirs(fuzz_path: &str) -> Result<Vec<String>> { + let test_type = if fuzz_path.contains("sqlite") { + TestType::Sqlite + } else if fuzz_path.contains("duckdb") { + TestType::Duckdb + } else { + TestType::Databend + }; + + let path = Path::new(fuzz_path); + let mut paths = vec![]; + collect_paths(path, &mut paths)?; + + let mut sqls = vec![]; + for path in paths.into_iter() { + let content = read_to_string(path)?; + let lines: Vec<&str> = content.lines().collect(); + match test_type { + TestType::Databend | TestType::Duckdb => { + read_sqllogict_test_sqls(lines, &mut sqls); + } + TestType::Sqlite => { + read_sqlite_test_sqls(lines, &mut sqls); + } + } + } + Ok(sqls) +} + +fn collect_paths(path: &Path, paths: &mut Vec<PathBuf>) -> Result<()> { + if path.is_dir() { + for entry in (path.read_dir()?).flatten() { + let sub_path = entry.path(); + collect_paths(&sub_path, paths)?; + } + } else if path.is_file() && path.extension().is_some_and(|ext| ext == "test") { + paths.push(path.to_path_buf()); + } + Ok(()) +} + +// Read and parse Sqllogic test files, including Databend and Duckdb.
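// As a rough illustration (the SQL below is hypothetical, not taken from the
// suites), a sqllogictest record that this parser would split into a single
// seed SQL string looks roughly like:
//
//     statement ok
//     CREATE TABLE t (a INT)
//
//     query I
//     SELECT a FROM t ORDER BY a
//     ----
//     1
//
// SQL lines are collected after a `statement`/`query` directive (records that
// begin with `statement error` are skipped), and anything following the
// `----` separator is expected output and is ignored.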
+fn read_sqllogict_test_sqls(lines: Vec<&str>, sqls: &mut Vec<String>) { + let mut in_sql = false; + let mut current_sql = String::new(); + + for line in lines { + let line = line.trim(); + if line.is_empty() { + continue; + } + if line.starts_with("statement") || line.starts_with("query") { + if !current_sql.is_empty() { + sqls.push(current_sql.clone()); + current_sql.clear(); + } + in_sql = !line.starts_with("statement error"); + } else if line.eq("----") { + // ignore result values + in_sql = false; + } else if !line.is_empty() + && !line.starts_with("--") + && !line.starts_with("#") + && !line.starts_with("include") + && !line.starts_with("onlyif") + && in_sql + { + if current_sql.is_empty() { + current_sql.push_str(line); + } else { + current_sql.push(' '); + current_sql.push_str(line); + } + } + } + if !current_sql.is_empty() { + sqls.push(current_sql); + } +} + +// Read and parse Sqlite test files +fn read_sqlite_test_sqls(lines: Vec<&str>, sqls: &mut Vec<String>) { + let mut in_sql = false; + let mut current_sql = String::new(); + + for line in lines { + let line = line.trim(); + if line.is_empty() { + continue; + } + if line.starts_with("do_execsql_test") { + in_sql = true; + } else if line.starts_with("}") { + if !current_sql.is_empty() { + sqls.push(current_sql.clone()); + current_sql.clear(); + } + in_sql = false; + } else if in_sql { + let line = line.replace("PRIMARY KEY", ""); + let line = line.replace("WITHOUT ROWID", ""); + current_sql.push(' '); + current_sql.push_str(&line); + if line.ends_with(";") { + sqls.push(current_sql.clone()); + current_sql.clear(); + } + } + } +} From 07034df346b9059a3dd213a476870ac9f6e2aad0 Mon Sep 17 00:00:00 2001 From: zhya Date: Sat, 18 Jan 2025 23:07:36 +0800 Subject: [PATCH 30/33] fix: column not found in analyze (#17321) * fix: Column not found in analyze * fix --- .../interpreters/interpreter_table_analyze.rs | 70 +++++++++---------- .../09_fuse_engine/09_0044_issue_17314.sql | 44 ++++++++++++ 2 files changed, 77 insertions(+), 37 deletions(-) create mode 100644 tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql diff --git a/src/query/service/src/interpreters/interpreter_table_analyze.rs b/src/query/service/src/interpreters/interpreter_table_analyze.rs index 44ca5322cf00d..80bce205f7008 100644 --- a/src/query/service/src/interpreters/interpreter_table_analyze.rs +++ b/src/query/service/src/interpreters/interpreter_table_analyze.rs @@ -117,7 +117,6 @@ impl Interpreter for AnalyzeTableInterpreter { if let Some(snapshot) = snapshot_opt { // plan sql - let schema = table.schema(); let _table_info = table.get_table_info(); let table_statistics = table @@ -165,22 +164,20 @@ impl Interpreter for AnalyzeTableInterpreter { .get_settings() .get_sql_dialect()?
.default_ident_quote(); - let index_cols: Vec<(u32, String)> = schema - .fields() - .iter() - .filter(|f| RangeIndex::supported_type(&f.data_type().into())) - .map(|f| (f.column_id(), format!("{quote}{}{quote}", f.name))) - .collect(); // 0.01625 --> 12 buckets --> 4K size per column // 1.04 / math.sqrt(1<<12) --> 0.01625 const DISTINCT_ERROR_RATE: f64 = 0.01625; - let ndv_select_expr = index_cols + let ndv_select_expr = snapshot + .schema + .fields() .iter() - .map(|c| { + .filter(|f| RangeIndex::supported_type(&f.data_type().into())) + .map(|f| { format!( - "approx_count_distinct_state({DISTINCT_ERROR_RATE})({}) as ndv_{}", - c.1, c.0 + "approx_count_distinct_state({DISTINCT_ERROR_RATE})({quote}{}{quote}) as ndv_{}", + f.name, + f.column_id() ) }) .join(", "); @@ -190,7 +187,7 @@ impl Interpreter for AnalyzeTableInterpreter { plan.database, plan.table, ); - info!("Analyze via sql {:?}", sql); + info!("Analyze via sql: {sql}"); let (physical_plan, bind_context) = self.plan_sql(sql).await?; let mut build_res = @@ -200,34 +197,33 @@ impl Interpreter for AnalyzeTableInterpreter { // We add a setting `enable_analyze_histogram` to control whether to compute histogram(default is closed). let mut histogram_info_receivers = HashMap::new(); if self.ctx.get_settings().get_enable_analyze_histogram()? { - let histogram_sqls = index_cols + let histogram_sqls = table + .schema() + .fields() .iter() - .map(|c| { - format!( - "SELECT quantile, - COUNT(DISTINCT {}) AS ndv, - MAX({}) AS max_value, - MIN({}) AS min_value, - COUNT() as count - FROM ( - SELECT {}, NTILE({}) OVER (ORDER BY {}) AS quantile - FROM {}.{} WHERE {} IS DISTINCT FROM NULL - ) - GROUP BY quantile ORDER BY quantile \n", - c.1, - c.1, - c.1, - c.1, - DEFAULT_HISTOGRAM_BUCKETS, - c.1, - plan.database, - plan.table, - c.1, + .filter(|f| RangeIndex::supported_type(&f.data_type().into())) + .map(|f| { + let col_name = format!("{quote}{}{quote}", f.name); + ( + format!( + "SELECT quantile, \ + COUNT(DISTINCT {col_name}) AS ndv, \ + MAX({col_name}) AS max_value, \ + MIN({col_name}) AS min_value, \ + COUNT() as count \ + FROM ( \ + SELECT {col_name}, NTILE({}) OVER (ORDER BY {col_name}) AS quantile \ + FROM {}.{} WHERE {col_name} IS DISTINCT FROM NULL \ + ) \ + GROUP BY quantile ORDER BY quantile", + DEFAULT_HISTOGRAM_BUCKETS, plan.database, plan.table, + ), + f.column_id(), ) }) .collect::>(); - for (sql, (col_id, _)) in histogram_sqls.into_iter().zip(index_cols.iter()) { - info!("Analyze histogram via sql {:?}", sql); + for (sql, col_id) in histogram_sqls.into_iter() { + info!("Analyze histogram via sql: {sql}"); let (mut histogram_plan, bind_context) = self.plan_sql(sql).await?; if !self.ctx.get_cluster().is_empty() { histogram_plan = remove_exchange(histogram_plan); @@ -253,7 +249,7 @@ impl Interpreter for AnalyzeTableInterpreter { build_res .sources_pipelines .extend(histogram_build_res.sources_pipelines); - histogram_info_receivers.insert(*col_id, rx); + histogram_info_receivers.insert(col_id, rx); } } FuseTable::do_analyze( diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql b/tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql new file mode 100644 index 0000000000000..744e64a8d471b --- /dev/null +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0044_issue_17314.sql @@ -0,0 +1,44 @@ +statement ok +create or replace database issue_17314; + +statement ok +use issue_17314 + +statement ok +set enable_analyze_histogram=1; + +statement ok +create or replace table t1(a string, biz_date1 
string); + +statement ok +insert into t1 values('1', '11'); + +statement ok +alter table t1 rename BIZ_date1 to BIZ_DATE; + +statement ok +analyze table t1; + +statement ok +insert into t1 values('2', '22'); + +statement ok +insert into t1 values('3', '33'); + +statement ok +alter table t1 rename BIZ_DATE to b; + +statement ok +analyze table t1; + +query IIT +select * from fuse_statistic('issue_17314', 't1') order by column_name; +---- +a 3 [bucket id: 0, min: "1", max: "1", ndv: 1.0, count: 1.0], [bucket id: 1, min: "2", max: "2", ndv: 1.0, count: 1.0], [bucket id: 2, min: "3", max: "3", ndv: 1.0, count: 1.0] +b 3 [bucket id: 0, min: "11", max: "11", ndv: 1.0, count: 1.0], [bucket id: 1, min: "22", max: "22", ndv: 1.0, count: 1.0], [bucket id: 2, min: "33", max: "33", ndv: 1.0, count: 1.0] + +statement ok +drop table t1 all; + +statement ok +drop database issue_17314; From ffb8771d109b95ea49217b4e4de50221629d8263 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Sun, 19 Jan 2025 23:10:00 +0800 Subject: [PATCH 31/33] fix(query): unsupport datetime format item should not return panic error (#17323) fix(query): unsupport format item should not return panic error --- .../src/scalars/timestamp/src/datetime.rs | 27 +++++++++++-------- .../functions/02_0012_function_datetimes.test | 6 +++++ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/query/functions/src/scalars/timestamp/src/datetime.rs b/src/query/functions/src/scalars/timestamp/src/datetime.rs index 32ad99f1f273f..0c77e7404b2e6 100644 --- a/src/query/functions/src/scalars/timestamp/src/datetime.rs +++ b/src/query/functions/src/scalars/timestamp/src/datetime.rs @@ -15,6 +15,7 @@ use std::io::Write; use chrono::format::parse_and_remainder; +use chrono::format::Item; use chrono::format::Parsed; use chrono::format::StrftimeItems; use chrono::prelude::*; @@ -700,18 +701,22 @@ fn register_to_string(registry: &mut FunctionRegistry) { if format.is_empty() { output.push_null(); } else { - // Can't use `tz.timestamp_nanos(self.as_() * 1000)` directly, is may cause multiply with overflow. - let (mut secs, mut nanos) = - (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); - if nanos < 0 { - secs -= 1; - nanos += 1_000_000_000; + let items = StrftimeItems::new(format); + if items.clone().any(|item| matches!(item, Item::Error)) { + ctx.set_error(output.len(), "Invalid format string".to_string()); + output.push_null(); + } else { + // Can't use `tz.timestamp_nanos(self.as_() * 1000)` directly, is may cause multiply with overflow. 
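// (A sketch of the arithmetic, assuming `micros` is an i64 count of
// microseconds since the Unix epoch: i64 nanoseconds only span roughly
// +/-292 years, so scaling the raw microsecond value by 1000 can overflow
// near the edges of the supported timestamp range, while the split into
// whole seconds plus a non-negative sub-second nanosecond remainder below
// always stays in range.)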
+ let (mut secs, mut nanos) = + (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); + if nanos < 0 { + secs -= 1; + nanos += 1_000_000_000; + } + let ts = ctx.func_ctx.tz.timestamp_opt(secs, nanos as u32).unwrap(); + let res = ts.format(format).to_string(); + output.push(&res); } - let ts = ctx.func_ctx.tz.timestamp_opt(secs, nanos as u32).unwrap(); - // https://github.com/BurntSushi/jiff/issues/155 - // ASCII is currently required in jiff crate - let res = ts.format(format).to_string(); - output.push(&res); } }, ), diff --git a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test index e76537179a32b..ad7668efcedb0 100644 --- a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test +++ b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes.test @@ -1367,6 +1367,10 @@ select to_date('精彩的2022年,美丽的02month,激动の02d', '精彩的%Y ---- 2022-02-02 + +statement error 1006 +select date_format('2022-2-04T03:58:59', '%i'); + statement error 1006 select date_format('', ''); @@ -1486,3 +1490,5 @@ query T SELECT add_hours(to_timestamp(710455), 2147483647); ---- 1000-01-01 00:00:00.000000 + + From e71f3b50617557e34ba60f993b27773e3187c3f0 Mon Sep 17 00:00:00 2001 From: everpcpc Date: Mon, 20 Jan 2025 10:04:32 +0800 Subject: [PATCH 32/33] chore(ci): fix pack binaries (#17332) --- .github/actions/pack_binaries/action.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/actions/pack_binaries/action.yml b/.github/actions/pack_binaries/action.yml index bc54eedb02e41..88fa7fc914ff2 100644 --- a/.github/actions/pack_binaries/action.yml +++ b/.github/actions/pack_binaries/action.yml @@ -28,8 +28,9 @@ runs: env: GH_TOKEN: ${{ github.token }} run: | - verison=$(gh release list --repo databendlabs/bendsql | head -n 1 | awk '{print $1}') - curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${verison}-${{ inputs.target }}.tar.gz + verison=$(gh release list --repo databendlabs/bendsql --json 'name' --jq '.[].name' | head -n 1) + curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${{ inputs.target }}.tar.gz + mkdir -p distro/bin tar -xzvf /tmp/bendsql.tar.gz -C distro/bin - name: Pack Binaries id: pack_binaries From 91f1cedfe8013c405b92cd6ef97b36faf2f6412a Mon Sep 17 00:00:00 2001 From: everpcpc Date: Mon, 20 Jan 2025 11:03:13 +0800 Subject: [PATCH 33/33] chore(ci): fix pack binaries in release (#17335) --- .github/actions/pack_binaries/action.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/actions/pack_binaries/action.yml b/.github/actions/pack_binaries/action.yml index 88fa7fc914ff2..3eb9020f298df 100644 --- a/.github/actions/pack_binaries/action.yml +++ b/.github/actions/pack_binaries/action.yml @@ -23,13 +23,15 @@ runs: category: ${{ inputs.category }} path: distro/bin artifacts: metactl,meta,query,query.debug + - name: Get Latest BendSQL + id: bendsql + uses: pozetroninc/github-action-get-latest-release@master + with: + repository: databendlabs/bendsql - name: Download BendSQL shell: bash - env: - GH_TOKEN: ${{ github.token }} run: | - verison=$(gh release list --repo databendlabs/bendsql --json 'name' --jq '.[].name' | head -n 1) - curl -sSLfo /tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${verison}/bendsql-${{ inputs.target }}.tar.gz + curl -sSLfo 
/tmp/bendsql.tar.gz https://github.com/databendlabs/bendsql/releases/download/${{ steps.bendsql.outputs.release }}/bendsql-${{ inputs.target }}.tar.gz mkdir -p distro/bin tar -xzvf /tmp/bendsql.tar.gz -C distro/bin - name: Pack Binaries