Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions iceberg/iceberg-handler/src/test/queries/positive/col_stats.q
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ explain select * from tbl_ice_puffin order by a, b, c;
select * from tbl_ice_puffin order by a, b, c;
desc formatted tbl_ice_puffin b;
update tbl_ice_puffin set b='two' where b='one' or b='three';
analyze table tbl_ice_puffin compute statistics for columns;
analyze table tbl_ice_puffin compute statistics for columns;
explain select * from tbl_ice_puffin order by a, b, c;
select * from tbl_ice_puffin order by a, b, c;
select count(*) from tbl_ice_puffin ;
select count(*) from tbl_ice_puffin;
desc formatted tbl_ice_puffin b;


Expand All @@ -33,7 +33,7 @@ create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg t
insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
explain select * from tbl_ice_puffin order by a, b, c;
select * from tbl_ice_puffin order by a, b, c;
select count(*) from tbl_ice_puffin ;
select count(*) from tbl_ice_puffin;
desc formatted tbl_ice_puffin a;


Expand All @@ -44,12 +44,14 @@ create external table tbl_ice(a int, b string, c int) stored by iceberg tblprope
insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56);
explain select * from tbl_ice order by a, b, c;
select * from tbl_ice order by a, b, c;
select count(*) from tbl_ice ;
select count(*) from tbl_ice;

set hive.iceberg.stats.source=iceberg;
delete from tbl_ice_puffin where a = 2;
explain select * from tbl_ice order by a, b, c;
select count(*) from tbl_ice ;
delete from tbl_ice_puffin where a = 2;
analyze table tbl_ice_puffin compute statistics for columns A, C;
explain select * from tbl_ice_puffin order by a, b, c;
select count(*) from tbl_ice_puffin;
desc formatted tbl_ice_puffin C;

create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
Expand Down
63 changes: 46 additions & 17 deletions iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_ice_puffin
POSTHOOK: Output: default@tbl_ice_puffin
POSTHOOK: Output: default@tbl_ice_puffin
PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns
PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@tbl_ice_puffin
PREHOOK: Output: default@tbl_ice_puffin
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns
POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@tbl_ice_puffin
POSTHOOK: Output: default@tbl_ice_puffin
Expand Down Expand Up @@ -432,21 +432,31 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_ice
POSTHOOK: Output: hdfs://### HDFS PATH ###
9
PREHOOK: query: delete from tbl_ice_puffin where a = 2
PREHOOK: query: delete from tbl_ice_puffin where a = 2
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_ice_puffin
PREHOOK: Output: default@tbl_ice_puffin
POSTHOOK: query: delete from tbl_ice_puffin where a = 2
POSTHOOK: query: delete from tbl_ice_puffin where a = 2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_ice_puffin
POSTHOOK: Output: default@tbl_ice_puffin
PREHOOK: query: explain select * from tbl_ice order by a, b, c
PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: default@tbl_ice_puffin
PREHOOK: Output: default@tbl_ice_puffin
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C
POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@tbl_ice_puffin
POSTHOOK: Output: default@tbl_ice_puffin
POSTHOOK: Output: hdfs://### HDFS PATH ###
PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_ice
PREHOOK: Input: default@tbl_ice_puffin
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: explain select * from tbl_ice order by a, b, c
POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_ice
POSTHOOK: Input: default@tbl_ice_puffin
POSTHOOK: Output: hdfs://### HDFS PATH ###
Plan optimized by CBO.

Expand All @@ -459,24 +469,43 @@ Stage-0
Stage-1
Reducer 2 vectorized
File Output Operator [FS_8]
Select Operator [SEL_7] (rows=9 width=95)
Select Operator [SEL_7] (rows=6 width=192)
Output:["_col0","_col1","_col2"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_6]
Select Operator [SEL_5] (rows=9 width=95)
Select Operator [SEL_5] (rows=6 width=192)
Output:["_col0","_col1","_col2"]
TableScan [TS_0] (rows=9 width=95)
default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
TableScan [TS_0] (rows=6 width=192)
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:PARTIAL,Output:["a","b","c"]

PREHOOK: query: select count(*) from tbl_ice
PREHOOK: query: select count(*) from tbl_ice_puffin
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_ice
PREHOOK: Input: default@tbl_ice_puffin
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select count(*) from tbl_ice
POSTHOOK: query: select count(*) from tbl_ice_puffin
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_ice
POSTHOOK: Input: default@tbl_ice_puffin
POSTHOOK: Output: hdfs://### HDFS PATH ###
9
6
PREHOOK: query: desc formatted tbl_ice_puffin C
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@tbl_ice_puffin
POSTHOOK: query: desc formatted tbl_ice_puffin C
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_ice_puffin
col_name C
data_type int
min 50
max 56
num_nulls 0
distinct_count 6
avg_col_len
max_col_len
num_trues
num_falses
bit_vector HL
comment
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"A\":\"true\",\"C\":\"true\"}}
PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public static ColumnStatisticsObj readHiveColumnStatistics(String columnName, St
List<ColumnStatsField> columnStatsFields, int start, List<? extends StructField> fields,
List<Object> values) throws HiveException {
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
statsObj.setColName(columnName);
statsObj.setColName(columnName.toLowerCase());

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this fix the lowercase issue? Is the column name compared somewhere, while in other places it could be capitalized?

Copy link
Member Author

@deniskuzZ deniskuzZ Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's always lowercased in the Hive schema, but not in ColumnStatisticsObj.

statsObj.setColType(columnType);

int end = start + columnStatsFields.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
numFiles 1
Expand Down Expand Up @@ -440,7 +440,7 @@ num_trues
num_falses
bit_vector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
Expand All @@ -459,7 +459,7 @@ num_trues
num_falses
bit_vector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@uservisits_web_text_none
Expand All @@ -478,7 +478,7 @@ num_trues
num_falses
bit_vector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: CREATE TEMPORARY TABLE empty_tab(
a int,
b double,
Expand Down Expand Up @@ -709,7 +709,7 @@ POSTHOOK: query: desc extended default.UserVisits_web_text_none sourceIP
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@uservisits_web_text_none
sourceIP string from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP
PREHOOK: type: DESCTABLE
PREHOOK: Input: test@uservisits_web_text_none
Expand Down Expand Up @@ -764,7 +764,7 @@ num_trues
num_falses
bit_vector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}}
PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword
PREHOOK: type: ANALYZE_TABLE
PREHOOK: Input: test@uservisits_web_text_none
Expand All @@ -782,7 +782,7 @@ POSTHOOK: query: desc extended UserVisits_web_text_none sKeyword
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: test@uservisits_web_text_none
sKeyword string from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
PREHOOK: query: desc formatted UserVisits_web_text_none sKeyword
PREHOOK: type: DESCTABLE
PREHOOK: Input: test@uservisits_web_text_none
Expand All @@ -801,7 +801,7 @@ num_trues
num_falses
bit_vector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword
PREHOOK: type: DESCTABLE
PREHOOK: Input: test@uservisits_web_text_none
Expand All @@ -820,4 +820,4 @@ num_trues
num_falses
bit_vector HL
comment from deserializer
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}}
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}}
Loading