diff --git a/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q index d7c4d811a8b2..cb7b7ae9ae88 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q @@ -12,10 +12,10 @@ explain select * from tbl_ice_puffin order by a, b, c; select * from tbl_ice_puffin order by a, b, c; desc formatted tbl_ice_puffin b; update tbl_ice_puffin set b='two' where b='one' or b='three'; -analyze table tbl_ice_puffin compute statistics for columns; +analyze table tbl_ice_puffin compute statistics for columns; explain select * from tbl_ice_puffin order by a, b, c; select * from tbl_ice_puffin order by a, b, c; -select count(*) from tbl_ice_puffin ; +select count(*) from tbl_ice_puffin; desc formatted tbl_ice_puffin b; @@ -33,7 +33,7 @@ create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg t insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); explain select * from tbl_ice_puffin order by a, b, c; select * from tbl_ice_puffin order by a, b, c; -select count(*) from tbl_ice_puffin ; +select count(*) from tbl_ice_puffin; desc formatted tbl_ice_puffin a; @@ -44,12 +44,14 @@ create external table tbl_ice(a int, b string, c int) stored by iceberg tblprope insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); explain select * from tbl_ice order by a, b, c; select * from tbl_ice order by a, b, c; -select count(*) from tbl_ice ; +select count(*) from tbl_ice; set hive.iceberg.stats.source=iceberg; -delete from tbl_ice_puffin where a = 2; -explain select * from tbl_ice order by a, b, c; -select count(*) from tbl_ice ; +delete from tbl_ice_puffin where a = 2; +analyze table tbl_ice_puffin compute statistics for columns A, C; +explain select * from tbl_ice_puffin order by a, b, c; +select count(*) from tbl_ice_puffin; +desc formatted tbl_ice_puffin C; create table t1 (a int) stored by iceberg tblproperties ('format-version'='2'); create table t2 (b int) stored by iceberg tblproperties ('format-version'='2'); diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out index 6842264744b6..3ef9a6b9fe97 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out @@ -111,12 +111,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: default@tbl_ice_puffin POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns +PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns +POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: default@tbl_ice_puffin @@ -432,21 +432,31 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice POSTHOOK: Output: hdfs://### HDFS PATH ### 9 -PREHOOK: query: delete from tbl_ice_puffin where a = 2 +PREHOOK: query: delete from tbl_ice_puffin where a = 2 PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: delete from tbl_ice_puffin where a = 2 +POSTHOOK: query: delete from tbl_ice_puffin where a = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: explain select * from tbl_ice order by a, b, c +PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice +PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice order by a, b, c +POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice +POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. @@ -459,24 +469,43 @@ Stage-0 Stage-1 Reducer 2 vectorized File Output Operator [FS_8] - Select Operator [SEL_7] (rows=9 width=95) + Select Operator [SEL_7] (rows=6 width=192) Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=9 width=95) + Select Operator [SEL_5] (rows=6 width=192) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=95) - default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + TableScan [TS_0] (rows=6 width=192) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:PARTIAL,Output:["a","b","c"] -PREHOOK: query: select count(*) from tbl_ice +PREHOOK: query: select count(*) from tbl_ice_puffin PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice +PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select count(*) from tbl_ice +POSTHOOK: query: select count(*) from tbl_ice_puffin POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice +POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### -9 +6 +PREHOOK: query: desc formatted tbl_ice_puffin C +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tbl_ice_puffin +POSTHOOK: query: desc formatted tbl_ice_puffin C +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tbl_ice_puffin +col_name C +data_type int +min 50 +max 56 +num_nulls 0 +distinct_count 6 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"A\":\"true\",\"C\":\"true\"}} PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java index 6bffa52eea9d..67548cf54bb8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java @@ -59,7 +59,7 @@ public static ColumnStatisticsObj readHiveColumnStatistics(String columnName, St List columnStatsFields, int start, List fields, List values) throws HiveException { ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); - statsObj.setColName(columnName); + statsObj.setColName(columnName.toLowerCase()); statsObj.setColType(columnType); int end = start + columnStatsFields.size(); diff --git a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out index 9faf0e05d9ad..9d5317b685d0 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out @@ -403,7 +403,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: EXTERNAL_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} EXTERNAL TRUE bucketing_version 2 numFiles 1 @@ -440,7 +440,7 @@ num_trues num_falses bit_vector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -459,7 +459,7 @@ num_trues num_falses bit_vector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -478,7 +478,7 @@ num_trues num_falses bit_vector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: CREATE TEMPORARY TABLE empty_tab( a int, b double, @@ -709,7 +709,7 @@ POSTHOOK: query: desc extended default.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none sourceIP string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -764,7 +764,7 @@ num_trues num_falses bit_vector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: test@uservisits_web_text_none @@ -782,7 +782,7 @@ POSTHOOK: query: desc extended UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none sKeyword string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -801,7 +801,7 @@ num_trues num_falses bit_vector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}} PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -820,4 +820,4 @@ num_trues num_falses bit_vector HL comment from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}}