Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -337,32 +337,39 @@ public Map<String, String> getBasicStatistics(Partish partish) {
Table table = getTable(hmsTable);
String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE).toLowerCase();
Map<String, String> stats = Maps.newHashMap();
switch (statsSource) {
case ICEBERG:
if (table.currentSnapshot() != null) {
Map<String, String> summary = table.currentSnapshot().summary();
if (summary != null) {
if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
}
if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
stats.put(StatsSetupConst.ROW_COUNT, summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
}
if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
if (statsSource.equals(ICEBERG)) {
if (table.currentSnapshot() != null) {
Map<String, String> summary = table.currentSnapshot().summary();
if (summary != null) {

if (summary.containsKey(SnapshotSummary.TOTAL_DATA_FILES_PROP)) {
stats.put(StatsSetupConst.NUM_FILES, summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP));
}

if (summary.containsKey(SnapshotSummary.TOTAL_RECORDS_PROP)) {
long totalRecords = Long.parseLong(summary.get(SnapshotSummary.TOTAL_RECORDS_PROP));
if (summary.containsKey(SnapshotSummary.TOTAL_EQ_DELETES_PROP) &&
summary.containsKey(SnapshotSummary.TOTAL_POS_DELETES_PROP)) {

long totalEqDeletes = Long.parseLong(summary.get(SnapshotSummary.TOTAL_EQ_DELETES_PROP));
long totalPosDeletes = Long.parseLong(summary.get(SnapshotSummary.TOTAL_POS_DELETES_PROP));

long actualRecords = totalRecords - (totalEqDeletes > 0 ? 0 : totalPosDeletes);
totalRecords = actualRecords > 0 ? actualRecords : totalRecords;
// actualRecords may be negative in edge cases
}
stats.put(StatsSetupConst.ROW_COUNT, String.valueOf(totalRecords));
}

if (summary.containsKey(SnapshotSummary.TOTAL_FILE_SIZE_PROP)) {
stats.put(StatsSetupConst.TOTAL_SIZE, summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP));
}
} else {
stats.put(StatsSetupConst.NUM_FILES, "0");
stats.put(StatsSetupConst.ROW_COUNT, "0");
stats.put(StatsSetupConst.TOTAL_SIZE, "0");
}
break;
case PUFFIN:
// place holder for puffin
break;
default:
// fall back to metastore
} else {
stats.put(StatsSetupConst.NUM_FILES, "0");
stats.put(StatsSetupConst.ROW_COUNT, "0");
stats.put(StatsSetupConst.TOTAL_SIZE, "0");
}
}
return stats;
}
Expand Down
57 changes: 57 additions & 0 deletions iceberg/iceberg-handler/src/test/queries/positive/row_count.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
-- Row-count test for a format-version 2 Iceberg table.
-- Loads 21 rows, then prints count(*) and describe formatted output before and
-- after two deletes, followed by explain plans that read the same table.
-- The qt:replace directives below mask values that change from run to run.

-- Mask random uuid
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
-- Mask random snapshot id
--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#SnapshotId#/
-- Mask current-snapshot-timestamp-ms
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#/
-- Mask totalSize
--! qt:replace:/(\s+totalSize\s+)\S+(\s*)/$1#Masked#/
-- Mask added file size
--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask total file size
--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask width (the pattern only rewrites the digits that follow "width=15")
--! qt:replace:/(width=15)\d+/$1###/

drop table if exists llap_orders;

-- External Iceberg table stored as ORC, partitioned by (p1, p2), format-version 2.
CREATE EXTERNAL TABLE llap_orders (orderid INT, quantity INT, itemid INT, tradets TIMESTAMP) PARTITIONED BY (p1 STRING, p2 STRING) STORED BY ICEBERG STORED AS ORC tblproperties('format-version'='2');


-- Seed data: 21 rows (orderid 0 through 20); value order is
-- (orderid, quantity, itemid, tradets, p1, p2).
INSERT INTO llap_orders VALUES
(0, 48, 5, timestamp('2000-06-04 19:55:46.129'), 'EU', 'DE'),
(1, 12, 6, timestamp('2007-06-24 19:23:22.829'), 'US', 'TX'),
(2, 76, 4, timestamp('2018-02-19 23:43:51.995'), 'EU', 'DE'),
(3, 91, 5, timestamp('2000-07-15 09:09:11.587'), 'US', 'NJ'),
(4, 18, 6, timestamp('2007-12-02 22:30:39.302'), 'EU', 'ES'),
(5, 71, 5, timestamp('2010-02-08 20:31:23.430'), 'EU', 'DE'),
(6, 78, 3, timestamp('2016-02-22 20:37:37.025'), 'EU', 'FR'),
(7, 88, 0, timestamp('2020-03-26 18:47:40.611'), 'EU', 'FR'),
(8, 87, 4, timestamp('2003-02-20 00:48:09.139'), 'EU', 'ES'),
(9, 60, 6, timestamp('2012-08-28 01:35:54.283'), 'EU', 'IT'),
(10, 24, 5, timestamp('2015-03-28 18:57:50.069'), 'US', 'NY'),
(11, 42, 2, timestamp('2012-06-27 01:13:32.350'), 'EU', 'UK'),
(12, 37, 4, timestamp('2020-08-09 01:18:50.153'), 'US', 'NY'),
(13, 52, 1, timestamp('2019-09-04 01:46:19.558'), 'EU', 'UK'),
(14, 96, 3, timestamp('2019-03-05 22:00:03.020'), 'US', 'NJ'),
(15, 18, 3, timestamp('2001-09-11 00:14:12.687'), 'EU', 'FR'),
(16, 46, 0, timestamp('2013-08-31 02:16:17.878'), 'EU', 'UK'),
(17, 26, 5, timestamp('2001-02-01 20:05:32.317'), 'EU', 'FR'),
(18, 68, 5, timestamp('2009-12-29 08:44:08.048'), 'EU', 'ES'),
(19, 54, 6, timestamp('2015-08-15 01:59:22.177'), 'EU', 'HU'),
(20, 10, 0, timestamp('2018-05-06 12:56:12.789'), 'US', 'CA');

-- Check the row count and table metadata before any delete (21 rows inserted).
select count(*) from llap_orders;
describe formatted llap_orders;

-- Delete rows: 4 rows have itemid = 6 and 6 rows have itemid = 5,
-- so 11 of the 21 inserted rows remain afterwards.
delete from llap_orders where itemid = 6;
delete from llap_orders where itemid = 5;

-- Check the row count and table metadata again after the deletes.
select count(*) from llap_orders;
describe formatted llap_orders;

-- Plans produced after the deletes.
-- NOTE(review): presumably these verify that the optimizer's row estimates
-- reflect the deleted rows; confirm against the expected .q.out.
explain select count(*) from llap_orders;
explain insert into llap_orders select * from llap_orders limit 100000;
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

-- Mask neededVirtualColumns due to non-strict order
--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/
-- Mask width
--! qt:replace:/(width=17)\d+/$1####/
-- Mask total data size
--! qt:replace:/(Data size: 35)\d+/$1####/

set hive.vectorized.execution.enabled=true;
set hive.llap.io.enabled=false;
Expand Down Expand Up @@ -42,6 +46,8 @@ stored by ICEBERG stored as PARQUET
insert into store_sales (ss_customer_sk, ss_item_sk, ss_sold_date_sk) values (1,1501,"2451181"), (2,1502,"2451181"), (3,1503,"2451181"), (4,1504,"2451181"), (5,1505,"2451181");
delete from store_sales where ss_customer_sk > 2;

select count(*) from store_sales;

create table ssv (
ss_sold_date_sk int,
ss_sold_time_sk int,
Expand Down Expand Up @@ -73,6 +79,8 @@ stored by ICEBERG stored as ORC

insert into ssv (ss_customer_sk2, ss_item_sk2, ss_ext_discount_amt) values (1,1501,-0.1), (2,1502,-0.1), (3,1503,-0.1), (4,1504,-0.1), (5,1505,-0.1);

select count(*) from ssv;

explain vectorization detail
MERGE INTO store_sales t
USING ssv s
Expand Down Expand Up @@ -135,7 +143,68 @@ WHEN NOT matched THEN
);

select * from store_sales;


-- Plain explain of a MERGE from ssv into store_sales: matched rows whose
-- t.ss_ext_discount_amt IS NULL are updated to 0.0, and unmatched source rows
-- are inserted with ss_sold_date_sk fixed to "2451181".
explain
MERGE INTO store_sales t
USING ssv s
ON (t.ss_item_sk = s.ss_item_sk2
AND t.ss_customer_sk=s.ss_customer_sk2
AND t.ss_sold_date_sk = "2451181"
AND ((Floor((s.ss_item_sk2) / 1000) * 1000) BETWEEN 1000 AND 2000)
AND s.ss_ext_discount_amt < 0.0) WHEN matched
AND t.ss_ext_discount_amt IS NULL
THEN UPDATE
SET ss_ext_discount_amt = 0.0
WHEN NOT matched THEN
INSERT (ss_sold_time_sk,
ss_item_sk,
ss_customer_sk,
ss_cdemo_sk,
ss_hdemo_sk,
ss_addr_sk,
ss_store_sk,
ss_promo_sk,
ss_ticket_number,
ss_quantity,
ss_wholesale_cost,
ss_list_price,
ss_sales_price,
ss_ext_discount_amt,
ss_ext_sales_price,
ss_ext_wholesale_cost,
ss_ext_list_price,
ss_ext_tax,
ss_coupon_amt,
ss_net_paid,
ss_net_paid_inc_tax,
ss_net_profit,
ss_sold_date_sk)
VALUES (
s.ss_sold_time_sk,
s.ss_item_sk2,
s.ss_customer_sk2,
s.ss_cdemo_sk,
s.ss_hdemo_sk,
s.ss_addr_sk,
s.ss_store_sk,
s.ss_promo_sk,
s.ss_ticket_number,
s.ss_quantity,
s.ss_wholesale_cost,
s.ss_list_price,
s.ss_sales_price,
s.ss_ext_discount_amt,
s.ss_ext_sales_price,
s.ss_ext_wholesale_cost,
s.ss_ext_list_price,
s.ss_ext_tax,
s.ss_coupon_amt,
s.ss_net_paid,
s.ss_net_paid_inc_tax,
s.ss_net_profit,
"2451181"
);

MERGE INTO store_sales t
USING ssv s
ON (t.ss_item_sk = s.ss_item_sk2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,13 @@ Stage-0
Stage-1
Reducer 2 vectorized
File Output Operator [FS_8]
Select Operator [SEL_7] (rows=24 width=95)
Select Operator [SEL_7] (rows=18 width=95)
Output:["_col0","_col1","_col2"]
<-Map 1 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_6]
Select Operator [SEL_5] (rows=24 width=95)
Select Operator [SEL_5] (rows=18 width=95)
Output:["_col0","_col1","_col2"]
TableScan [TS_0] (rows=24 width=95)
TableScan [TS_0] (rows=18 width=95)
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]

PREHOOK: query: select * from tbl_ice_puffin order by a, b, c
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ Stage-3
Map 1 vectorized
File Output Operator [FS_7]
table:{"name:":"default.ice01"}
Select Operator [SEL_6] (rows=7 width=78)
Select Operator [SEL_6] (rows=5 width=91)
Output:["_col0","_col1"]
Filter Operator [FIL_5] (rows=7 width=78)
Filter Operator [FIL_5] (rows=5 width=91)
predicate:(((id <= 4) and (id <> 2)) or ((id > 4) or (id = 2)) is null)
TableScan [TS_0] (rows=9 width=81)
TableScan [TS_0] (rows=7 width=78)
default@ice01,ice01,Tbl:COMPLETE,Col:COMPLETE,Output:["id","name"]

PREHOOK: query: delete from ice01 where id>4 OR id=2
Expand Down
Loading