diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java
index 6101ad159af4..d443672b0e8d 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java
@@ -80,6 +80,9 @@ private static Expression translate(ExpressionTree tree, List lea
       case NOT:
         return not(translate(childNodes.get(0), leaves));
       case LEAF:
+        if (tree.getLeaf() >= leaves.size()) {
+          throw new UnsupportedOperationException("No more leaves are available");
+        }
         return translateLeaf(leaves.get(tree.getLeaf()));
       case CONSTANT:
         throw new UnsupportedOperationException("CONSTANT operator is not supported");
@@ -107,6 +110,9 @@ private static Expression translateLeaf(PredicateLeaf leaf) {
         return in(column, leafToLiteralList(leaf));
       case BETWEEN:
         List<Object> icebergLiterals = leafToLiteralList(leaf);
+        if (icebergLiterals.size() < 2) {
+          throw new UnsupportedOperationException("Missing leaf literals: " + leaf);
+        }
         if (icebergLiterals.size() == 2) {
           return and(greaterThanOrEqual(column, icebergLiterals.get(0)),
               lessThanOrEqual(column, icebergLiterals.get(1)));
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java
index 3044f0467af3..af1a30405b66 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java
@@ -24,10 +24,14 @@
 import java.sql.Timestamp;
 import java.time.LocalDate;
 import java.time.ZoneOffset;
+import java.util.Collections;
+import java.util.List;
+import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.iceberg.AssertHelpers;
 import org.apache.iceberg.expressions.And;
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.expressions.Literal;
@@ -134,6 +138,24 @@ public void testBetweenOperand() {
     assertEquals(actual.right().op(), expected.right().op());
   }
 
+  @Test
+  public void testUnsupportedBetweenOperandEmptyLeaves() {
+    SearchArgument.Builder builder = SearchArgumentFactory.newBuilder();
+    final SearchArgument arg =
+        new MockSearchArgument(
+            builder
+                .startAnd()
+                .between("salary", PredicateLeaf.Type.LONG, 9000L, 15000L)
+                .end()
+                .build());
+
+    AssertHelpers.assertThrows(
+        "must throw if leaves are empty in between operator",
+        UnsupportedOperationException.class,
+        "Missing leaf literals",
+        () -> HiveIcebergFilterFactory.generateFilterExpression(arg));
+  }
+
   @Test
   public void testIsNullOperand() {
     SearchArgument.Builder builder = SearchArgumentFactory.newBuilder();
@@ -259,4 +281,69 @@ private void assertPredicatesMatch(UnboundPredicate expected, UnboundPredicate a
     assertEquals(expected.literal(), actual.literal());
     assertEquals(expected.ref().name(), actual.ref().name());
   }
+
+  private static class MockSearchArgument implements SearchArgument {
+
+    private final SearchArgument delegate;
+
+    MockSearchArgument(SearchArgument original) {
+      delegate = original;
+    }
+
+    @Override
+    public ExpressionTree getExpression() {
+      return delegate.getExpression();
+    }
+
+    @Override
+    public ExpressionTree getCompactExpression() {
+      return null;
+    }
+
+    @Override
+    public TruthValue evaluate(TruthValue[] leaves) {
+      return delegate.evaluate(leaves);
+    }
+
+    @Override
+    public List<PredicateLeaf> getLeaves() {
+      return Collections.singletonList(
+          new PredicateLeaf() {
+            @Override
+            public Operator getOperator() {
+              return Operator.BETWEEN;
+            }
+
+            @Override
+            public Type getType() {
+              return Type.LONG;
+            }
+
+            @Override
+            public String getColumnName() {
+              return "salary";
+            }
+
+            @Override
+            public Object getLiteral() {
+              return null;
+            }
+
+            @Override
+            public List<Object> getLiteralList() {
+              return Collections.emptyList();
+            }
+
+            @Override
+            public int getId() {
+              return 0;
+            }
+
+            @Override
+            public String toString() {
+              return "Leaf[empty]";
+            }
+          });
+    }
+  }
 }
diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out
index 360e809471cb..8014b443e18f 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out
@@ -55,6 +55,7 @@ key_metadata binary Encryption key metadata blob
 split_offsets array Splittable offsets
 equality_ids array Equality comparison field IDs
 sort_order_id int Sort order ID
+readable_metrics struct,value:struct> Column metrics in readable form
 PREHOOK: query: describe default.ice_meta_desc.entries
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@ice_meta_desc
@@ -154,6 +155,7 @@ key_metadata binary Encryption key metadata blob
 split_offsets array Splittable offsets
 equality_ids array Equality comparison field IDs
 sort_order_id int Sort order ID
+readable_metrics struct,value:struct> Column metrics in readable form
 PREHOOK: query: describe default.ice_meta_desc.all_entries
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@ice_meta_desc
@@ -186,6 +188,7 @@ key_metadata binary Encryption key metadata blob
 split_offsets array Splittable offsets
 equality_ids array Equality comparison field IDs
 sort_order_id int Sort order ID
+readable_metrics struct,value:struct> Column metrics in readable form
 PREHOOK: query: describe default.ice_meta_desc.delete_files
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@ice_meta_desc
@@ -207,6 +210,7 @@ key_metadata binary Encryption key metadata blob
 split_offsets array Splittable offsets
 equality_ids array Equality comparison field IDs
 sort_order_id int Sort order ID
+readable_metrics struct,value:struct> Column metrics in readable form
 PREHOOK: query: describe default.ice_meta_desc.metadata_log_entries
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@ice_meta_desc
@@ -251,6 +255,7 @@ key_metadata binary Encryption key metadata blob
 split_offsets array Splittable offsets
 equality_ids array Equality comparison field IDs
 sort_order_id int Sort order ID
+readable_metrics struct,value:struct> Column metrics in readable form
 PREHOOK: query: describe default.ice_meta_desc.all_files
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@ice_meta_desc
@@ -272,6 +277,7 @@ key_metadata binary Encryption key metadata blob
 split_offsets array Splittable offsets
 equality_ids array Equality comparison field IDs
 sort_order_id int Sort order ID
+readable_metrics struct,value:struct> Column metrics in readable form
 PREHOOK: query: describe formatted
default.ice_meta_desc.files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -294,6 +300,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.entries PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -400,6 +407,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.all_entries PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -434,6 +442,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.delete_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -456,6 +465,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.metadata_log_entries PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -503,6 +513,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.all_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -525,6 +536,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe extended default.ice_meta_desc.files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -546,6 +558,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe extended default.ice_meta_desc.entries PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -645,6 +658,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe extended default.ice_meta_desc.all_entries PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -678,6 +692,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.delete_files 
PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -700,6 +715,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.metadata_log_entries PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -747,6 +763,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.all_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -769,6 +786,7 @@ key_metadata binary Encryption key metadata blob split_offsets array Splittable offsets equality_ids array Equality comparison field IDs sort_order_id int Sort order ID +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: drop table ice_meta_desc PREHOOK: type: DROPTABLE PREHOOK: Input: default@ice_meta_desc diff --git a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out index 7e7a5eab1e3a..b1e7fcfe4532 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out @@ -349,26 +349,26 @@ POSTHOOK: query: select * from default.tbl_target_mixed.files POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_target_mixed POSTHOOK: Output: hdfs://### HDFS PATH ### -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":1} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:d,2:CZK,3: } {1:d,2:CZK,3: } NULL [3] NULL 0 +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":110},"c":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ccy":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":1} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:d,2:CZK,3: } {1:d,2:CZK,3: } NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":100,"upper_bound":100},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":12},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"CZK","upper_bound":"CZK"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":2} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1: ,2:CZK,3:} {1: -,2:CZK,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":0} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:,2:EUR,3: } NULL [3] NULL 0 +,2:CZK,3:} NULL [3] NULL 0 
{"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":5,"upper_bound":5},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"CZK","upper_bound":"CZK"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":0} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:,2:EUR,3: } NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":20,"upper_bound":20},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":11,"upper_bound":11},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":1} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1: -,2:EUR,3: } NULL [3] NULL 0 +,2:EUR,3: } NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":4,"upper_bound":10},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":12},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":2} 3 448 {1:8,2:17,3:5} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:(,2:EUR,3: -} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"HUF","c_bucket":1} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:HUF,3:} {1:2,2:HUF,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":0} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:PLN,3: } {1: ,2:PLN,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":1} 1 448 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:P,2:PLN,3:d} {1:P,2:PLN,3:d} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":2} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:Z,2:PLN,3:} {1:Z,2:PLN,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":0} 2 462 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:F,2:USD,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":1} 3 470 {1:8,2:17,3:8} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:<,2:USD,3:d} NULL [3] NULL 0 +} NULL [3] NULL 0 {"a":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":40},"c":{"column_size":5,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":17,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"HUF","c_bucket":1} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:HUF,3:} {1:2,2:HUF,3:} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":5,"upper_bound":50},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":30,"upper_bound":30},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"HUF","upper_bound":"HUF"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":0} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} 
{1:,2:PLN,3: } {1: ,2:PLN,3:} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":8,"upper_bound":9},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":11,"upper_bound":20},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"PLN","upper_bound":"PLN"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":1} 1 448 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:P,2:PLN,3:d} {1:P,2:PLN,3:d} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":80,"upper_bound":80},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":100,"upper_bound":100},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"PLN","upper_bound":"PLN"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":2} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:Z,2:PLN,3:} {1:Z,2:PLN,3:} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":90,"upper_bound":90},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":18,"upper_bound":18},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"PLN","upper_bound":"PLN"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":0} 2 462 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:F,2:USD,3:} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":3,"upper_bound":70},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":11,"upper_bound":20},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":1} 3 470 {1:8,2:17,3:8} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:<,2:USD,3:d} NULL [3] NULL 0 {"a":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":7,"upper_bound":60},"c":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":100},"ccy":{"column_size":17,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":2} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:,2:USD,3: -} NULL [3] NULL 0 +} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":6,"upper_bound":6},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}} PREHOOK: query: explain insert into table tbl_target_mixed select * from tbl_src where b = 'EUR' PREHOOK: type: QUERY PREHOOK: Input: default@tbl_src @@ -524,30 +524,30 @@ POSTHOOK: query: select * from default.tbl_target_mixed.files POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_target_mixed POSTHOOK: Output: hdfs://### HDFS PATH ### -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":1} 2 466 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:USD,3:d} {1:,2:USD,3:d} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 
{"ccy":"EUR","c_bucket":0} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:,2:EUR,3: } NULL [3] NULL 0 +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":1} 2 466 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:USD,3:d} {1:,2:USD,3:d} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":7,"upper_bound":30},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":100,"upper_bound":100},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":0} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:,2:EUR,3: } NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":20,"upper_bound":20},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":11,"upper_bound":11},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":1} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1: -,2:EUR,3: } NULL [3] NULL 0 +,2:EUR,3: } NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":4,"upper_bound":10},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":12},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":2} 3 448 {1:8,2:17,3:5} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:(,2:EUR,3: -} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":1} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:d,2:CZK,3: } {1:d,2:CZK,3: } NULL [3] NULL 0 +} NULL [3] NULL 0 {"a":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":40},"c":{"column_size":5,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":17,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":null,"c_bucket":null} 2 417 {1:7,2:5,3:5} {1:2,2:2,3:2} {1:0,2:2,3:2} {} {1: } {1:n} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":110},"c":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null},"ccy":{"column_size":5,"value_count":2,"null_value_count":2,"nan_value_count":null,"lower_bound":null,"upper_bound":null}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":1} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:d,2:CZK,3: } {1:d,2:CZK,3: } NULL [3] NULL 0 
{"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":100,"upper_bound":100},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":12},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"CZK","upper_bound":"CZK"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"CZK","c_bucket":2} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1: ,2:CZK,3:} {1: -,2:CZK,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":0} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:,2:EUR,3: } NULL [3] NULL 0 +,2:CZK,3:} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":5,"upper_bound":5},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"CZK","upper_bound":"CZK"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":0} 1 432 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:,2:EUR,3: } NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":20,"upper_bound":20},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":11,"upper_bound":11},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":1} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1: -,2:EUR,3: } NULL [3] NULL 0 +,2:EUR,3: } NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":4,"upper_bound":10},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":12},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"EUR","c_bucket":2} 3 448 {1:8,2:17,3:5} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:EUR,3: } {1:(,2:EUR,3: -} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"HUF","c_bucket":1} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:HUF,3:} {1:2,2:HUF,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":0} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:PLN,3: } {1: ,2:PLN,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":1} 1 448 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:P,2:PLN,3:d} {1:P,2:PLN,3:d} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":2} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:Z,2:PLN,3:} {1:Z,2:PLN,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":0} 2 462 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:F,2:USD,3:} NULL [3] NULL 0 -0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":1} 3 470 {1:8,2:17,3:8} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:<,2:USD,3:d} NULL [3] NULL 0 +} NULL [3] NULL 0 
{"a":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":1,"upper_bound":40},"c":{"column_size":5,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":17,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":"EUR","upper_bound":"EUR"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"HUF","c_bucket":1} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:HUF,3:} {1:2,2:HUF,3:} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":5,"upper_bound":50},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":30,"upper_bound":30},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"HUF","upper_bound":"HUF"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":0} 2 448 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:PLN,3: } {1: ,2:PLN,3:} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":8,"upper_bound":9},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":11,"upper_bound":20},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"PLN","upper_bound":"PLN"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":1} 1 448 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:P,2:PLN,3:d} {1:P,2:PLN,3:d} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":80,"upper_bound":80},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":100,"upper_bound":100},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"PLN","upper_bound":"PLN"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"PLN","c_bucket":2} 1 449 {1:6,2:12,3:6} {1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:Z,2:PLN,3:} {1:Z,2:PLN,3:} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":90,"upper_bound":90},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":18,"upper_bound":18},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"PLN","upper_bound":"PLN"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":0} 2 462 {1:7,2:18,3:7} {1:2,2:2,3:2} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:F,2:USD,3:} NULL [3] NULL 0 {"a":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":3,"upper_bound":70},"c":{"column_size":7,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":11,"upper_bound":20},"ccy":{"column_size":18,"value_count":2,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}} +0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":1} 3 470 {1:8,2:17,3:8} {1:3,2:3,3:3} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:<,2:USD,3:d} NULL [3] NULL 0 {"a":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":7,"upper_bound":60},"c":{"column_size":8,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":12,"upper_bound":100},"ccy":{"column_size":17,"value_count":3,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}} 0 hdfs://### HDFS PATH ### ORC 0 {"ccy":"USD","c_bucket":2} 1 432 {1:6,2:12,3:6} 
{1:1,2:1,3:1} {1:0,2:0,3:0} {} {1:,2:USD,3: } {1:,2:USD,3: -} NULL [3] NULL 0 +} NULL [3] NULL 0 {"a":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":6,"upper_bound":6},"c":{"column_size":6,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":10,"upper_bound":10},"ccy":{"column_size":12,"value_count":1,"null_value_count":0,"nan_value_count":null,"lower_bound":"USD","upper_bound":"USD"}} diff --git a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out index c69dfd0b8998..f7bee8b09093 100644 Binary files a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out and b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_unpartitioned_table.q.out differ diff --git a/iceberg/patched-iceberg-core/pom.xml b/iceberg/patched-iceberg-core/pom.xml index 82f44a7ee4fc..ec1bdc39afc4 100644 --- a/iceberg/patched-iceberg-core/pom.xml +++ b/iceberg/patched-iceberg-core/pom.xml @@ -76,7 +76,6 @@ ${project.build.directory}/classes **/HadoopInputFile.class - **/BaseUpdatePartitionSpec.class diff --git a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/BaseUpdatePartitionSpec.java b/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/BaseUpdatePartitionSpec.java deleted file mode 100644 index f448f5314443..000000000000 --- a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/BaseUpdatePartitionSpec.java +++ /dev/null @@ -1,555 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.iceberg; - -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.expressions.BoundReference; -import org.apache.iceberg.expressions.BoundTerm; -import org.apache.iceberg.expressions.BoundTransform; -import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.expressions.Term; -import org.apache.iceberg.expressions.UnboundTerm; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.relocated.com.google.common.collect.Sets; -import org.apache.iceberg.transforms.PartitionSpecVisitor; -import org.apache.iceberg.transforms.Transform; -import org.apache.iceberg.transforms.Transforms; -import org.apache.iceberg.transforms.UnknownTransform; -import org.apache.iceberg.types.Type; -import org.apache.iceberg.util.Pair; - -class BaseUpdatePartitionSpec implements UpdatePartitionSpec { - private final TableOperations ops; - private final TableMetadata base; - private final int formatVersion; - private final PartitionSpec spec; - private final Schema schema; - private final Map nameToField; - private final Map, PartitionField> transformToField; - - private final List adds = Lists.newArrayList(); - private final Map addedTimeFields = Maps.newHashMap(); - private final Map, PartitionField> transformToAddedField = - Maps.newHashMap(); - private final Map nameToAddedField = Maps.newHashMap(); - private final Set deletes = Sets.newHashSet(); - private final Map renames = Maps.newHashMap(); - - private boolean caseSensitive; - private int lastAssignedPartitionId; - - BaseUpdatePartitionSpec(TableOperations ops) { - this.ops = ops; - this.caseSensitive = true; - this.base = ops.current(); - this.formatVersion = base.formatVersion(); - this.spec = base.spec(); - this.schema = spec.schema(); - this.nameToField = indexSpecByName(spec); - this.transformToField = indexSpecByTransform(spec); - this.lastAssignedPartitionId = base.lastAssignedPartitionId(); - - spec.fields().stream() - .filter(field -> field.transform() instanceof UnknownTransform) - .findAny() - .ifPresent( - field -> { - throw new IllegalArgumentException( - "Cannot update partition spec with unknown transform: " + field); - }); - } - - /** For testing only. */ - @VisibleForTesting - BaseUpdatePartitionSpec(int formatVersion, PartitionSpec spec) { - this(formatVersion, spec, spec.lastAssignedFieldId()); - } - - /** For testing only. */ - @VisibleForTesting - BaseUpdatePartitionSpec(int formatVersion, PartitionSpec spec, int lastAssignedPartitionId) { - this.ops = null; - this.base = null; - this.formatVersion = formatVersion; - this.caseSensitive = true; - this.spec = spec; - this.schema = spec.schema(); - this.nameToField = indexSpecByName(spec); - this.transformToField = indexSpecByTransform(spec); - this.lastAssignedPartitionId = lastAssignedPartitionId; - } - - private int assignFieldId() { - this.lastAssignedPartitionId += 1; - return lastAssignedPartitionId; - } - - /** - * In V2 it searches for a similar partition field in historical partition specs. Tries to match - * on source field ID, transform type and target name (optional). 
If not found or in V1 cases it - * creates a new PartitionField. - * - * @param sourceTransform pair of source ID and transform for this PartitionField addition - * @param name target partition field name, if specified - * @return the recycled or newly created partition field - */ - private PartitionField recycleOrCreatePartitionField( - Pair> sourceTransform, String name) { - if (formatVersion == 2 && base != null) { - int sourceId = sourceTransform.first(); - Transform transform = sourceTransform.second(); - - Set allHistoricalFields = Sets.newHashSet(); - for (PartitionSpec partitionSpec : base.specs()) { - allHistoricalFields.addAll(partitionSpec.fields()); - } - - for (PartitionField field : allHistoricalFields) { - if (field.sourceId() == sourceId && field.transform().equals(transform)) { - // if target name is specified then consider it too, otherwise not - if (name == null || field.name().equals(name)) { - return field; - } - } - } - } - return new PartitionField( - sourceTransform.first(), assignFieldId(), name, sourceTransform.second()); - } - - @Override - public UpdatePartitionSpec caseSensitive(boolean isCaseSensitive) { - this.caseSensitive = isCaseSensitive; - return this; - } - - @Override - public BaseUpdatePartitionSpec addField(String sourceName) { - return addField(Expressions.ref(sourceName)); - } - - @Override - public BaseUpdatePartitionSpec addField(Term term) { - return addField(null, term); - } - - private BaseUpdatePartitionSpec rewriteDeleteAndAddField( - PartitionField existing, String name, Pair> sourceTransform) { - deletes.remove(existing.fieldId()); - if (name == null || existing.name().equals(name)) { - return this; - } else { - return renameField(existing.name(), name); - } - } - - @Override - public BaseUpdatePartitionSpec addField(String name, Term term) { - PartitionField alreadyAdded = nameToAddedField.get(name); - Preconditions.checkArgument( - alreadyAdded == null, "Cannot add duplicate partition field: %s", alreadyAdded); - - Pair> sourceTransform = resolve(term); - Pair validationKey = - Pair.of(sourceTransform.first(), sourceTransform.second().toString()); - - PartitionField existing = transformToField.get(validationKey); - if (existing != null && - deletes.contains(existing.fieldId()) && - existing.transform().equals(sourceTransform.second())) { - return rewriteDeleteAndAddField(existing, name, sourceTransform); - } - - Preconditions.checkArgument( - existing == null || - (deletes.contains(existing.fieldId()) && - !existing.transform().toString().equals(sourceTransform.second().toString())), - "Cannot add duplicate partition field %s=%s, conflicts with %s", - name, - term, - existing); - - PartitionField added = transformToAddedField.get(validationKey); - Preconditions.checkArgument( - added == null, - "Cannot add duplicate partition field %s=%s, already added: %s", - name, - term, - added); - - PartitionField newField = recycleOrCreatePartitionField(sourceTransform, name); - if (newField.name() == null) { - String partitionName = - PartitionSpecVisitor.visit(schema, newField, PartitionNameGenerator.INSTANCE); - newField = - new PartitionField( - newField.sourceId(), newField.fieldId(), partitionName, newField.transform()); - } - - checkForRedundantAddedPartitions(newField); - transformToAddedField.put(validationKey, newField); - - PartitionField existingField = nameToField.get(newField.name()); - if (existingField != null && !deletes.contains(existingField.fieldId())) { - if (isVoidTransform(existingField)) { - // rename the old deleted field 
that is being replaced by the new field - renameField(existingField.name(), existingField.name() + "_" + existingField.fieldId()); - } else { - throw new IllegalArgumentException( - String.format("Cannot add duplicate partition field name: %s", name)); - } - } else if (existingField != null && deletes.contains(existingField.fieldId())) { - renames.put(existingField.name(), existingField.name() + "_" + existingField.fieldId()); - } - - nameToAddedField.put(newField.name(), newField); - - adds.add(newField); - - return this; - } - - @Override - public BaseUpdatePartitionSpec removeField(String name) { - PartitionField alreadyAdded = nameToAddedField.get(name); - Preconditions.checkArgument( - alreadyAdded == null, "Cannot delete newly added field: %s", alreadyAdded); - - Preconditions.checkArgument( - renames.get(name) == null, "Cannot rename and delete partition field: %s", name); - - PartitionField field = nameToField.get(name); - Preconditions.checkArgument(field != null, "Cannot find partition field to remove: %s", name); - - deletes.add(field.fieldId()); - - return this; - } - - @Override - public BaseUpdatePartitionSpec removeField(Term term) { - Pair> sourceTransform = resolve(term); - Pair key = - Pair.of(sourceTransform.first(), sourceTransform.second().toString()); - - PartitionField added = transformToAddedField.get(key); - Preconditions.checkArgument(added == null, "Cannot delete newly added field: %s", added); - - PartitionField field = transformToField.get(key); - Preconditions.checkArgument(field != null, "Cannot find partition field to remove: %s", term); - Preconditions.checkArgument( - renames.get(field.name()) == null, - "Cannot rename and delete partition field: %s", - field.name()); - - deletes.add(field.fieldId()); - - return this; - } - - @Override - public BaseUpdatePartitionSpec renameField(String name, String newName) { - PartitionField existingField = nameToField.get(newName); - if (existingField != null && isVoidTransform(existingField)) { - // rename the old deleted field that is being replaced by the new field - renameField(existingField.name(), existingField.name() + "_" + existingField.fieldId()); - } - - PartitionField added = nameToAddedField.get(name); - Preconditions.checkArgument( - added == null, "Cannot rename newly added partition field: %s", name); - - PartitionField field = nameToField.get(name); - Preconditions.checkArgument(field != null, "Cannot find partition field to rename: %s", name); - Preconditions.checkArgument( - !deletes.contains(field.fieldId()), "Cannot delete and rename partition field: %s", name); - - renames.put(name, newName); - - return this; - } - - @Override - public PartitionSpec apply() { - PartitionSpec.Builder builder = PartitionSpec.builderFor(schema); - - for (PartitionField field : spec.fields()) { - if (!deletes.contains(field.fieldId())) { - String newName = renames.get(field.name()); - if (newName != null) { - builder.add(field.sourceId(), field.fieldId(), newName, field.transform()); - } else { - builder.add(field.sourceId(), field.fieldId(), field.name(), field.transform()); - } - } else if (formatVersion < 2) { - // field IDs were not required for v1 and were assigned sequentially in each partition spec - // starting at 1,000. - // to maintain consistent field ids across partition specs in v1 tables, any partition field - // that is removed - // must be replaced with a null transform. null values are always allowed in partition data. 
- String newName = renames.get(field.name()); - if (newName != null) { - builder.add(field.sourceId(), field.fieldId(), newName, Transforms.alwaysNull()); - } else { - builder.add(field.sourceId(), field.fieldId(), field.name(), Transforms.alwaysNull()); - } - } - } - - for (PartitionField newField : adds) { - builder.add(newField.sourceId(), newField.fieldId(), newField.name(), newField.transform()); - } - - return builder.build(); - } - - @Override - public void commit() { - TableMetadata update = base.updatePartitionSpec(apply()); - ops.commit(base, update); - } - - private Pair> resolve(Term term) { - Preconditions.checkArgument(term instanceof UnboundTerm, "Term must be unbound"); - - BoundTerm boundTerm = ((UnboundTerm) term).bind(schema.asStruct(), caseSensitive); - int sourceId = boundTerm.ref().fieldId(); - Transform transform = toTransform(boundTerm); - - Type fieldType = schema.findType(sourceId); - if (fieldType != null) { - transform = Transforms.fromString(fieldType, transform.toString()); - } else { - transform = Transforms.fromString(transform.toString()); - } - - return Pair.of(sourceId, transform); - } - - private Transform toTransform(BoundTerm term) { - if (term instanceof BoundReference) { - return Transforms.identity(); - } else if (term instanceof BoundTransform) { - return ((BoundTransform) term).transform(); - } else { - throw new ValidationException( - "Invalid term: %s, expected either a bound reference or transform", term); - } - } - - private void checkForRedundantAddedPartitions(PartitionField field) { - if (isTimeTransform(field)) { - PartitionField timeField = addedTimeFields.get(field.sourceId()); - Preconditions.checkArgument( - timeField == null, - "Cannot add redundant partition field: %s conflicts with %s", - timeField, - field); - addedTimeFields.put(field.sourceId(), field); - } - } - - private static Map indexSpecByName(PartitionSpec spec) { - ImmutableMap.Builder builder = ImmutableMap.builder(); - List fields = spec.fields(); - for (PartitionField field : fields) { - builder.put(field.name(), field); - } - - return builder.build(); - } - - private static Map, PartitionField> indexSpecByTransform( - PartitionSpec spec) { - Map, PartitionField> indexSpecs = Maps.newHashMap(); - List fields = spec.fields(); - for (PartitionField field : fields) { - indexSpecs.put(Pair.of(field.sourceId(), field.transform().toString()), field); - } - - return indexSpecs; - } - - private boolean isTimeTransform(PartitionField field) { - return PartitionSpecVisitor.visit(schema, field, IsTimeTransform.INSTANCE); - } - - private static class IsTimeTransform implements PartitionSpecVisitor { - private static final IsTimeTransform INSTANCE = new IsTimeTransform(); - - private IsTimeTransform() { - } - - @Override - public Boolean identity(int fieldId, String sourceName, int sourceId) { - return false; - } - - @Override - public Boolean bucket(int fieldId, String sourceName, int sourceId, int numBuckets) { - return false; - } - - @Override - public Boolean truncate(int fieldId, String sourceName, int sourceId, int width) { - return false; - } - - @Override - public Boolean year(int fieldId, String sourceName, int sourceId) { - return true; - } - - @Override - public Boolean month(int fieldId, String sourceName, int sourceId) { - return true; - } - - @Override - public Boolean day(int fieldId, String sourceName, int sourceId) { - return true; - } - - @Override - public Boolean hour(int fieldId, String sourceName, int sourceId) { - return true; - } - - @Override - public 
Boolean alwaysNull(int fieldId, String sourceName, int sourceId) { - return false; - } - - @Override - public Boolean unknown(int fieldId, String sourceName, int sourceId, String transform) { - return false; - } - } - - private boolean isVoidTransform(PartitionField field) { - return PartitionSpecVisitor.visit(schema, field, IsVoidTransform.INSTANCE); - } - - private static class IsVoidTransform implements PartitionSpecVisitor { - private static final IsVoidTransform INSTANCE = new IsVoidTransform(); - - private IsVoidTransform() { - } - - @Override - public Boolean identity(int fieldId, String sourceName, int sourceId) { - return false; - } - - @Override - public Boolean bucket(int fieldId, String sourceName, int sourceId, int numBuckets) { - return false; - } - - @Override - public Boolean truncate(int fieldId, String sourceName, int sourceId, int width) { - return false; - } - - @Override - public Boolean year(int fieldId, String sourceName, int sourceId) { - return false; - } - - @Override - public Boolean month(int fieldId, String sourceName, int sourceId) { - return false; - } - - @Override - public Boolean day(int fieldId, String sourceName, int sourceId) { - return false; - } - - @Override - public Boolean hour(int fieldId, String sourceName, int sourceId) { - return false; - } - - @Override - public Boolean alwaysNull(int fieldId, String sourceName, int sourceId) { - return true; - } - - @Override - public Boolean unknown(int fieldId, String sourceName, int sourceId, String transform) { - return false; - } - } - - private static class PartitionNameGenerator implements PartitionSpecVisitor { - private static final PartitionNameGenerator INSTANCE = new PartitionNameGenerator(); - - private PartitionNameGenerator() { - } - - @Override - public String identity(int fieldId, String sourceName, int sourceId) { - return sourceName; - } - - @Override - public String bucket(int fieldId, String sourceName, int sourceId, int numBuckets) { - return sourceName + "_bucket_" + numBuckets; - } - - @Override - public String truncate(int fieldId, String sourceName, int sourceId, int width) { - return sourceName + "_trunc_" + width; - } - - @Override - public String year(int fieldId, String sourceName, int sourceId) { - return sourceName + "_year"; - } - - @Override - public String month(int fieldId, String sourceName, int sourceId) { - return sourceName + "_month"; - } - - @Override - public String day(int fieldId, String sourceName, int sourceId) { - return sourceName + "_day"; - } - - @Override - public String hour(int fieldId, String sourceName, int sourceId) { - return sourceName + "_hour"; - } - - @Override - public String alwaysNull(int fieldId, String sourceName, int sourceId) { - return sourceName + "_null"; - } - } -} diff --git a/iceberg/pom.xml b/iceberg/pom.xml index 31c03dfa1de2..07ebbe1dd92d 100644 --- a/iceberg/pom.xml +++ b/iceberg/pom.xml @@ -25,7 +25,7 @@ .. . - 1.1.0 + 1.2.1 4.0.2 3.4.4 1.11.1