Allow struct columns to evolve between a Hive table and its partitions (presto-hive).

- HiveSplitManager: a partition's struct column is accepted when its field types are a
  prefix of the table's struct field types; otherwise HIVE_PARTITION_SCHEMA_MISMATCH is
  thrown. The size of the two structs is compared before calling subList, so a partition
  struct with more fields than the table struct reports the schema mismatch instead of
  failing with IndexOutOfBoundsException.
- ParquetHiveRecordCursor: a Parquet struct may carry fewer fields than the metastore row
  type; converters are created only for the fields present in the file and the remaining
  positions are filled with nulls.
- Tests: presto_test_types_parquet gains a t_struct column and a dummy partition key.

NOTE(review): this patch text appears to have lost generic type arguments and SQL type
parameters during extraction (e.g. "ArrayList fromFieldTypes", "STRUCT", "ARRAY>"); they
are kept as found. Indentation and blank context lines were reconstructed from the hunk
line counts, and the index lines still carry the original blob ids. Confirm against the
upstream change before applying.

diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java
index d6bf69f143c80..c59ac43999a57 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSplitManager.java
@@ -35,9 +35,13 @@
 import com.google.common.collect.Ordering;
 import io.airlift.concurrent.BoundedExecutor;
 import org.apache.hadoop.hive.metastore.ProtectMode;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 import javax.inject.Inject;
 
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -248,7 +252,26 @@ private Iterable getPartitionMetadata(SemiTransactionalHi
                 for (int i = 0; i < min(partitionColumns.size(), tableColumns.size()); i++) {
                     HiveType tableType = tableColumns.get(i).getType();
                     HiveType partitionType = partitionColumns.get(i).getType();
-                    if (!tableType.equals(partitionType)) {
+                    if (isStruct(tableType) && isStruct(partitionType)) {
+                        // a partition struct is accepted only when its field types are a prefix of the table struct's field types
+                        ArrayList fromFieldTypes = getStructFields(partitionType);
+                        ArrayList toFieldTypes = getStructFields(tableType);
+                        // compare sizes first: subList(0, n) throws IndexOutOfBoundsException when the partition struct has more fields
+                        if (fromFieldTypes.size() > toFieldTypes.size() || !toFieldTypes.subList(0, fromFieldTypes.size()).equals(fromFieldTypes)) {
+                            throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" +
+                                    "There is a mismatch between the table and partition schemas. " +
+                                    "The structs are incompatible and cannot be coerced. " +
+                                    "The column '%s' in table '%s' is declared as type '%s', " +
+                                    "but partition '%s' declared column '%s' as type '%s'.",
+                                    tableColumns.get(i).getName(),
+                                    tableName,
+                                    tableType,
+                                    partName,
+                                    partitionColumns.get(i).getName(),
+                                    partitionType));
+                        }
+                    }
+                    else if (!tableType.equals(partitionType)) {
                         if (!coercionPolicy.canCoerce(partitionType, tableType)) {
                             throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" +
                                     "There is a mismatch between the table and partition schemas. " +
@@ -335,4 +358,21 @@ public void execute(Runnable command)
             }
         }
     }
+
+    /**
+     * Returns whether the given Hive type belongs to the STRUCT category.
+     */
+    private static boolean isStruct(HiveType type)
+    {
+        return type.getCategory() == ObjectInspector.Category.STRUCT;
+    }
+
+    /**
+     * Returns the field type infos of the given struct type.
+     * The caller must pass a STRUCT type (see isStruct); any other type fails the StructTypeInfo cast.
+     */
+    private static ArrayList getStructFields(HiveType structHiveType)
+    {
+        return ((StructTypeInfo) structHiveType.getTypeInfo()).getAllStructFieldTypeInfos();
+    }
 }
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetHiveRecordCursor.java b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetHiveRecordCursor.java
index 234866e7e7d78..5b12041283d2d 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetHiveRecordCursor.java
+++ b/presto-hive/src/main/java/com/facebook/presto/hive/parquet/ParquetHiveRecordCursor.java
@@ -741,7 +741,7 @@ public ParquetStructConverter(Type prestoType, String columnName, GroupType entr
             List prestoTypeParameters = prestoType.getTypeParameters();
             List fieldTypes = entryType.getFields();
             checkArgument(
-                    prestoTypeParameters.size() == fieldTypes.size(),
+                    prestoTypeParameters.size() >= fieldTypes.size(),
                     "Schema mismatch, metastore schema for row column %s has %s fields but parquet schema has %s fields",
                     columnName,
                     prestoTypeParameters.size(),
@@ -751,7 +751,7 @@ public ParquetStructConverter(Type prestoType, String columnName, GroupType entr
             this.fieldIndex = fieldIndex;
 
             ImmutableList.Builder converters = ImmutableList.builder();
-            for (int i = 0; i < prestoTypeParameters.size(); i++) {
+            for (int i = 0; i < fieldTypes.size(); i++) {
                 parquet.schema.Type fieldType = fieldTypes.get(i);
                 converters.add(createConverter(prestoTypeParameters.get(i), columnName + "." + fieldType.getName(), fieldType, i));
             }
@@ -796,7 +796,7 @@ public void end()
             for (BlockConverter converter : converters) {
                 converter.afterValue();
             }
-            while (currentEntryBuilder.getPositionCount() < converters.size()) {
+            while (currentEntryBuilder.getPositionCount() < rowType.getTypeParameters().size()) {
                 currentEntryBuilder.appendNull();
             }
 
diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java
index ee5e013e70fda..d30219dd8054c 100644
--- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java
+++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java
@@ -79,6 +79,7 @@
 import com.facebook.presto.testing.TestingConnectorSession;
 import com.facebook.presto.type.ArrayType;
 import com.facebook.presto.type.MapType;
+import com.facebook.presto.type.RowType;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableMultimap;
@@ -2742,6 +2743,22 @@ else if (rowNumber % 39 == 1) {
                     }
                 }
 
+                // STRUCT
+                index = columnIndex.get("t_struct");
+                if (index != null) {
+                    if ((rowNumber % 31) == 0) {
+                        assertNull(row.getField(index));
+                    }
+                    else {
+                        assertTrue(row.getField(index) instanceof List);
+                        List values = (List) row.getField(index);
+                        assertEquals(values.size(), 3);
+                        assertEquals(values.get(0), "test abc");
+                        assertEquals(values.get(1), 0.1);
+                        assertNull(values.get(2));
+                    }
+                }
+
                 // MAP>>
                 index = columnIndex.get("t_complex");
                 if (index != null) {
@@ -2980,7 +2997,7 @@ else if (TIMESTAMP.equals(column.getType())) {
                 else if (DATE.equals(column.getType())) {
                     assertInstanceOf(value, SqlDate.class);
                 }
-                else if (column.getType() instanceof ArrayType) {
+                else if (column.getType() instanceof ArrayType || column.getType() instanceof RowType) {
                     assertInstanceOf(value, List.class);
                 }
                 else if (column.getType() instanceof MapType) {
diff --git a/presto-hive/src/test/sql/create-test-hive13.sql b/presto-hive/src/test/sql/create-test-hive13.sql
index 0d63bf67ce1f9..4a9e4b087759c 100644
--- a/presto-hive/src/test/sql/create-test-hive13.sql
+++ b/presto-hive/src/test/sql/create-test-hive13.sql
@@ -98,12 +98,15 @@ CREATE TABLE presto_test_types_parquet (
 , t_binary BINARY
 , t_map MAP
 , t_array_string ARRAY
-, t_array_struct ARRAY>
+, t_array_struct ARRAY>
+, t_struct STRUCT
 )
+PARTITIONED BY (dummy INT)
 STORED AS PARQUET
 ;
 
 INSERT INTO TABLE presto_test_types_parquet
+PARTITION (dummy=0)
 SELECT
   t_string
 , t_varchar
@@ -119,9 +122,12 @@ SELECT
 , t_map
 , t_array_string
 , t_array_struct
+, t_array_struct[0]
 FROM presto_test_types_textfile
 ;
 
+ALTER TABLE presto_test_types_parquet
+CHANGE COLUMN t_struct t_struct STRUCT;
 
 ALTER TABLE presto_test_types_textfile ADD COLUMNS (new_column INT);
 ALTER TABLE presto_test_types_sequencefile ADD COLUMNS (new_column INT);