From e448d824b58ffc7eaff33b202f488fbbbb6ebbcb Mon Sep 17 00:00:00 2001 From: gaborkaszab Date: Fri, 21 Oct 2022 07:37:17 +0200 Subject: [PATCH 01/35] Hive: Set the Table owner on table creation (#5763) --- .../iceberg/hive/HiveTableOperations.java | 22 ++-- .../apache/iceberg/hive/TestHiveCatalog.java | 119 ++++++++---------- 2 files changed, 63 insertions(+), 78 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 43f7e52382b4..6d1e7e53819e 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -56,6 +56,7 @@ import org.apache.iceberg.hadoop.ConfigProperties; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.BiMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -193,7 +194,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { updateHiveTable = true; LOG.debug("Committing existing table: {}", fullName); } else { - tbl = newHmsTable(); + tbl = newHmsTable(metadata); LOG.debug("Committing new table: {}", fullName); } @@ -291,12 +292,13 @@ private Table loadHmsTable() throws TException, InterruptedException { } } - private Table newHmsTable() { + private Table newHmsTable(TableMetadata metadata) { + Preconditions.checkNotNull(metadata, "'metadata' parameter can't be null"); final long currentTimeMillis = System.currentTimeMillis(); Table newTable = new Table(tableName, database, - System.getProperty("user.name"), + metadata.property(TableProperties.HMS_TABLE_OWNER, System.getProperty("user.name")), (int) currentTimeMillis / 1000, (int) currentTimeMillis / 1000, Integer.MAX_VALUE, @@ -318,11 +320,15 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM .orElseGet(Maps::newHashMap); // push all Iceberg table properties into HMS - metadata.properties().forEach((key, value) -> { - // translate key names between Iceberg and HMS where needed - String hmsKey = ICEBERG_TO_HMS_TRANSLATION.getOrDefault(key, key); - parameters.put(hmsKey, value); - }); + metadata.properties().entrySet().stream() + .filter(entry -> !entry.getKey().equalsIgnoreCase(TableProperties.HMS_TABLE_OWNER)) + .forEach( + entry -> { + String key = entry.getKey(); + // translate key names between Iceberg and HMS where needed + String hmsKey = ICEBERG_TO_HMS_TRANSLATION.getOrDefault(key, key); + parameters.put(hmsKey, entry.getValue()); + }); if (metadata.uuid() != null) { parameters.put(TableProperties.UUID, metadata.uuid()); } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 00d7468cf4ac..89007c6f23b0 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -88,15 +88,16 @@ public class TestHiveCatalog extends HiveMetastoreTest { @Rule public TemporaryFolder temp = new TemporaryFolder(); + private Schema getTestSchema() { + return new Schema( + required(1, "id", Types.IntegerType.get(), "unique ID"), + required(2, "data", Types.StringType.get())); + } + @Test public void testCreateTableBuilder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -120,13 +121,8 @@ public void testCreateTableBuilder() throws Exception { @Test public void testCreateTableWithCaching() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); ImmutableMap properties = ImmutableMap.of("key1", "value1", "key2", "value2"); @@ -175,10 +171,7 @@ public void testInitializeCatalogWithProperties() { @Test public void testCreateTableTxnBuilder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -199,13 +192,8 @@ public void testCreateTableTxnBuilder() throws Exception { @Test public void testReplaceTxnBuilder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -245,15 +233,32 @@ public void testReplaceTxnBuilder() throws Exception { } } + @Test + public void testCreateTableWithOwner() throws Exception { + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); + TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); + String location = temp.newFolder("tbl").toString(); + String owner = "some_owner"; + ImmutableMap properties = + ImmutableMap.of(TableProperties.HMS_TABLE_OWNER, owner); + + try { + Table table = catalog.createTable(tableIdent, schema, spec, location, properties); + org.apache.hadoop.hive.metastore.api.Table hmsTable = + metastoreClient.getTable(DB_NAME, "tbl"); + Assert.assertEquals(owner, hmsTable.getOwner()); + Map hmsTableParams = hmsTable.getParameters(); + Assert.assertFalse(hmsTableParams.containsKey(TableProperties.HMS_TABLE_OWNER)); + } finally { + catalog.dropTable(tableIdent); + } + } + @Test public void testCreateTableDefaultSortOrder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -270,16 +275,9 @@ public void testCreateTableDefaultSortOrder() throws Exception { @Test public void testCreateTableCustomSortOrder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); - SortOrder order = SortOrder.builderFor(schema) - .asc("id", NULLS_FIRST) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); + SortOrder order = SortOrder.builderFor(schema).asc("id", NULLS_FIRST).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -417,8 +415,7 @@ public void testRemoveNamespaceProperties() throws TException { public void testDropNamespace() throws TException { Namespace namespace = Namespace.of("dbname_drop"); TableIdentifier identifier = TableIdentifier.of(namespace, "table"); - Schema schema = new Schema(Types.StructType.of( - required(1, "id", Types.LongType.get())).fields()); + Schema schema = getTestSchema(); catalog.createNamespace(namespace, meta); catalog.createTable(identifier, schema); @@ -445,8 +442,7 @@ public void testDropNamespace() throws TException { @Test public void testDropTableWithoutMetadataFile() { TableIdentifier identifier = TableIdentifier.of(DB_NAME, "tbl"); - Schema tableSchema = - new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); + Schema tableSchema = getTestSchema(); catalog.createTable(identifier, tableSchema); String metadataFileLocation = catalog.newTableOps(identifier).current().metadataFileLocation(); TableOperations ops = catalog.newTableOps(identifier); @@ -460,13 +456,8 @@ public void testDropTableWithoutMetadataFile() { @Test public void testTableName() { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -492,10 +483,7 @@ private String defaultUri(Namespace namespace) throws TException { @Test public void testUUIDinTableProperties() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdentifier = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -512,10 +500,7 @@ public void testUUIDinTableProperties() throws Exception { @Test public void testSnapshotStatsTableProperties() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdentifier = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -620,10 +605,7 @@ public void testNotExposeTableProperties() { @Test public void testSetDefaultPartitionSpec() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -641,10 +623,7 @@ public void testSetDefaultPartitionSpec() throws Exception { @Test public void testSetCurrentSchema() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -686,7 +665,7 @@ public void testConstructorWarehousePathWithEndSlash() { @Test public void testTablePropsDefinedAtCatalogLevel() { - Schema schema = new Schema(required(1, "id", Types.IntegerType.get(), "unique ID")); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); ImmutableMap catalogProps = From 8cd8ff676648a8b0be12b187f4c22bf84d096f3e Mon Sep 17 00:00:00 2001 From: Haizhou Zhao Date: Mon, 28 Nov 2022 06:51:57 -0800 Subject: [PATCH 02/35] Hive: Set the database owner on namespace creation (#6045) --- .../org/apache/iceberg/hive/HiveCatalog.java | 46 +- .../iceberg/hive/HiveTableOperations.java | 4 +- .../apache/iceberg/hive/TestHiveCatalog.java | 414 +++++++++++++++++- 3 files changed, 454 insertions(+), 10 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 6cccb0c01a2f..24e29ca569d2 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.iceberg.BaseMetastoreCatalog; @@ -65,6 +66,10 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa public static final String LIST_ALL_TABLES = "list-all-tables"; public static final String LIST_ALL_TABLES_DEFAULT = "false"; + public static final String HMS_TABLE_OWNER = "hive.metastore.table.owner"; + public static final String HMS_DB_OWNER = "hive.metastore.database.owner"; + public static final String HMS_DB_OWNER_TYPE = "hive.metastore.database.owner-type"; + private static final Logger LOG = LoggerFactory.getLogger(HiveCatalog.class); private String name; @@ -244,11 +249,16 @@ public void renameTable(TableIdentifier from, TableIdentifier originalTo) { @Override public void createNamespace(Namespace namespace, Map meta) { Preconditions.checkArgument( - !namespace.isEmpty(), - "Cannot create namespace with invalid name: %s", namespace); - Preconditions.checkArgument(isValidateNamespace(namespace), - "Cannot support multi part namespace in Hive Metastore: %s", namespace); - + !namespace.isEmpty(), "Cannot create namespace with invalid name: %s", namespace); + Preconditions.checkArgument( + isValidateNamespace(namespace), + "Cannot support multi part namespace in Hive Metastore: %s", + namespace); + Preconditions.checkArgument( + meta.get(HMS_DB_OWNER_TYPE) == null || meta.get(HMS_DB_OWNER) != null, + "Create namespace setting %s without setting %s is not allowed", + HMS_DB_OWNER_TYPE, + HMS_DB_OWNER); try { clients.run(client -> { client.createDatabase(convertToDatabase(namespace, meta)); @@ -334,6 +344,11 @@ public boolean dropNamespace(Namespace namespace) { @Override public boolean setProperties(Namespace namespace, Map properties) { + Preconditions.checkArgument( + (properties.get(HMS_DB_OWNER_TYPE) == null) == (properties.get(HMS_DB_OWNER) == null), + "Setting %s and %s has to be performed together or not at all", + HMS_DB_OWNER_TYPE, + HMS_DB_OWNER); Map parameter = Maps.newHashMap(); parameter.putAll(loadNamespaceMetadata(namespace)); @@ -349,6 +364,11 @@ public boolean setProperties(Namespace namespace, Map propertie @Override public boolean removeProperties(Namespace namespace, Set properties) { + Preconditions.checkArgument( + properties.contains(HMS_DB_OWNER_TYPE) == properties.contains(HMS_DB_OWNER), + "Removing %s and %s has to be performed together or not at all", + HMS_DB_OWNER_TYPE, + HMS_DB_OWNER); Map parameter = Maps.newHashMap(); parameter.putAll(loadNamespaceMetadata(namespace)); @@ -489,6 +509,12 @@ private Map convertToMetadata(Database database) { if (database.getDescription() != null) { meta.put("comment", database.getDescription()); } + if (database.getOwnerName() != null) { + meta.put(HMS_DB_OWNER, database.getOwnerName()); + if (database.getOwnerType() != null) { + meta.put(HMS_DB_OWNER_TYPE, database.getOwnerType().name()); + } + } return meta; } @@ -510,12 +536,22 @@ Database convertToDatabase(Namespace namespace, Map meta) { database.setDescription(value); } else if (key.equals("location")) { database.setLocationUri(value); + } else if (key.equals(HMS_DB_OWNER)) { + database.setOwnerName(value); + } else if (key.equals(HMS_DB_OWNER_TYPE) && value != null) { + database.setOwnerType(PrincipalType.valueOf(value)); } else { if (value != null) { parameter.put(key, value); } } }); + + if (database.getOwnerName() == null) { + database.setOwnerName(System.getProperty("user.name")); + database.setOwnerType(PrincipalType.USER); + } + database.setParameters(parameter); return database; diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 6d1e7e53819e..2b7622fcafd5 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -298,7 +298,7 @@ private Table newHmsTable(TableMetadata metadata) { Table newTable = new Table(tableName, database, - metadata.property(TableProperties.HMS_TABLE_OWNER, System.getProperty("user.name")), + metadata.property(HiveCatalog.HMS_TABLE_OWNER, System.getProperty("user.name")), (int) currentTimeMillis / 1000, (int) currentTimeMillis / 1000, Integer.MAX_VALUE, @@ -321,7 +321,7 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM // push all Iceberg table properties into HMS metadata.properties().entrySet().stream() - .filter(entry -> !entry.getKey().equalsIgnoreCase(TableProperties.HMS_TABLE_OWNER)) + .filter(entry -> !entry.getKey().equalsIgnoreCase(HiveCatalog.HMS_TABLE_OWNER)) .forEach( entry -> { String key = entry.getKey(); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 89007c6f23b0..e81bfe308d8e 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -21,10 +21,14 @@ import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CachingCatalog; import org.apache.iceberg.CatalogProperties; @@ -240,8 +244,7 @@ public void testCreateTableWithOwner() throws Exception { TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); String owner = "some_owner"; - ImmutableMap properties = - ImmutableMap.of(TableProperties.HMS_TABLE_OWNER, owner); + ImmutableMap properties = ImmutableMap.of(HiveCatalog.HMS_TABLE_OWNER, owner); try { Table table = catalog.createTable(tableIdent, schema, spec, location, properties); @@ -249,7 +252,7 @@ public void testCreateTableWithOwner() throws Exception { metastoreClient.getTable(DB_NAME, "tbl"); Assert.assertEquals(owner, hmsTable.getOwner()); Map hmsTableParams = hmsTable.getParameters(); - Assert.assertFalse(hmsTableParams.containsKey(TableProperties.HMS_TABLE_OWNER)); + Assert.assertFalse(hmsTableParams.containsKey(HiveCatalog.HMS_TABLE_OWNER)); } finally { catalog.dropTable(tableIdent); } @@ -330,6 +333,81 @@ public void testCreateNamespace() throws Exception { database2.getLocationUri(), hiveLocalDir); } + @Test + public void testCreateNamespaceWithOwnership() throws Exception { + createNamespaceAndVerifyOwnership( + "individual_ownership_1", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "apache", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + "apache", + PrincipalType.USER); + + createNamespaceAndVerifyOwnership( + "individual_ownership_2", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "someone"), + "someone", + PrincipalType.USER); + + createNamespaceAndVerifyOwnership( + "group_ownership", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "iceberg", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + "iceberg", + PrincipalType.GROUP); + + AssertHelpers.assertThrows( + String.format( + "Create namespace setting %s without setting %s is not allowed", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), + IllegalArgumentException.class, + () -> { + try { + createNamespaceAndVerifyOwnership( + "create_with_owner_type_alone", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER_TYPE, PrincipalType.USER.name()), + "no_post_create_expectation_due_to_exception_thrown", + null); + } catch (TException e) { + throw new RuntimeException("Unexpected Exception", e); + } + }); + + AssertHelpers.assertThrows( + "No enum constant " + PrincipalType.class.getCanonicalName(), + IllegalArgumentException.class, + () -> { + try { + createNamespaceAndVerifyOwnership( + "create_with_invalid_owner_type", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, "iceberg", + HiveCatalog.HMS_DB_OWNER_TYPE, "invalidOwnerType"), + "no_post_create_expectation_due_to_exception_thrown", + null); + } catch (TException e) { + throw new RuntimeException("Unexpected Exception", e); + } + }); + } + + private void createNamespaceAndVerifyOwnership( + String name, Map prop, String expectedOwner, PrincipalType expectedOwnerType) + throws TException { + Namespace namespace = Namespace.of(name); + + catalog.createNamespace(namespace, prop); + Database db = metastoreClient.getDatabase(namespace.toString()); + + Assert.assertEquals(expectedOwner, db.getOwnerName()); + Assert.assertEquals(expectedOwnerType, db.getOwnerType()); + } + @Test public void testListNamespace() throws TException { List namespaces; @@ -393,6 +471,203 @@ public void testSetNamespaceProperties() throws TException { }); } + @Test + public void testSetNamespaceOwnership() throws TException { + setNamespaceOwnershipAndVerify( + "set_individual_ownership_on_default_owner", + ImmutableMap.of(), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + System.getProperty("user.name"), + PrincipalType.USER, + "some_individual_owner", + PrincipalType.USER); + + setNamespaceOwnershipAndVerify( + "set_group_ownership_on_default_owner", + ImmutableMap.of(), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + System.getProperty("user.name"), + PrincipalType.USER, + "some_group_owner", + PrincipalType.GROUP); + + setNamespaceOwnershipAndVerify( + "change_individual_to_group_ownership", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + "some_owner", + PrincipalType.USER, + "some_group_owner", + PrincipalType.GROUP); + + setNamespaceOwnershipAndVerify( + "change_group_to_individual_ownership", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + "some_group_owner", + PrincipalType.GROUP, + "some_individual_owner", + PrincipalType.USER); + + AssertHelpers.assertThrows( + String.format( + "Setting %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), + IllegalArgumentException.class, + () -> { + try { + setNamespaceOwnershipAndVerify( + "set_owner_without_setting_owner_type", + ImmutableMap.of(), + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_individual_owner"), + System.getProperty("user.name"), + PrincipalType.USER, + "no_post_setting_expectation_due_to_exception_thrown", + null); + } catch (TException e) { + throw new RuntimeException("Unexpected Exception", e); + } + }); + + AssertHelpers.assertThrows( + String.format( + "Setting %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), + IllegalArgumentException.class, + () -> { + try { + setNamespaceOwnershipAndVerify( + "set_owner_type_without_setting_owner", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER_TYPE, PrincipalType.GROUP.name()), + "some_owner", + PrincipalType.USER, + "no_post_setting_expectation_due_to_exception_thrown", + null); + } catch (TException e) { + throw new RuntimeException("Unexpected Exception", e); + } + }); + + AssertHelpers.assertThrows( + HiveCatalog.HMS_DB_OWNER_TYPE + + " has an invalid value of: " + + meta.get(HiveCatalog.HMS_DB_OWNER_TYPE) + + ". Acceptable values are: " + + Stream.of(PrincipalType.values()).map(Enum::name).collect(Collectors.joining(", ")), + IllegalArgumentException.class, + () -> { + try { + setNamespaceOwnershipAndVerify( + "set_invalid_owner_type", + ImmutableMap.of(), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, "iceberg", + HiveCatalog.HMS_DB_OWNER_TYPE, "invalidOwnerType"), + System.getProperty("user.name"), + PrincipalType.USER, + "no_post_setting_expectation_due_to_exception_thrown", + null); + } catch (TException e) { + throw new RuntimeException("Unexpected Exception", e); + } + }); + } + + @Test + public void testSetNamespaceOwnershipNoop() throws TException { + setNamespaceOwnershipAndVerify( + "set_ownership_noop_1", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_individual_owner"), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + "some_individual_owner", + PrincipalType.USER, + "some_individual_owner", + PrincipalType.USER); + + setNamespaceOwnershipAndVerify( + "set_ownership_noop_2", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + "some_group_owner", + PrincipalType.GROUP, + "some_group_owner", + PrincipalType.GROUP); + + setNamespaceOwnershipAndVerify( + "set_ownership_noop_3", + ImmutableMap.of(), + ImmutableMap.of(), + System.getProperty("user.name"), + PrincipalType.USER, + System.getProperty("user.name"), + PrincipalType.USER); + + setNamespaceOwnershipAndVerify( + "set_ownership_noop_4", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableMap.of("unrelated_prop_1", "value_1", "unrelated_prop_2", "value_2"), + "some_group_owner", + PrincipalType.GROUP, + "some_group_owner", + PrincipalType.GROUP); + } + + private void setNamespaceOwnershipAndVerify( + String name, + Map propToCreate, + Map propToSet, + String expectedOwnerPostCreate, + PrincipalType expectedOwnerTypePostCreate, + String expectedOwnerPostSet, + PrincipalType expectedOwnerTypePostSet) + throws TException { + createNamespaceAndVerifyOwnership( + name, propToCreate, expectedOwnerPostCreate, expectedOwnerTypePostCreate); + + catalog.setProperties(Namespace.of(name), propToSet); + Database database = metastoreClient.getDatabase(name); + + Assert.assertEquals(expectedOwnerPostSet, database.getOwnerName()); + Assert.assertEquals(expectedOwnerTypePostSet, database.getOwnerType()); + } + @Test public void testRemoveNamespaceProperties() throws TException { Namespace namespace = Namespace.of("dbname_remove"); @@ -411,6 +686,139 @@ public void testRemoveNamespaceProperties() throws TException { }); } + @Test + public void testRemoveNamespaceOwnership() throws TException { + removeNamespaceOwnershipAndVerify( + "remove_individual_ownership", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), + "some_owner", + PrincipalType.USER, + System.getProperty("user.name"), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_group_ownership", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), + "some_group_owner", + PrincipalType.GROUP, + System.getProperty("user.name"), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_on_default_noop_1", + ImmutableMap.of(), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), + System.getProperty("user.name"), + PrincipalType.USER, + System.getProperty("user.name"), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_on_default_noop_2", + ImmutableMap.of(), + ImmutableSet.of(), + System.getProperty("user.name"), + PrincipalType.USER, + System.getProperty("user.name"), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_noop_1", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableSet.of(), + "some_owner", + PrincipalType.USER, + "some_owner", + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_noop_2", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableSet.of(), + "some_group_owner", + PrincipalType.GROUP, + "some_group_owner", + PrincipalType.GROUP); + + AssertHelpers.assertThrows( + String.format( + "Removing %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), + IllegalArgumentException.class, + () -> { + try { + removeNamespaceOwnershipAndVerify( + "remove_owner_without_removing_owner_type", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER), + "some_individual_owner", + PrincipalType.USER, + "no_post_remove_expectation_due_to_exception_thrown", + null); + } catch (TException e) { + throw new RuntimeException("Unexpected Exception", e); + } + }); + + AssertHelpers.assertThrows( + String.format( + "Removing %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), + IllegalArgumentException.class, + () -> { + try { + removeNamespaceOwnershipAndVerify( + "remove_owner_type_without_removing_owner", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER_TYPE), + "some_group_owner", + PrincipalType.GROUP, + "no_post_remove_expectation_due_to_exception_thrown", + null); + } catch (TException e) { + throw new RuntimeException("Unexpected Exception", e); + } + }); + } + + private void removeNamespaceOwnershipAndVerify( + String name, + Map propToCreate, + Set propToRemove, + String expectedOwnerPostCreate, + PrincipalType expectedOwnerTypePostCreate, + String expectedOwnerPostRemove, + PrincipalType expectedOwnerTypePostRemove) + throws TException { + createNamespaceAndVerifyOwnership( + name, propToCreate, expectedOwnerPostCreate, expectedOwnerTypePostCreate); + + catalog.removeProperties(Namespace.of(name), propToRemove); + + Database database = metastoreClient.getDatabase(name); + + Assert.assertEquals(expectedOwnerPostRemove, database.getOwnerName()); + Assert.assertEquals(expectedOwnerTypePostRemove, database.getOwnerType()); + } + @Test public void testDropNamespace() throws TException { Namespace namespace = Namespace.of("dbname_drop"); From 18151d29b0ed8038cde6a03fe05a581c0fd43f96 Mon Sep 17 00:00:00 2001 From: Vikash Kumar Date: Fri, 23 Dec 2022 06:05:57 +0530 Subject: [PATCH 03/35] Hive: Merge identical catch branch (#6477) --- .../java/org/apache/iceberg/hive/ScriptRunner.java | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java index fce4b307a201..c5960170432e 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java @@ -101,9 +101,7 @@ public void runScript(Reader reader) throws IOException, SQLException { } finally { connection.setAutoCommit(originalAutoCommit); } - } catch (IOException e) { - throw e; - } catch (SQLException e) { + } catch (IOException | SQLException e) { throw e; } catch (Exception e) { throw new RuntimeException("Error running script. Cause: " + e, e); @@ -199,12 +197,7 @@ private void runScript(Connection conn, Reader reader) throws IOException, SQLEx if (!autoCommit) { conn.commit(); } - } catch (SQLException e) { - e.fillInStackTrace(); - printlnError("Error executing: " + command); - printlnError(e); - throw e; - } catch (IOException e) { + } catch (IOException | SQLException e) { e.fillInStackTrace(); printlnError("Error executing: " + command); printlnError(e); From 79a54503a797fb683687150555a346b40dfcdbd4 Mon Sep 17 00:00:00 2001 From: pvary Date: Wed, 11 Jan 2023 07:43:07 +0100 Subject: [PATCH 04/35] Hive: Lock hardening (#6451) --- iceberg/iceberg-catalog/pom.xml | 5 + .../apache/iceberg/hive/HiveCommitLock.java | 231 ++++++++++++++++-- .../apache/iceberg/hive/HiveSchemaUtil.java | 2 +- .../iceberg/hive/HiveTableOperations.java | 3 +- .../org/apache/iceberg/hive/HiveVersion.java | 66 +++++ .../apache/iceberg/hive/MetastoreUtil.java | 47 ++-- .../iceberg/hive/TestHiveCommitLocks.java | 195 ++++++++++++++- .../apache/iceberg/hive/TestHiveCommits.java | 14 +- .../mr/hive/HiveIcebergInputFormat.java | 6 +- .../IcebergObjectInspector.java | 26 +- .../mr/mapreduce/IcebergInputFormat.java | 6 +- ...veIcebergStorageHandlerWithEngineBase.java | 4 +- .../iceberg/mr/hive/TestDeserializer.java | 5 +- .../TestHiveIcebergStorageHandlerNoScan.java | 4 +- .../apache/iceberg/mr/hive/TestTables.java | 6 +- .../TestIcebergObjectInspector.java | 11 +- 16 files changed, 548 insertions(+), 83 deletions(-) create mode 100644 iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index efd29a48266d..a5a1c14f008a 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -74,5 +74,10 @@ junit-vintage-engine test + + org.mockito + mockito-core + test + diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java index 63d5d40d19fd..60d516747be7 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java @@ -24,6 +24,7 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.util.Optional; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReentrantLock; @@ -35,8 +36,14 @@ import org.apache.hadoop.hive.metastore.api.LockResponse; import org.apache.hadoop.hive.metastore.api.LockState; import org.apache.hadoop.hive.metastore.api.LockType; +import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; import org.apache.iceberg.ClientPool; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.util.Tasks; import org.apache.thrift.TException; @@ -50,10 +57,16 @@ public class HiveCommitLock { private static final String HIVE_ACQUIRE_LOCK_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; private static final String HIVE_LOCK_CHECK_MIN_WAIT_MS = "iceberg.hive.lock-check-min-wait-ms"; private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS = "iceberg.hive.lock-check-max-wait-ms"; + private static final String HIVE_LOCK_CREATION_TIMEOUT_MS = "iceberg.hive.lock-creation-timeout-ms"; + private static final String HIVE_LOCK_CREATION_MIN_WAIT_MS = "iceberg.hive.lock-creation-min-wait-ms"; + private static final String HIVE_LOCK_CREATION_MAX_WAIT_MS = "iceberg.hive.lock-creation-max-wait-ms"; private static final String HIVE_TABLE_LEVEL_LOCK_EVICT_MS = "iceberg.hive.table-level-lock-evict-ms"; private static final long HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes private static final long HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds private static final long HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds + private static final long HIVE_LOCK_CREATION_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes + private static final long HIVE_LOCK_CREATION_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds + private static final long HIVE_LOCK_CREATION_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds private static final long HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT = TimeUnit.MINUTES.toMillis(10); @@ -75,6 +88,10 @@ private static synchronized void initTableLevelLockCache(long evictionTimeout) { private final long lockAcquireTimeout; private final long lockCheckMinWaitTime; private final long lockCheckMaxWaitTime; + private final long lockCreationTimeout; + private final long lockCreationMinWaitTime; + private final long lockCreationMaxWaitTime; + private final String agentInfo; private Optional hmsLockId = Optional.empty(); private Optional jvmLock = Optional.empty(); @@ -86,12 +103,20 @@ public HiveCommitLock(Configuration conf, ClientPool client.lock(lockRequest)); - AtomicReference state = new AtomicReference<>(lockResponse.getState()); - long lockId = lockResponse.getLockid(); + LockInfo lockInfo = tryLock(); + long lockId = lockInfo.lockId; + AtomicReference state = new AtomicReference<>(lockInfo.lockState); this.hmsLockId = Optional.of(lockId); final long start = System.currentTimeMillis(); @@ -181,19 +201,73 @@ private void acquireLockFromHms() throws UnknownHostException, TException, Inter } private void releaseHmsLock() { - if (hmsLockId.isPresent()) { - try { - metaClients.run(client -> { - client.unlock(hmsLockId.get()); - return null; - }); - hmsLockId = Optional.empty(); - } catch (Exception e) { - LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); +// if (hmsLockId.isPresent()) { +// try { +// +// metaClients.run(client -> { +// client.unlock(hmsLockId.get()); +// return null; +// }); +// hmsLockId = Optional.empty(); +// } catch (Exception e) { +// LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); +// } +// } + + Long id = null; + try { + if (!hmsLockId.isPresent()) { + // Try to find the lock based on agentInfo. Only works with Hive 2 or later. + if (HiveVersion.min(HiveVersion.HIVE_2)) { + LockInfo lockInfo = findLock(); + if (lockInfo == null) { + // No lock found + LOG.info("No lock found with {} agentInfo", agentInfo); + return; + } + + id = lockInfo.lockId; + } else { + LOG.warn("Could not find lock with HMSClient {}", HiveVersion.current()); + return; + } + } else { + id = hmsLockId.get(); + } + + doUnlock(hmsLockId.get()); + + } catch (InterruptedException ie) { + if (id != null) { + // Interrupted unlock. We try to unlock one more time if we have a lockId + try { + Thread.interrupted(); // Clear the interrupt status flag for now, so we can retry unlock + LOG.warn("Interrupted unlock we try one more time {}.{}", databaseName, tableName, ie); + doUnlock(id); + } catch (Exception e) { + LOG.warn("Failed to unlock even on 2nd attempt {}.{}", databaseName, tableName, e); + } finally { + Thread.currentThread().interrupt(); // Set back the interrupt status + } + } else { + Thread.currentThread().interrupt(); // Set back the interrupt status + LOG.warn("Interrupted finding locks to unlock {}.{}", databaseName, tableName, ie); } + } catch (Exception e) { + LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); } } + @VisibleForTesting + void doUnlock(long lockId) throws TException, InterruptedException { + metaClients.run( + client -> { + client.unlock(lockId); + return null; + }); + } + + private void acquireJvmLock() { if (jvmLock.isPresent()) { throw new IllegalStateException(String.format("JVM lock already acquired for table %s", fullName)); @@ -217,6 +291,129 @@ public String getTableName() { return tableName; } + private static class LockInfo { + private long lockId; + private LockState lockState; + + private LockInfo() { + this.lockId = -1; + this.lockState = null; + } + + private LockInfo(long lockId, LockState lockState) { + this.lockId = lockId; + this.lockState = lockState; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("lockId", lockId) + .add("lockState", lockState) + .toString(); + } + } + + /** + * Tries to create a lock. If the lock creation fails, and it is possible then retries the lock + * creation a few times. If the lock creation is successful then a {@link LockInfo} is returned, + * otherwise an appropriate exception is thrown. + * + * @return The created lock + * @throws UnknownHostException When we are not able to fill the hostname for lock creation + * @throws TException When there is an error during lock creation + */ + @SuppressWarnings("ReverseDnsLookup") + private LockInfo tryLock() throws UnknownHostException, TException { + LockInfo lockInfo = new LockInfo(); + + final LockComponent lockComponent = + new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, databaseName); + lockComponent.setTablename(tableName); + final LockRequest lockRequest = + new LockRequest( + Lists.newArrayList(lockComponent), + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + + // Only works in Hive 2 or later. + if (HiveVersion.min(HiveVersion.HIVE_2)) { + lockRequest.setAgentInfo(agentInfo); + } + + Tasks.foreach(lockRequest) + .retry(Integer.MAX_VALUE - 100) + .exponentialBackoff( + lockCreationMinWaitTime, lockCreationMaxWaitTime, lockCreationTimeout, 2.0) + .shouldRetryTest(e -> e instanceof TException && HiveVersion.min(HiveVersion.HIVE_2)) + .throwFailureWhenFinished() + .run( + request -> { + try { + LockResponse lockResponse = metaClients.run(client -> client.lock(request)); + lockInfo.lockId = lockResponse.getLockid(); + lockInfo.lockState = lockResponse.getState(); + } catch (TException te) { + LOG.warn("Failed to acquire lock {}", request, te); + try { + // If we can not check for lock, or we do not find it, then rethrow the exception + // Otherwise we are happy as the findLock sets the lockId and the state correctly + if (!HiveVersion.min(HiveVersion.HIVE_2)) { + LockInfo lockFound = findLock(); + if (lockFound != null) { + lockInfo.lockId = lockFound.lockId; + lockInfo.lockState = lockFound.lockState; + LOG.info("Found lock {} by agentInfo {}", lockInfo, agentInfo); + return; + } + } + + throw te; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn( + "Interrupted while checking for lock on table {}.{}", databaseName, tableName, e); + throw new RuntimeException("Interrupted while checking for lock", e); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("Interrupted while acquiring lock on table {}.{}", databaseName, tableName, e); + throw new RuntimeException("Interrupted while acquiring lock", e); + } + }, + TException.class); + + // This should be initialized always, or exception should be thrown. + LOG.debug("Lock {} created for table {}.{}", lockInfo, databaseName, tableName); + return lockInfo; + } + + /** + * Search for the locks using HMSClient.showLocks identified by the agentInfo. If the lock is + * there, then a {@link LockInfo} object is returned. If the lock is not found null + * is returned. + * + * @return The {@link LockInfo} for the found lock, or null if nothing found + */ + private LockInfo findLock() throws TException, InterruptedException { + Preconditions.checkArgument( + HiveVersion.min(HiveVersion.HIVE_2), + "Minimally Hive 2 HMS client is needed to find the Lock using the showLocks API call"); + ShowLocksRequest showLocksRequest = new ShowLocksRequest(); + showLocksRequest.setDbname(databaseName); + showLocksRequest.setTablename(tableName); + ShowLocksResponse response = metaClients.run(client -> client.showLocks(showLocksRequest)); + for (ShowLocksResponseElement lock : response.getLocks()) { + if (lock.getAgentInfo().equals(agentInfo)) { + // We found our lock + return new LockInfo(lock.getLockid(), lock.getState()); + } + } + + // Not found anything + return null; + } + private static class WaitingForHmsLockException extends RuntimeException { WaitingForHmsLockException(String message) { super(message); diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java index e67577eb588a..8ab320a4cdb7 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java @@ -301,7 +301,7 @@ private static String convertToTypeString(Type type) { return "string"; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; - if (MetastoreUtil.hive3PresentOnClasspath() && timestampType.shouldAdjustToUTC()) { + if (HiveVersion.min(HiveVersion.HIVE_3) && timestampType.shouldAdjustToUTC()) { return "timestamp with local time zone"; } return "timestamp"; diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 2b7622fcafd5..02f0c2e98939 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -283,7 +283,8 @@ void persistTable(Table hmsTable, boolean updateHiveTable) throws TException, In } } - private Table loadHmsTable() throws TException, InterruptedException { + @VisibleForTesting + Table loadHmsTable() throws TException, InterruptedException { try { return metaClients.run(client -> client.getTable(database, tableName)); } catch (NoSuchObjectException nte) { diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java new file mode 100644 index 000000000000..a94822e91dc8 --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import org.apache.hive.common.util.HiveVersionInfo; + +public enum HiveVersion { + HIVE_4(4), + HIVE_3(3), + HIVE_2(2), + HIVE_1_2(1), + NOT_SUPPORTED(0); + + private final int order; + private static final HiveVersion current = calculate(); + + HiveVersion(int order) { + this.order = order; + } + + public static HiveVersion current() { + return current; + } + + public static boolean min(HiveVersion other) { + return current.order >= other.order; + } + + private static HiveVersion calculate() { + String version = HiveVersionInfo.getShortVersion(); + String[] versions = version.split("\\."); + switch (versions[0]) { + case "4": + return HIVE_4; + case "3": + return HIVE_3; + case "2": + return HIVE_2; + case "1": + if (versions[1].equals("2")) { + return HIVE_1_2; + } else { + return NOT_SUPPORTED; + } + default: + return NOT_SUPPORTED; + } + } +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index 76363f138c56..f2d19d2ce59b 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -27,32 +27,28 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; public class MetastoreUtil { - - // this class is unique to Hive3 and cannot be found in Hive2, therefore a good proxy to see if - // we are working against Hive3 dependencies - private static final String HIVE3_UNIQUE_CLASS = "org.apache.hadoop.hive.serde2.io.DateWritableV2"; - - private static final DynMethods.UnboundMethod ALTER_TABLE = DynMethods.builder("alter_table") - .impl(IMetaStoreClient.class, "alter_table_with_environmentContext", - String.class, String.class, Table.class, EnvironmentContext.class) - .impl(IMetaStoreClient.class, "alter_table", - String.class, String.class, Table.class, EnvironmentContext.class) - .impl(IMetaStoreClient.class, "alter_table", - String.class, String.class, Table.class) + private static final DynMethods.UnboundMethod ALTER_TABLE = + DynMethods.builder("alter_table") + .impl( + IMetaStoreClient.class, + "alter_table_with_environmentContext", + String.class, + String.class, + Table.class, + EnvironmentContext.class) + .impl( + IMetaStoreClient.class, + "alter_table", + String.class, + String.class, + Table.class, + EnvironmentContext.class) + .impl(IMetaStoreClient.class, "alter_table", String.class, String.class, Table.class) .build(); - private static final boolean HIVE3_PRESENT_ON_CLASSPATH = detectHive3(); - private MetastoreUtil() { } - /** - * Returns true if Hive3 dependencies are found on classpath, false otherwise. - */ - public static boolean hive3PresentOnClasspath() { - return HIVE3_PRESENT_ON_CLASSPATH; - } - /** * Calls alter_table method using the metastore client. If possible, an environmental context will be used that * turns off stats updates to avoid recursive listing. @@ -63,13 +59,4 @@ public static void alterTable(IMetaStoreClient client, String databaseName, Stri ); ALTER_TABLE.invoke(client, databaseName, tblName, table, envContext); } - - private static boolean detectHive3() { - try { - Class.forName(HIVE3_UNIQUE_CLASS); - return true; - } catch (ClassNotFoundException e) { - return false; - } - } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java index 293dd5010cd1..f13ae5b0b840 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java @@ -31,11 +31,14 @@ import org.apache.hadoop.hive.metastore.api.LockRequest; import org.apache.hadoop.hive.metastore.api.LockResponse; import org.apache.hadoop.hive.metastore.api.LockState; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; import org.junit.AfterClass; @@ -43,11 +46,14 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.ArgumentCaptor; import static org.mockito.Matchers.any; import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.spy; @@ -70,6 +76,7 @@ public class TestHiveCommitLocks extends HiveTableBaseTest { LockResponse waitLockResponse = new LockResponse(dummyLockId, LockState.WAITING); LockResponse acquiredLockResponse = new LockResponse(dummyLockId, LockState.ACQUIRED); LockResponse notAcquiredLockResponse = new LockResponse(dummyLockId, LockState.NOT_ACQUIRED); + ShowLocksResponse emptyLocks = new ShowLocksResponse(Lists.newArrayList()); @BeforeClass public static void initializeSpies() throws Exception { @@ -116,8 +123,16 @@ public void before() throws Exception { Assert.assertEquals(2, ops.current().schema().columns().size()); - spyOps = spy(new HiveTableOperations(overriddenHiveConf, spyCachedClientPool, ops.io(), catalog.name(), - dbName, tableName)); + spyOps = + spy( + new HiveTableOperations( + overriddenHiveConf, + spyCachedClientPool, + ops.io(), + catalog.name(), + dbName, + tableName)); + reset(spyClient); } @AfterClass @@ -157,6 +172,182 @@ public void testLockAcquisitionAfterRetries() throws TException, InterruptedExce Assert.assertEquals(1, spyOps.current().schema().columns().size()); // should be 1 again } + @Test + public void testLockAcquisitionAfterFailedNotFoundLock() throws TException, InterruptedException { + doReturn(emptyLocks).when(spyClient).showLocks(any()); + doThrow(new TException("Failed to connect to HMS")) + .doReturn(waitLockResponse) + .when(spyClient) + .lock(any()); + doReturn(waitLockResponse) + .doReturn(acquiredLockResponse) + .when(spyClient) + .checkLock(eq(dummyLockId)); + doNothing().when(spyOps).doUnlock(any()); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + Assert.assertEquals(1, spyOps.current().schema().columns().size()); // should be 1 again + } + + @Test + public void testLockAcquisitionAfterFailedAndFoundLock() throws TException, InterruptedException { + ArgumentCaptor lockRequestCaptor = ArgumentCaptor.forClass(LockRequest.class); + doReturn(emptyLocks).when(spyClient).showLocks(any()); + doThrow(new TException("Failed to connect to HMS")) + .doReturn(waitLockResponse) + .when(spyClient) + .lock(lockRequestCaptor.capture()); + + // Capture the lockRequest, and generate a response simulating that we have a lock + ShowLocksResponse showLocksResponse = new ShowLocksResponse(Lists.newArrayList()); + ShowLocksResponseElement showLocksElement = + new ShowLocksResponseElementWrapper(lockRequestCaptor); + showLocksResponse.getLocks().add(showLocksElement); + + doReturn(showLocksResponse).when(spyClient).showLocks(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doNothing().when(spyOps).doUnlock(any()); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + Assert.assertEquals(1, spyOps.current().schema().columns().size()); // should be 1 again + } + + @Test + public void testUnLock() throws TException { + doReturn(waitLockResponse).when(spyClient).lock(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + verify(spyClient, times(1)).unlock(eq(dummyLockId)); + } + + @Test + public void testUnLockInterruptedUnLock() throws TException { + doReturn(waitLockResponse).when(spyClient).lock(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .doNothing() + .when(spyClient) + .unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + verify(spyClient, times(2)).unlock(eq(dummyLockId)); + } + + @Test + public void testUnLockAfterInterruptedLock() throws TException { + ArgumentCaptor lockRequestCaptor = ArgumentCaptor.forClass(LockRequest.class); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .when(spyClient) + .lock(lockRequestCaptor.capture()); + + // Capture the lockRequest, and generate a response simulating that we have a lock + ShowLocksResponse showLocksResponse = new ShowLocksResponse(Lists.newArrayList()); + ShowLocksResponseElement showLocksElement = + new ShowLocksResponseElementWrapper(lockRequestCaptor); + showLocksResponse.getLocks().add(showLocksElement); + + doReturn(showLocksResponse).when(spyClient).showLocks(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + AssertHelpers.assertThrows( + "Expected an exception", + RuntimeException.class, + "Interrupted while acquiring lock", + () -> spyOps.doCommit(metadataV2, metadataV1)); + +// verify(spyClient, times(1)).unlock(eq(dummyLockId)); + // Make sure that we exit the lock loop on InterruptedException + verify(spyClient, times(1)).lock(any()); + } + + @Test + public void testUnLockAfterInterruptedLockCheck() throws TException { + doReturn(waitLockResponse).when(spyClient).lock(any()); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .when(spyClient) + .checkLock(eq(dummyLockId)); + + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + AssertHelpers.assertThrows( + "Expected an exception", + RuntimeException.class, + "Could not acquire the lock on", + () -> spyOps.doCommit(metadataV2, metadataV1)); + + verify(spyClient, times(1)).unlock(any()); + // Make sure that we exit the checkLock loop on InterruptedException + verify(spyClient, times(1)).checkLock(any()); + } + + @Test + public void testUnLockAfterInterruptedGetTable() throws TException { + doReturn(acquiredLockResponse).when(spyClient).lock(any()); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .when(spyClient) + .getTable(any(), any()); + + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + AssertHelpers.assertThrows( + "Expected an exception", + RuntimeException.class, + "Interrupted during commit", + () -> spyOps.doCommit(metadataV2, metadataV1)); + + verify(spyClient, times(1)).unlock(eq(dummyLockId)); + } + + /** Wraps an ArgumentCaptor to provide data based on the request */ + private class ShowLocksResponseElementWrapper extends ShowLocksResponseElement { + private ArgumentCaptor wrapped; + + private ShowLocksResponseElementWrapper(ArgumentCaptor wrapped) { + this.wrapped = wrapped; + } + + @Override + public String getAgentInfo() { + return wrapped.getValue().getAgentInfo(); + } + + @Override + public LockState getState() { + return LockState.WAITING; + } + + @Override + public long getLockid() { + return dummyLockId; + } + } + @Test public void testLockFailureAtFirstTime() throws TException { doReturn(notAcquiredLockResponse).when(spyClient).lock(any()); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java index 1afe98d81b7e..8b5e48056142 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java @@ -237,11 +237,15 @@ public void testThriftExceptionsUnknownSuccessCommit() throws TException, Interr * and a second committer placed a commit on top of ours before the first committer was able to check * if their commit succeeded or not * - * Timeline: - * Client 1 commits which throws an exception but suceeded - * Client 1's lock expires while waiting to do the recheck for commit success - * Client 2 acquires a lock, commits successfully on top of client 1's commit and release lock - * Client 1 check's to see if their commit was successful + *

Timeline: + * + *

    + *
  • Client 1 commits which throws an exception but succeeded + *
  • Client 1's lock expires while waiting to do the recheck for commit success + *
  • Client 2 acquires a lock, commits successfully on top of client 1's commit and release + * lock + *
  • Client 1 check's to see if their commit was successful + *
* * This tests to make sure a disconnected client 1 doesn't think their commit failed just because it isn't the * current one during the recheck phase. diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java index fa92b638c660..66883b02e5cb 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java @@ -50,7 +50,7 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.ResidualEvaluator; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.InputFormatConfig; import org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader; import org.apache.iceberg.mr.mapred.Container; @@ -74,7 +74,7 @@ public class HiveIcebergInputFormat extends MapredIcebergInputFormat public static final String ICEBERG_DISABLE_VECTORIZATION_PREFIX = "iceberg.disable.vectorization."; static { - if (MetastoreUtil.hive3PresentOnClasspath()) { + if (HiveVersion.min(HiveVersion.HIVE_3)) { HIVE_VECTORIZED_RECORDREADER_CTOR = DynConstructors.builder(AbstractMapredIcebergRecordReader.class) .impl(HIVE_VECTORIZED_RECORDREADER_CLASS, IcebergInputFormat.class, @@ -159,7 +159,7 @@ public RecordReader> getRecordReader(InputSplit split, J job.getBoolean(ColumnProjectionUtils.FETCH_VIRTUAL_COLUMNS_CONF_STR, false)); if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && Utilities.getIsVectorized(job)) { - Preconditions.checkArgument(MetastoreUtil.hive3PresentOnClasspath(), "Vectorization only supported for Hive 3+"); + Preconditions.checkArgument(HiveVersion.min(HiveVersion.HIVE_3), "Vectorization only supported for Hive 3+"); job.setEnum(InputFormatConfig.IN_MEMORY_DATA_MODEL, InputFormatConfig.InMemoryDataModel.HIVE); job.setBoolean(InputFormatConfig.SKIP_RESIDUAL_FILTERING, true); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java index 039950213f92..625f8f65d296 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.iceberg.Schema; import org.apache.iceberg.common.DynMethods; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; @@ -36,23 +36,27 @@ public final class IcebergObjectInspector extends TypeUtil.SchemaVisitor { // get the correct inspectors depending on whether we're working with Hive2 or Hive3 dependencies - // we need to do this because there is a breaking API change in Date/TimestampObjectInspector between Hive2 and Hive3 - private static final String DATE_INSPECTOR_CLASS = MetastoreUtil.hive3PresentOnClasspath() ? - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3" : - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspector"; + // we need to do this because there is a breaking API change in Date/TimestampObjectInspector + // between Hive2 and Hive3 + private static final String DATE_INSPECTOR_CLASS = + HiveVersion.min(HiveVersion.HIVE_3) ? + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3" : + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspector"; public static final ObjectInspector DATE_INSPECTOR = DynMethods.builder("get") .impl(DATE_INSPECTOR_CLASS) .buildStatic() .invoke(); - private static final String TIMESTAMP_INSPECTOR_CLASS = MetastoreUtil.hive3PresentOnClasspath() ? - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3" : - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspector"; + private static final String TIMESTAMP_INSPECTOR_CLASS = + HiveVersion.min(HiveVersion.HIVE_3) ? + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3" : + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspector"; - private static final String TIMESTAMPTZ_INSPECTOR_CLASS = MetastoreUtil.hive3PresentOnClasspath() ? - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspectorHive3" : - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspector"; + private static final String TIMESTAMPTZ_INSPECTOR_CLASS = + HiveVersion.min(HiveVersion.HIVE_3) ? + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspectorHive3" : + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspector"; public static final ObjectInspector TIMESTAMP_INSPECTOR = DynMethods.builder("get") .impl(TIMESTAMP_INSPECTOR_CLASS) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java index 46c1c23dc07e..af62c0514e2d 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java @@ -76,7 +76,7 @@ import org.apache.iceberg.expressions.Evaluator; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.CloseableIterator; import org.apache.iceberg.io.InputFile; @@ -261,7 +261,7 @@ private static final class IcebergRecordReader extends RecordReader private static final DynMethods.StaticMethod HIVE_VECTORIZED_READER_BUILDER; static { - if (MetastoreUtil.hive3PresentOnClasspath()) { + if (HiveVersion.min(HiveVersion.HIVE_3)) { HIVE_VECTORIZED_READER_BUILDER = DynMethods.builder("reader") .impl(HIVE_VECTORIZED_READER_CLASS, Table.class, @@ -363,7 +363,7 @@ private CloseableIterable openVectorized(FileScanTask task, Schema readSchema Preconditions.checkArgument(!task.file().format().equals(FileFormat.AVRO), "Vectorized execution is not yet supported for Iceberg avro tables. " + "Please turn off vectorization and retry the query."); - Preconditions.checkArgument(MetastoreUtil.hive3PresentOnClasspath(), + Preconditions.checkArgument(HiveVersion.min(HiveVersion.HIVE_3), "Vectorized read is unsupported for Hive 2 integration."); Path path = new Path(task.file().path().toString()); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java index 6de80dfd32e9..8653c0db02fc 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java @@ -32,7 +32,7 @@ import org.apache.iceberg.SnapshotSummary; import org.apache.iceberg.Table; import org.apache.iceberg.data.Record; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -112,7 +112,7 @@ public static Collection parameters() { if (javaVersion.equals("1.8")) { testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false}); // test for vectorization=ON in case of ORC and PARQUET format with Tez engine - if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) { + if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && HiveVersion.min(HiveVersion.HIVE_3)) { testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, true}); } } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java index adad32aa48a0..e976483b81a1 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java @@ -31,7 +31,7 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.data.GenericRecord; import org.apache.iceberg.data.Record; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.hive.serde.objectinspector.IcebergObjectInspector; import org.apache.iceberg.types.Types; import org.junit.Assert; @@ -161,7 +161,8 @@ public void testListDeserialize() { @Test public void testDeserializeEverySupportedType() { - Assume.assumeFalse("No test yet for Hive3 (Date/Timestamp creation)", MetastoreUtil.hive3PresentOnClasspath()); + Assume.assumeFalse( + "No test yet for Hive3 (Date/Timestamp creation)", HiveVersion.min(HiveVersion.HIVE_3)); Deserializer deserializer = new Deserializer.Builder() .schema(HiveIcebergTestUtils.FULL_SCHEMA) diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index c60c3183eb07..294ba9aba689 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -67,7 +67,7 @@ import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.Util; import org.apache.iceberg.hive.HiveSchemaUtil; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.Catalogs; import org.apache.iceberg.mr.InputFormatConfig; import org.apache.iceberg.mr.TestHelper; @@ -974,7 +974,7 @@ public void testIcebergAndHmsTableProperties() throws Exception { if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) { expectedIcebergProperties.put(TableProperties.ENGINE_HIVE_ENABLED, "true"); } - if (MetastoreUtil.hive3PresentOnClasspath()) { + if (HiveVersion.min(HiveVersion.HIVE_3)) { expectedIcebergProperties.put("bucketing_version", "2"); } Assert.assertEquals(expectedIcebergProperties, icebergTable.properties()); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java index c988fa88fd26..656f16b4a1cd 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java @@ -49,7 +49,7 @@ import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.hadoop.HadoopTables; import org.apache.iceberg.hive.HiveCatalog; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.Catalogs; import org.apache.iceberg.mr.InputFormatConfig; import org.apache.iceberg.mr.TestCatalogs; @@ -506,7 +506,7 @@ static class CustomCatalogTestTables extends TestTables { private final String warehouseLocation; CustomCatalogTestTables(Configuration conf, TemporaryFolder temp, String catalogName) throws IOException { - this(conf, temp, (MetastoreUtil.hive3PresentOnClasspath() ? "file:" : "") + + this(conf, temp, (HiveVersion.min(HiveVersion.HIVE_3) ? "file:" : "") + temp.newFolder("custom", "warehouse").toString(), catalogName); } @@ -537,7 +537,7 @@ static class HadoopCatalogTestTables extends TestTables { private final String warehouseLocation; HadoopCatalogTestTables(Configuration conf, TemporaryFolder temp, String catalogName) throws IOException { - this(conf, temp, (MetastoreUtil.hive3PresentOnClasspath() ? "file:" : "") + + this(conf, temp, (HiveVersion.min(HiveVersion.HIVE_3) ? "file:" : "") + temp.newFolder("hadoop", "warehouse").toString(), catalogName); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java index eb589b2495e6..b6577a3dd259 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.iceberg.Schema; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Types; import org.junit.Assert; @@ -90,7 +91,15 @@ public void testIcebergObjectInspector() { Assert.assertEquals(3, dateField.getFieldID()); Assert.assertEquals("date_field", dateField.getFieldName()); Assert.assertEquals("date comment", dateField.getFieldComment()); - Assert.assertEquals(IcebergDateObjectInspectorHive3.get(), dateField.getFieldObjectInspector()); + if (HiveVersion.min(HiveVersion.HIVE_3)) { + Assert.assertEquals( + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3", + dateField.getFieldObjectInspector().getClass().getName()); + } else { + Assert.assertEquals( + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspector", + dateField.getFieldObjectInspector().getClass().getName()); + } // decimal StructField decimalField = soi.getStructFieldRef("decimal_field"); From 763341438e38221c28fc5f1d532a0583241c9915 Mon Sep 17 00:00:00 2001 From: Haizhou Zhao Date: Wed, 18 Jan 2023 09:19:07 -0800 Subject: [PATCH 05/35] Hive: Make UGI current user the owner of new Hive objects (#6324) Co-authored-by: Haizhou Zhao --- .../org/apache/iceberg/hive/HiveCatalog.java | 2 +- .../apache/iceberg/hive/HiveHadoopUtil.java | 42 +++++++++++++ .../iceberg/hive/HiveTableOperations.java | 2 +- .../apache/iceberg/hive/TestHiveCatalog.java | 61 +++++++++++++------ 4 files changed, 88 insertions(+), 19 deletions(-) create mode 100644 iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 24e29ca569d2..469b1a2c9cab 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -548,7 +548,7 @@ Database convertToDatabase(Namespace namespace, Map meta) { }); if (database.getOwnerName() == null) { - database.setOwnerName(System.getProperty("user.name")); + database.setOwnerName(HiveHadoopUtil.currentUser()); database.setOwnerType(PrincipalType.USER); } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java new file mode 100644 index 000000000000..e63d1999e645 --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import java.io.IOException; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HiveHadoopUtil { + + private static final Logger LOG = LoggerFactory.getLogger(HiveHadoopUtil.class); + + private HiveHadoopUtil() { + } + + public static String currentUser() { + try { + return UserGroupInformation.getCurrentUser().getUserName(); + } catch (IOException e) { + LOG.warn("Failed to get Hadoop user", e); + return System.getProperty("user.name"); + } + } +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 02f0c2e98939..e7c44256ae39 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -299,7 +299,7 @@ private Table newHmsTable(TableMetadata metadata) { Table newTable = new Table(tableName, database, - metadata.property(HiveCatalog.HMS_TABLE_OWNER, System.getProperty("user.name")), + metadata.property(HiveCatalog.HMS_TABLE_OWNER, HiveHadoopUtil.currentUser()), (int) currentTimeMillis / 1000, (int) currentTimeMillis / 1000, Integer.MAX_VALUE, diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index e81bfe308d8e..4bfdb91bc918 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -19,6 +19,7 @@ package org.apache.iceberg.hive; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; @@ -29,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CachingCatalog; import org.apache.iceberg.CatalogProperties; @@ -239,17 +241,28 @@ public void testReplaceTxnBuilder() throws Exception { @Test public void testCreateTableWithOwner() throws Exception { + createTableAndVerifyOwner( + DB_NAME, + "tbl_specified_owner", + ImmutableMap.of(HiveCatalog.HMS_TABLE_OWNER, "some_owner"), + "some_owner"); + createTableAndVerifyOwner( + DB_NAME, + "tbl_default_owner", + ImmutableMap.of(), + UserGroupInformation.getCurrentUser().getUserName()); + } + + private void createTableAndVerifyOwner( + String db, String tbl, Map properties, String owner) + throws IOException, TException { Schema schema = getTestSchema(); PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); - TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); - String location = temp.newFolder("tbl").toString(); - String owner = "some_owner"; - ImmutableMap properties = ImmutableMap.of(HiveCatalog.HMS_TABLE_OWNER, owner); - + TableIdentifier tableIdent = TableIdentifier.of(db, tbl); + String location = temp.newFolder(tbl).toString(); try { Table table = catalog.createTable(tableIdent, schema, spec, location, properties); - org.apache.hadoop.hive.metastore.api.Table hmsTable = - metastoreClient.getTable(DB_NAME, "tbl"); + org.apache.hadoop.hive.metastore.api.Table hmsTable = metastoreClient.getTable(db, tbl); Assert.assertEquals(owner, hmsTable.getOwner()); Map hmsTableParams = hmsTable.getParameters(); Assert.assertFalse(hmsTableParams.containsKey(HiveCatalog.HMS_TABLE_OWNER)); @@ -335,6 +348,20 @@ public void testCreateNamespace() throws Exception { @Test public void testCreateNamespaceWithOwnership() throws Exception { + createNamespaceAndVerifyOwnership( + "default_ownership_1", + ImmutableMap.of(), + UserGroupInformation.getCurrentUser().getUserName(), + PrincipalType.USER); + + createNamespaceAndVerifyOwnership( + "default_ownership_2", + ImmutableMap.of( + "non_owner_prop1", "value1", + "non_owner_prop2", "value2"), + UserGroupInformation.getCurrentUser().getUserName(), + PrincipalType.USER); + createNamespaceAndVerifyOwnership( "individual_ownership_1", ImmutableMap.of( @@ -595,7 +622,7 @@ public void testSetNamespaceOwnership() throws TException { } @Test - public void testSetNamespaceOwnershipNoop() throws TException { + public void testSetNamespaceOwnershipNoop() throws TException, IOException { setNamespaceOwnershipAndVerify( "set_ownership_noop_1", ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_individual_owner"), @@ -630,9 +657,9 @@ public void testSetNamespaceOwnershipNoop() throws TException { "set_ownership_noop_3", ImmutableMap.of(), ImmutableMap.of(), - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER, - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER); setNamespaceOwnershipAndVerify( @@ -687,14 +714,14 @@ public void testRemoveNamespaceProperties() throws TException { } @Test - public void testRemoveNamespaceOwnership() throws TException { + public void testRemoveNamespaceOwnership() throws TException, IOException { removeNamespaceOwnershipAndVerify( "remove_individual_ownership", ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), "some_owner", PrincipalType.USER, - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( @@ -707,25 +734,25 @@ public void testRemoveNamespaceOwnership() throws TException { ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), "some_group_owner", PrincipalType.GROUP, - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( "remove_ownership_on_default_noop_1", ImmutableMap.of(), ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER, - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( "remove_ownership_on_default_noop_2", ImmutableMap.of(), ImmutableSet.of(), - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER, - System.getProperty("user.name"), + UserGroupInformation.getCurrentUser().getUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( From 40e16a4194d152f6a230b77d2ae54ff4002c6132 Mon Sep 17 00:00:00 2001 From: Ajantha Bhat Date: Mon, 23 Jan 2023 16:05:57 +0530 Subject: [PATCH 06/35] Build: Fix minor error-prone warnings (#6629) * Build: Fix minor error-prone warnings * Enforce StringSplitter to avoid future warnings --- .../main/java/org/apache/iceberg/hive/HiveVersion.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java index a94822e91dc8..fd8f9006ae9a 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java @@ -19,7 +19,9 @@ package org.apache.iceberg.hive; +import java.util.List; import org.apache.hive.common.util.HiveVersionInfo; +import org.apache.iceberg.relocated.com.google.common.base.Splitter; public enum HiveVersion { HIVE_4(4), @@ -45,8 +47,8 @@ public static boolean min(HiveVersion other) { private static HiveVersion calculate() { String version = HiveVersionInfo.getShortVersion(); - String[] versions = version.split("\\."); - switch (versions[0]) { + List versions = Splitter.on('.').splitToList(version); + switch (versions.get(0)) { case "4": return HIVE_4; case "3": @@ -54,7 +56,7 @@ private static HiveVersion calculate() { case "2": return HIVE_2; case "1": - if (versions[1].equals("2")) { + if (versions.get(1).equals("2")) { return HIVE_1_2; } else { return NOT_SUPPORTED; From a4b65f4227f8dc56b1eb865fef8ec436271cd299 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 5 Apr 2021 14:46:02 -0700 Subject: [PATCH 07/35] Core: Add SerializableTable (#2403) --- .../java/org/apache/iceberg/TestHelpers.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java index 9c0059fb21f5..085c95b7ce27 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java @@ -102,6 +102,24 @@ public static T roundTripSerialize(T type) throws IOException, ClassNotFound // ); // } + public static void assertSerializedMetadata(Table expected, Table actual) { + Assert.assertEquals("Name must match", expected.name(), actual.name()); + Assert.assertEquals("Location must match", expected.location(), actual.location()); + Assert.assertEquals("Props must match", expected.properties(), actual.properties()); + Assert.assertEquals("Schema must match", expected.schema().asStruct(), actual.schema().asStruct()); + Assert.assertEquals("Spec must match", expected.spec(), actual.spec()); + Assert.assertEquals("Sort order must match", expected.sortOrder(), actual.sortOrder()); + } + + public static void assertSerializedAndLoadedMetadata(Table expected, Table actual) { + assertSerializedMetadata(expected, actual); + Assert.assertEquals("Specs must match", expected.specs(), actual.specs()); + Assert.assertEquals("Sort orders must match", expected.sortOrders(), actual.sortOrders()); + Assert.assertEquals("Current snapshot must match", expected.currentSnapshot(), actual.currentSnapshot()); + Assert.assertEquals("Snapshots must match", expected.snapshots(), actual.snapshots()); + Assert.assertEquals("History must match", expected.history(), actual.history()); + } + private static class CheckReferencesBound extends ExpressionVisitors.ExpressionVisitor { private final String message; From eb86d3676f7e7dbf95c6fedee8839118db04a3bc Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Fri, 5 May 2023 22:30:08 +0200 Subject: [PATCH 08/35] Fix formatting in iceberg-catalog/pom.xml --- iceberg/iceberg-catalog/pom.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index a5a1c14f008a..ccefd619e25c 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -74,10 +74,10 @@ junit-vintage-engine test - - org.mockito - mockito-core - test - + + org.mockito + mockito-core + test + From b98449732de8d132191b9929f6a787a6120f7a66 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Thu, 4 Aug 2022 00:46:29 -0700 Subject: [PATCH 09/35] Hive: Fix concurrent transactions overwriting commits by adding hive lock heartbeats. (#5036) --- .../apache/iceberg/hive/HiveCommitLock.java | 89 ++++++++++++++++++- .../iceberg/hive/HiveTableOperations.java | 14 +++ .../apache/iceberg/hive/LockException.java | 34 +++++++ .../iceberg/hive/HiveMetastoreTest.java | 29 ++++-- .../iceberg/hive/TestHiveCommitLocks.java | 51 ++++++++++- 5 files changed, 207 insertions(+), 10 deletions(-) create mode 100644 iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/LockException.java diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java index 60d516747be7..62ae68b8d649 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java @@ -25,6 +25,9 @@ import java.net.UnknownHostException; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReentrantLock; @@ -45,6 +48,7 @@ import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.iceberg.util.Tasks; import org.apache.thrift.TException; import org.slf4j.Logger; @@ -57,6 +61,7 @@ public class HiveCommitLock { private static final String HIVE_ACQUIRE_LOCK_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; private static final String HIVE_LOCK_CHECK_MIN_WAIT_MS = "iceberg.hive.lock-check-min-wait-ms"; private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS = "iceberg.hive.lock-check-max-wait-ms"; + private static final String HIVE_LOCK_HEARTBEAT_INTERVAL_MS = "iceberg.hive.lock-heartbeat-interval-ms"; private static final String HIVE_LOCK_CREATION_TIMEOUT_MS = "iceberg.hive.lock-creation-timeout-ms"; private static final String HIVE_LOCK_CREATION_MIN_WAIT_MS = "iceberg.hive.lock-creation-min-wait-ms"; private static final String HIVE_LOCK_CREATION_MAX_WAIT_MS = "iceberg.hive.lock-creation-max-wait-ms"; @@ -67,7 +72,7 @@ public class HiveCommitLock { private static final long HIVE_LOCK_CREATION_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes private static final long HIVE_LOCK_CREATION_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds private static final long HIVE_LOCK_CREATION_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds - + private static final long HIVE_LOCK_HEARTBEAT_INTERVAL_MS_DEFAULT = 4 * 60 * 1000; // 4 minutes private static final long HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT = TimeUnit.MINUTES.toMillis(10); private static Cache commitLockCache; @@ -88,13 +93,16 @@ private static synchronized void initTableLevelLockCache(long evictionTimeout) { private final long lockAcquireTimeout; private final long lockCheckMinWaitTime; private final long lockCheckMaxWaitTime; + private final long lockHeartbeatIntervalTime; private final long lockCreationTimeout; private final long lockCreationMinWaitTime; private final long lockCreationMaxWaitTime; private final String agentInfo; + private final ScheduledExecutorService exitingScheduledExecutorService; private Optional hmsLockId = Optional.empty(); private Optional jvmLock = Optional.empty(); + private HiveLockHeartbeat heartbeat = null; public HiveCommitLock(Configuration conf, ClientPool metaClients, String catalogName, String databaseName, String tableName) { @@ -111,6 +119,8 @@ public HiveCommitLock(Configuration conf, ClientPool { client.unlock(lockId); @@ -291,6 +317,22 @@ public String getTableName() { return tableName; } + public void ensureActive() { + if (heartbeat == null) { + throw new LockException("Lock is not active"); + } + + if (heartbeat.encounteredException != null) { + throw new LockException( + heartbeat.encounteredException, + "Failed to heartbeat for hive lock. %s", + heartbeat.encounteredException.getMessage()); + } + if (!heartbeat.active()) { + throw new LockException("Hive lock heartbeat thread not active"); + } + } + private static class LockInfo { private long lockId; private LockState lockState; @@ -419,4 +461,49 @@ private static class WaitingForHmsLockException extends RuntimeException { super(message); } } + + private static class HiveLockHeartbeat implements Runnable { + private final ClientPool hmsClients; + private final long lockId; + private final long intervalMs; + private ScheduledFuture future; + private volatile Exception encounteredException = null; + + HiveLockHeartbeat( + ClientPool hmsClients, long lockId, long intervalMs) { + this.hmsClients = hmsClients; + this.lockId = lockId; + this.intervalMs = intervalMs; + this.future = null; + } + + @Override + public void run() { + try { + hmsClients.run( + client -> { + client.heartbeat(0, lockId); + return null; + }); + } catch (TException | InterruptedException e) { + this.encounteredException = e; + throw new CommitFailedException(e, "Failed to heartbeat for lock: %d", lockId); + } + } + + public void schedule(ScheduledExecutorService scheduler) { + future = + scheduler.scheduleAtFixedRate(this, intervalMs / 2, intervalMs, TimeUnit.MILLISECONDS); + } + + boolean active() { + return future != null && !future.isCancelled(); + } + + public void cancel() { + if (future != null) { + future.cancel(false); + } + } + } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index e7c44256ae39..f210669e0e3c 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -226,15 +226,29 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { StatsSetupConst.clearColumnStatsState(tbl.getParameters()); } + commitLock.ensureActive(); try { persistTable(tbl, updateHiveTable); + + commitLock.ensureActive(); + commitStatus = CommitStatus.SUCCESS; + } catch (LockException le) { + throw new CommitStateUnknownException( + "Failed to heartbeat for hive lock while " + + "committing changes. This can lead to a concurrent commit attempt be able to overwrite this commit. " + + "Please check the commit history. If you are running into this issue, try reducing " + + "iceberg.hive.lock-heartbeat-interval-ms.", + le); } catch (org.apache.hadoop.hive.metastore.api.AlreadyExistsException e) { throw new AlreadyExistsException(e, "Table already exists: %s.%s", database, tableName); } catch (InvalidObjectException e) { throw new ValidationException(e, "Invalid Hive object for %s.%s", database, tableName); + } catch (CommitFailedException | CommitStateUnknownException e) { + throw e; + } catch (Throwable e) { if (e.getMessage() != null && e.getMessage().contains("Table/View 'HIVE_LOCKS' does not exist")) { throw new RuntimeException("Failed to acquire locks from metastore because the underlying metastore " + diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/LockException.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/LockException.java new file mode 100644 index 000000000000..79536c5bab12 --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/LockException.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import com.google.errorprone.annotations.FormatMethod; + +class LockException extends RuntimeException { + @FormatMethod + LockException(String message, Object... args) { + super(String.format(message, args)); + } + + @FormatMethod + LockException(Throwable cause, String message, Object... args) { + super(String.format(message, args), cause); + } +} diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java index b1fb891f3054..7f39fb1505ad 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java @@ -19,6 +19,8 @@ package org.apache.iceberg.hive; +import java.util.Collections; +import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; @@ -42,16 +44,33 @@ public abstract class HiveMetastoreTest { @BeforeClass public static void startMetastore() throws Exception { + startMetastore(Collections.emptyMap()); + } + + public static void startMetastore(Map hiveConfOverride) throws Exception { HiveMetastoreTest.metastore = new TestHiveMetastore(); - metastore.start(); + HiveConf hiveConfWithOverrides = new HiveConf(TestHiveMetastore.class); + if (hiveConfOverride != null) { + for (Map.Entry kv : hiveConfOverride.entrySet()) { + hiveConfWithOverrides.set(kv.getKey(), kv.getValue()); + } + } + + metastore.start(hiveConfWithOverrides); HiveMetastoreTest.hiveConf = metastore.hiveConf(); - HiveMetastoreTest.metastoreClient = new HiveMetaStoreClient(hiveConf); + HiveMetastoreTest.metastoreClient = new HiveMetaStoreClient(hiveConfWithOverrides); String dbPath = metastore.getDatabasePath(DB_NAME); Database db = new Database(DB_NAME, "description", dbPath, Maps.newHashMap()); metastoreClient.createDatabase(db); - HiveMetastoreTest.catalog = (HiveCatalog) - CatalogUtil.loadCatalog(HiveCatalog.class.getName(), CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, ImmutableMap.of( - CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, String.valueOf(EVICTION_INTERVAL)), hiveConf); + HiveMetastoreTest.catalog = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of( + CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, + String.valueOf(EVICTION_INTERVAL)), + hiveConfWithOverrides); } @AfterClass diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java index f13ae5b0b840..7cc692b27500 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java @@ -26,6 +26,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.LockRequest; @@ -38,6 +39,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; @@ -46,14 +48,17 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.AdditionalAnswers; import org.mockito.ArgumentCaptor; +import org.mockito.invocation.InvocationOnMock; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.spy; @@ -65,7 +70,7 @@ public class TestHiveCommitLocks extends HiveTableBaseTest { private static HiveTableOperations spyOps = null; private static HiveClientPool spyClientPool = null; private static CachedClientPool spyCachedClientPool = null; - private static Configuration overriddenHiveConf = new Configuration(hiveConf); + private static Configuration overriddenHiveConf; private static AtomicReference spyClientRef = new AtomicReference<>(); private static IMetaStoreClient spyClient = null; HiveTableOperations ops = null; @@ -79,10 +84,16 @@ public class TestHiveCommitLocks extends HiveTableBaseTest { ShowLocksResponse emptyLocks = new ShowLocksResponse(Lists.newArrayList()); @BeforeClass - public static void initializeSpies() throws Exception { + public static void startMetastore() throws Exception { + HiveMetastoreTest.startMetastore( + ImmutableMap.of(HiveConf.ConfVars.HIVE_TXN_TIMEOUT.varname, "1s")); + + // start spies + overriddenHiveConf = new Configuration(hiveConf); overriddenHiveConf.setLong("iceberg.hive.lock-timeout-ms", 6 * 1000); overriddenHiveConf.setLong("iceberg.hive.lock-check-min-wait-ms", 50); overriddenHiveConf.setLong("iceberg.hive.lock-check-max-wait-ms", 5 * 1000); + overriddenHiveConf.setLong("iceberg.hive.lock-heartbeat-interval-ms", 100); // Set up the spy clients as static variables instead of before every test. // The spy clients are reused between methods and closed at the end of all tests in this class. @@ -148,6 +159,7 @@ public static void cleanup() { public void testLockAcquisitionAtFirstTime() throws TException, InterruptedException { doReturn(acquiredLockResponse).when(spyClient).lock(any()); doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); spyOps.doCommit(metadataV2, metadataV1); @@ -166,6 +178,7 @@ public void testLockAcquisitionAfterRetries() throws TException, InterruptedExce .when(spyClient) .checkLock(eq(dummyLockId)); doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); spyOps.doCommit(metadataV2, metadataV1); @@ -439,4 +452,34 @@ public void testTableLevelProcessLockBlocksConcurrentHMSRequestsForSameTable() t // all threads eventually got their turn verify(spyClient, times(numConcurrentCommits)).lock(any(LockRequest.class)); } + + @Test + public void testLockHeartbeat() throws TException { + doReturn(acquiredLockResponse).when(spyClient).lock(any()); + doAnswer(AdditionalAnswers.answersWithDelay(2000, InvocationOnMock::callRealMethod)) + .when(spyClient) + .getTable(any(), any()); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + verify(spyClient, atLeastOnce()).heartbeat(eq(0L), eq(dummyLockId)); + } + + @Test + public void testLockHeartbeatFailureDuringCommit() throws TException, InterruptedException { + doReturn(acquiredLockResponse).when(spyClient).lock(any()); + doAnswer(AdditionalAnswers.answersWithDelay(2000, InvocationOnMock::callRealMethod)) + .when(spyOps) + .loadHmsTable(); + doThrow(new TException("Failed to heart beat.")) + .when(spyClient) + .heartbeat(eq(0L), eq(dummyLockId)); + + AssertHelpers.assertThrows( + "Expected commit failure due to failure in heartbeat.", + CommitFailedException.class, + "Failed to heartbeat for hive lock. Failed to heart beat.", + () -> spyOps.doCommit(metadataV2, metadataV1)); + } } From e22fae09e4af86b48f71d9b447bf4b8b80765897 Mon Sep 17 00:00:00 2001 From: pvary Date: Fri, 3 Feb 2023 14:12:18 +0100 Subject: [PATCH 10/35] Hive: Refactor commit lock mechanism from HiveTableOperations (#6648) Co-authored-by: Adam Szita <40628386+szlta@users.noreply.github.com> Co-authored-by: Peter Vary --- .../org/apache/iceberg/hive/HiveLock.java | 28 ++ .../iceberg/hive/HiveTableOperations.java | 28 +- ...HiveCommitLock.java => MetastoreLock.java} | 456 ++++++++++-------- .../org/apache/iceberg/AssertHelpers.java | 19 +- .../iceberg/hive/TestHiveCommitLocks.java | 8 +- .../apache/iceberg/hive/TestHiveCommits.java | 43 +- .../iceberg/mr/hive/HiveIcebergMetaHook.java | 14 +- 7 files changed, 334 insertions(+), 262 deletions(-) create mode 100644 iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveLock.java rename iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/{HiveCommitLock.java => MetastoreLock.java} (73%) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveLock.java new file mode 100644 index 000000000000..20517f3e9052 --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveLock.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +interface HiveLock { + void lock() throws LockException; + + void ensureActive() throws LockException; + + void unlock(); +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index f210669e0e3c..4f45438ae858 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -19,7 +19,6 @@ package org.apache.iceberg.hive; -import java.net.UnknownHostException; import java.util.Collections; import java.util.Locale; import java.util.Map; @@ -177,11 +176,10 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { CommitStatus commitStatus = CommitStatus.FAILURE; boolean updateHiveTable = false; - HiveCommitLock commitLock = null; + HiveLock lock = lockObject(); try { - commitLock = createLock(); - commitLock.acquire(); + lock.lock(); Table tbl = loadHmsTable(); @@ -226,11 +224,11 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { StatsSetupConst.clearColumnStatsState(tbl.getParameters()); } - commitLock.ensureActive(); + lock.ensureActive(); try { persistTable(tbl, updateHiveTable); - commitLock.ensureActive(); + lock.ensureActive(); commitStatus = CommitStatus.SUCCESS; } catch (LockException le) { @@ -268,15 +266,18 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { throw new CommitStateUnknownException(e); } } - } catch (TException | UnknownHostException e) { + } catch (TException e) { throw new RuntimeException(String.format("Metastore operation failed for %s.%s", database, tableName), e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException("Interrupted during commit", e); + } catch (LockException e) { + throw new CommitFailedException(e); + } finally { - cleanupMetadataAndUnlock(commitStatus, newMetadataLocation, commitLock); + cleanupMetadataAndUnlock(commitStatus, newMetadataLocation, lock); } LOG.info("Committed to table {} with the new metadata location {}", fullName, newMetadataLocation); @@ -476,12 +477,12 @@ private StorageDescriptor storageDescriptor(TableMetadata metadata, boolean hive } @VisibleForTesting - HiveCommitLock createLock() throws UnknownHostException, TException, InterruptedException { - return new HiveCommitLock(conf, metaClients, catalogName, database, tableName); + HiveLock lockObject() { + return new MetastoreLock(conf, metaClients, catalogName, database, tableName); } private void cleanupMetadataAndUnlock(CommitStatus commitStatus, String metadataLocation, - HiveCommitLock lock) { + HiveLock lock) { try { if (commitStatus == CommitStatus.FAILURE) { // If we are sure the commit failed, clean up the uncommitted metadata file @@ -494,11 +495,10 @@ private void cleanupMetadataAndUnlock(CommitStatus commitStatus, String metadata } } - @VisibleForTesting - void doUnlock(HiveCommitLock lock) { + void doUnlock(HiveLock lock) { if (lock != null) { try { - lock.release(); + lock.unlock(); } catch (Exception e) { LOG.warn("Failed to unlock {}.{}", database, tableName, e); } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java similarity index 73% rename from iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java rename to iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java index 62ae68b8d649..37fe25900fa0 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java @@ -29,7 +29,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReentrantLock; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.IMetaStoreClient; @@ -54,17 +54,17 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class HiveCommitLock { +public class MetastoreLock implements HiveLock { - private static final Logger LOG = LoggerFactory.getLogger(HiveCommitLock.class); + private static final Logger LOG = LoggerFactory.getLogger(MetastoreLock.class); private static final String HIVE_ACQUIRE_LOCK_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; private static final String HIVE_LOCK_CHECK_MIN_WAIT_MS = "iceberg.hive.lock-check-min-wait-ms"; private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS = "iceberg.hive.lock-check-max-wait-ms"; - private static final String HIVE_LOCK_HEARTBEAT_INTERVAL_MS = "iceberg.hive.lock-heartbeat-interval-ms"; private static final String HIVE_LOCK_CREATION_TIMEOUT_MS = "iceberg.hive.lock-creation-timeout-ms"; private static final String HIVE_LOCK_CREATION_MIN_WAIT_MS = "iceberg.hive.lock-creation-min-wait-ms"; private static final String HIVE_LOCK_CREATION_MAX_WAIT_MS = "iceberg.hive.lock-creation-max-wait-ms"; + private static final String HIVE_LOCK_HEARTBEAT_INTERVAL_MS = "iceberg.hive.lock-heartbeat-interval-ms"; private static final String HIVE_TABLE_LEVEL_LOCK_EVICT_MS = "iceberg.hive.table-level-lock-evict-ms"; private static final long HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes private static final long HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds @@ -74,44 +74,34 @@ public class HiveCommitLock { private static final long HIVE_LOCK_CREATION_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds private static final long HIVE_LOCK_HEARTBEAT_INTERVAL_MS_DEFAULT = 4 * 60 * 1000; // 4 minutes private static final long HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT = TimeUnit.MINUTES.toMillis(10); + private static volatile Cache commitLockCache; - private static Cache commitLockCache; - - private static synchronized void initTableLevelLockCache(long evictionTimeout) { - if (commitLockCache == null) { - commitLockCache = Caffeine.newBuilder() - .expireAfterAccess(evictionTimeout, TimeUnit.MILLISECONDS) - .build(); - } - } + private final ClientPool metaClients; - private final String fullName; private final String databaseName; private final String tableName; - private final ClientPool metaClients; + private final String fullName; private final long lockAcquireTimeout; private final long lockCheckMinWaitTime; private final long lockCheckMaxWaitTime; - private final long lockHeartbeatIntervalTime; private final long lockCreationTimeout; private final long lockCreationMinWaitTime; private final long lockCreationMaxWaitTime; - private final String agentInfo; + private final long lockHeartbeatIntervalTime; private final ScheduledExecutorService exitingScheduledExecutorService; + private final String agentInfo; private Optional hmsLockId = Optional.empty(); - private Optional jvmLock = Optional.empty(); - private HiveLockHeartbeat heartbeat = null; + private ReentrantLock jvmLock = null; + private Heartbeat heartbeat = null; - public HiveCommitLock(Configuration conf, ClientPool metaClients, - String catalogName, String databaseName, String tableName) { + public MetastoreLock(Configuration conf, ClientPool metaClients, + String catalogName, String databaseName, String tableName) { this.metaClients = metaClients; + this.fullName = catalogName + "." + databaseName + "." + tableName; this.databaseName = databaseName; this.tableName = tableName; - this.fullName = catalogName + "." + databaseName + "." + tableName; - - this.agentInfo = "Iceberg-" + UUID.randomUUID(); this.lockAcquireTimeout = conf.getLong(HIVE_ACQUIRE_LOCK_TIMEOUT_MS, HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT); @@ -119,65 +109,95 @@ public HiveCommitLock(Configuration conf, ClientPool state = new AtomicReference<>(lockInfo.lockState); - this.hmsLockId = Optional.of(lockId); + LockInfo lockInfo = createLock(); final long start = System.currentTimeMillis(); long duration = 0; boolean timeout = false; + TException thriftError = null; try { - if (state.get().equals(LockState.WAITING)) { + if (lockInfo.lockState.equals(LockState.WAITING)) { // Retry count is the typical "upper bound of retries" for Tasks.run() function. In fact, the maximum number of // attempts the Tasks.run() would try is `retries + 1`. Here, for checking locks, we use timeout as the // upper bound of retries. So it is just reasonable to set a large retry count. However, if we set // Integer.MAX_VALUE, the above logic of `retries + 1` would overflow into Integer.MIN_VALUE. Hence, // the retry is set conservatively as `Integer.MAX_VALUE - 100` so it doesn't hit any boundary issues. - Tasks.foreach(lockId) + Tasks.foreach(lockInfo.lockId) .retry(Integer.MAX_VALUE - 100) .exponentialBackoff( lockCheckMinWaitTime, @@ -185,209 +205,98 @@ private void acquireLockFromHms() throws UnknownHostException, TException, Inter lockAcquireTimeout, 1.5) .throwFailureWhenFinished() - .onlyRetryOn(WaitingForHmsLockException.class) + .onlyRetryOn(WaitingForLockException.class) .run(id -> { try { LockResponse response = metaClients.run(client -> client.checkLock(id)); LockState newState = response.getState(); - state.set(newState); + lockInfo.lockState = newState; if (newState.equals(LockState.WAITING)) { - throw new WaitingForHmsLockException("Waiting for lock."); + throw new WaitingForLockException(String.format( + "Waiting for lock on table %s.%s", databaseName, tableName)); } } catch (InterruptedException e) { Thread.interrupted(); // Clear the interrupt status flag - LOG.warn("Interrupted while waiting for lock.", e); + LOG.warn( + "Interrupted while waiting for lock on table {}.{}", + databaseName, + tableName, + e); } + }, TException.class); } - } catch (WaitingForHmsLockException waitingForLockException) { + } catch (WaitingForLockException e) { timeout = true; duration = System.currentTimeMillis() - start; + } catch (TException e) { + thriftError = e; } finally { - if (!state.get().equals(LockState.ACQUIRED)) { - releaseHmsLock(); + if (!lockInfo.lockState.equals(LockState.ACQUIRED)) { + unlock(Optional.of(lockInfo.lockId)); } } - // timeout and do not have lock acquired - if (timeout && !state.get().equals(LockState.ACQUIRED)) { - throw new CommitFailedException("Timed out after %s ms waiting for lock on %s.%s", - duration, databaseName, tableName); - } - - if (!state.get().equals(LockState.ACQUIRED)) { - throw new CommitFailedException("Could not acquire the lock on %s.%s, " + - "lock request ended in state %s", databaseName, tableName, state); - } - } - - private void releaseHmsLock() { -// if (hmsLockId.isPresent()) { -// try { -// -// metaClients.run(client -> { -// client.unlock(hmsLockId.get()); -// return null; -// }); -// hmsLockId = Optional.empty(); -// } catch (Exception e) { -// LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); -// } -// } - - Long id = null; - try { - if (!hmsLockId.isPresent()) { - // Try to find the lock based on agentInfo. Only works with Hive 2 or later. - if (HiveVersion.min(HiveVersion.HIVE_2)) { - LockInfo lockInfo = findLock(); - if (lockInfo == null) { - // No lock found - LOG.info("No lock found with {} agentInfo", agentInfo); - return; - } - - id = lockInfo.lockId; - } else { - LOG.warn("Could not find lock with HMSClient {}", HiveVersion.current()); - return; - } - } else { - id = hmsLockId.get(); + if (!lockInfo.lockState.equals(LockState.ACQUIRED)) { + // timeout and do not have lock acquired + if (timeout) { + throw new LockException("Timed out after %s ms waiting for lock on %s.%s", + duration, databaseName, tableName); } - doUnlock(hmsLockId.get()); - - } catch (InterruptedException ie) { - if (id != null) { - // Interrupted unlock. We try to unlock one more time if we have a lockId - try { - Thread.interrupted(); // Clear the interrupt status flag for now, so we can retry unlock - LOG.warn("Interrupted unlock we try one more time {}.{}", databaseName, tableName, ie); - doUnlock(id); - } catch (Exception e) { - LOG.warn("Failed to unlock even on 2nd attempt {}.{}", databaseName, tableName, e); - } finally { - Thread.currentThread().interrupt(); // Set back the interrupt status - } - } else { - Thread.currentThread().interrupt(); // Set back the interrupt status - LOG.warn("Interrupted finding locks to unlock {}.{}", databaseName, tableName, ie); + if (thriftError != null) { + throw new LockException( + thriftError, "Metastore operation failed for %s.%s", databaseName, tableName); } - } catch (Exception e) { - LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); - } - } - - @VisibleForTesting - void doUnlock(long lockId) throws TException, InterruptedException { - if (heartbeat != null) { - heartbeat.cancel(); - exitingScheduledExecutorService.shutdown(); - } - - metaClients.run( - client -> { - client.unlock(lockId); - return null; - }); - } - - - private void acquireJvmLock() { - if (jvmLock.isPresent()) { - throw new IllegalStateException(String.format("JVM lock already acquired for table %s", fullName)); - } - jvmLock = Optional.of(commitLockCache.get(fullName, t -> new ReentrantLock(true))); - jvmLock.get().lock(); - } - - private void releaseJvmLock() { - if (jvmLock.isPresent()) { - jvmLock.get().unlock(); - jvmLock = Optional.empty(); - } - } - - public String getDatabaseName() { - return databaseName; - } - - public String getTableName() { - return tableName; - } - - public void ensureActive() { - if (heartbeat == null) { - throw new LockException("Lock is not active"); - } - if (heartbeat.encounteredException != null) { + // Just for safety. We should not get here. throw new LockException( - heartbeat.encounteredException, - "Failed to heartbeat for hive lock. %s", - heartbeat.encounteredException.getMessage()); - } - if (!heartbeat.active()) { - throw new LockException("Hive lock heartbeat thread not active"); - } - } - - private static class LockInfo { - private long lockId; - private LockState lockState; - - private LockInfo() { - this.lockId = -1; - this.lockState = null; - } - - private LockInfo(long lockId, LockState lockState) { - this.lockId = lockId; - this.lockState = lockState; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("lockId", lockId) - .add("lockState", lockState) - .toString(); + "Could not acquire the lock on %s.%s, lock request ended in state %s", + databaseName, tableName, lockInfo.lockState); + } else { + return lockInfo.lockId; } } /** - * Tries to create a lock. If the lock creation fails, and it is possible then retries the lock - * creation a few times. If the lock creation is successful then a {@link LockInfo} is returned, - * otherwise an appropriate exception is thrown. + * Creates a lock, retrying if possible on failure. * - * @return The created lock - * @throws UnknownHostException When we are not able to fill the hostname for lock creation - * @throws TException When there is an error during lock creation + * @return The {@link LockInfo} object for the successfully created lock + * @throws LockException When we are not able to fill the hostname for lock creation, or there is + * an error during lock creation */ @SuppressWarnings("ReverseDnsLookup") - private LockInfo tryLock() throws UnknownHostException, TException { + private LockInfo createLock() throws LockException { LockInfo lockInfo = new LockInfo(); - final LockComponent lockComponent = + String hostName; + try { + hostName = InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException uhe) { + throw new LockException(uhe, "Error generating host name"); + } + + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, databaseName); lockComponent.setTablename(tableName); - final LockRequest lockRequest = + LockRequest lockRequest = new LockRequest( Lists.newArrayList(lockComponent), - System.getProperty("user.name"), - InetAddress.getLocalHost().getHostName()); + HiveHadoopUtil.currentUser(), + hostName); // Only works in Hive 2 or later. if (HiveVersion.min(HiveVersion.HIVE_2)) { lockRequest.setAgentInfo(agentInfo); } + AtomicBoolean interrupted = new AtomicBoolean(false); Tasks.foreach(lockRequest) .retry(Integer.MAX_VALUE - 100) .exponentialBackoff( lockCreationMinWaitTime, lockCreationMaxWaitTime, lockCreationTimeout, 2.0) - .shouldRetryTest(e -> e instanceof TException && HiveVersion.min(HiveVersion.HIVE_2)) + .shouldRetryTest(e -> !interrupted.get() && e instanceof LockException && + HiveVersion.min(HiveVersion.HIVE_2)) .throwFailureWhenFinished() .run( request -> { @@ -396,7 +305,7 @@ private LockInfo tryLock() throws UnknownHostException, TException { lockInfo.lockId = lockResponse.getLockid(); lockInfo.lockState = lockResponse.getState(); } catch (TException te) { - LOG.warn("Failed to acquire lock {}", request, te); + LOG.warn("Failed to create lock {}", request, te); try { // If we can not check for lock, or we do not find it, then rethrow the exception // Otherwise we are happy as the findLock sets the lockId and the state correctly @@ -410,20 +319,23 @@ private LockInfo tryLock() throws UnknownHostException, TException { } } - throw te; + throw new LockException("Failed to find lock for table %s.%s", databaseName, tableName); } catch (InterruptedException e) { Thread.currentThread().interrupt(); + interrupted.set(true); LOG.warn( - "Interrupted while checking for lock on table {}.{}", databaseName, tableName, e); - throw new RuntimeException("Interrupted while checking for lock", e); + "Interrupted while trying to find lock for table {}.{}", databaseName, tableName, e); + throw new LockException( + e, "Interrupted while trying to find lock for table %s.%s", databaseName, tableName); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); - LOG.warn("Interrupted while acquiring lock on table {}.{}", databaseName, tableName, e); - throw new RuntimeException("Interrupted while acquiring lock", e); + interrupted.set(true); + LOG.warn("Interrupted while creating lock on table {}.{}", databaseName, tableName, e); + throw new LockException("Interrupted while creating lock", e); } }, - TException.class); + LockException.class); // This should be initialized always, or exception should be thrown. LOG.debug("Lock {} created for table {}.{}", lockInfo, databaseName, tableName); @@ -437,14 +349,19 @@ private LockInfo tryLock() throws UnknownHostException, TException { * * @return The {@link LockInfo} for the found lock, or null if nothing found */ - private LockInfo findLock() throws TException, InterruptedException { + private LockInfo findLock() throws LockException, InterruptedException { Preconditions.checkArgument( HiveVersion.min(HiveVersion.HIVE_2), "Minimally Hive 2 HMS client is needed to find the Lock using the showLocks API call"); ShowLocksRequest showLocksRequest = new ShowLocksRequest(); showLocksRequest.setDbname(databaseName); showLocksRequest.setTablename(tableName); - ShowLocksResponse response = metaClients.run(client -> client.showLocks(showLocksRequest)); + ShowLocksResponse response; + try { + response = metaClients.run(client -> client.showLocks(showLocksRequest)); + } catch (TException e) { + throw new LockException(e, "Failed to find lock for table %s.%s", databaseName, tableName); + } for (ShowLocksResponseElement lock : response.getLocks()) { if (lock.getAgentInfo().equals(agentInfo)) { // We found our lock @@ -456,20 +373,103 @@ private LockInfo findLock() throws TException, InterruptedException { return null; } - private static class WaitingForHmsLockException extends RuntimeException { - WaitingForHmsLockException(String message) { - super(message); + private void unlock(Optional lockId) { + + Long id = null; + try { + if (!lockId.isPresent()) { + // Try to find the lock based on agentInfo. Only works with Hive 2 or later. + if (HiveVersion.min(HiveVersion.HIVE_2)) { + LockInfo lockInfo = findLock(); + if (lockInfo == null) { + // No lock found + LOG.info("No lock found with {} agentInfo", agentInfo); + return; + } + + id = lockInfo.lockId; + } else { + LOG.warn("Could not find lock with HMSClient {}", HiveVersion.current()); + return; + } + } else { + id = lockId.get(); + } + + doUnlock(id); + + } catch (InterruptedException ie) { + if (id != null) { + // Interrupted unlock. We try to unlock one more time if we have a lockId + try { + Thread.interrupted(); // Clear the interrupt status flag for now, so we can retry unlock + LOG.warn("Interrupted unlock we try one more time {}.{}", databaseName, tableName, ie); + doUnlock(id); + } catch (Exception e) { + LOG.warn("Failed to unlock even on 2nd attempt {}.{}", databaseName, tableName, e); + } finally { + Thread.currentThread().interrupt(); // Set back the interrupt status + } + } else { + Thread.currentThread().interrupt(); // Set back the interrupt status + LOG.warn("Interrupted finding locks to unlock {}.{}", databaseName, tableName, ie); + } + } catch (Exception e) { + LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); + } + } + + @VisibleForTesting + void doUnlock(long lockId) throws TException, InterruptedException { + metaClients.run( + client -> { + client.unlock(lockId); + return null; + }); + } + + + private void acquireJvmLock() { + if (jvmLock != null) { + throw new IllegalStateException(String.format("Cannot call acquireLock twice for %s", fullName)); + } + + jvmLock = commitLockCache.get(fullName, t -> new ReentrantLock(true)); + jvmLock.lock(); + } + + private void releaseJvmLock() { + if (jvmLock != null) { + jvmLock.unlock(); + jvmLock = null; + } + } + + private static void initTableLevelLockCache(long evictionTimeout) { + if (commitLockCache == null) { + synchronized (MetastoreLock.class) { + if (commitLockCache == null) { + commitLockCache = + Caffeine.newBuilder() + .expireAfterAccess(evictionTimeout, TimeUnit.MILLISECONDS) + .build(); + } + } } } - private static class HiveLockHeartbeat implements Runnable { + public String getTableName() { + return tableName; + } + + private static class Heartbeat implements Runnable { private final ClientPool hmsClients; private final long lockId; private final long intervalMs; private ScheduledFuture future; private volatile Exception encounteredException = null; - HiveLockHeartbeat( + Heartbeat( ClientPool hmsClients, long lockId, long intervalMs) { this.hmsClients = hmsClients; this.lockId = lockId; @@ -506,4 +506,34 @@ public void cancel() { } } } + + + private static class LockInfo { + private long lockId; + private LockState lockState; + + private LockInfo() { + this.lockId = -1; + this.lockState = null; + } + + private LockInfo(long lockId, LockState lockState) { + this.lockId = lockId; + this.lockState = lockState; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("lockId", lockId) + .add("lockState", lockState) + .toString(); + } + } + + private static class WaitingForLockException extends RuntimeException { + WaitingForLockException(String message) { + super(message); + } + } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java index c6bcc1b1de82..68137034f977 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java @@ -20,6 +20,8 @@ package org.apache.iceberg; import java.util.concurrent.Callable; +import org.assertj.core.api.AbstractThrowableAssert; +import org.assertj.core.api.Assertions; import org.junit.Assert; public class AssertHelpers { @@ -60,13 +62,18 @@ public static void assertThrows(String message, Class expected, String containedInMessage, Runnable runnable) { - try { - runnable.run(); - Assert.fail("No exception was thrown (" + message + "), expected: " + - expected.getName()); - } catch (Exception actual) { - handleException(message, expected, containedInMessage, actual); + AbstractThrowableAssert check = + Assertions.assertThatThrownBy(runnable::run).as(message).isInstanceOf(expected); + if (null != containedInMessage) { + check.hasMessageContaining(containedInMessage); } +// try { +// runnable.run(); +// Assert.fail("No exception was thrown (" + message + "), expected: " + +// expected.getName()); +// } catch (Exception actual) { +// handleException(message, expected, containedInMessage, actual); +// } } /** diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java index 7cc692b27500..93ad36f58a9d 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java @@ -283,10 +283,10 @@ public void testUnLockAfterInterruptedLock() throws TException { AssertHelpers.assertThrows( "Expected an exception", RuntimeException.class, - "Interrupted while acquiring lock", + "Interrupted while creating lock", () -> spyOps.doCommit(metadataV2, metadataV1)); -// verify(spyClient, times(1)).unlock(eq(dummyLockId)); + verify(spyClient, times(1)).unlock(eq(dummyLockId)); // Make sure that we exit the lock loop on InterruptedException verify(spyClient, times(1)).lock(any()); } @@ -310,9 +310,9 @@ public void testUnLockAfterInterruptedLockCheck() throws TException { "Could not acquire the lock on", () -> spyOps.doCommit(metadataV2, metadataV1)); - verify(spyClient, times(1)).unlock(any()); + verify(spyClient, times(1)).unlock(eq(dummyLockId)); // Make sure that we exit the checkLock loop on InterruptedException - verify(spyClient, times(1)).checkLock(any()); + verify(spyClient, times(1)).checkLock(eq(dummyLockId)); } @Test diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java index 8b5e48056142..b5802fa5cdea 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java @@ -64,21 +64,22 @@ public void testSuppressUnlockExceptions() throws TException, InterruptedExcepti HiveTableOperations spyOps = spy(ops); - AtomicReference lockRef = new AtomicReference<>(); + AtomicReference lockRef = new AtomicReference<>(); - when(spyOps.createLock()).thenAnswer(i -> { - HiveCommitLock lock = (HiveCommitLock) i.callRealMethod(); - lockRef.set(lock); - return lock; - } - ); + when(spyOps.lockObject()) + .thenAnswer( + i -> { + HiveLock lock = (HiveLock) i.callRealMethod(); + lockRef.set(lock); + return lock; + }); try { spyOps.commit(metadataV2, metadataV1); - HiveCommitLock spyLock = spy(lockRef.get()); - doThrow(new RuntimeException()).when(spyLock).release(); + HiveLock spyLock = spy(lockRef.get()); + doThrow(new RuntimeException()).when(spyLock).unlock(); } finally { - ops.doUnlock(lockRef.get()); + lockRef.get().unlock(); } ops.refresh(); @@ -269,11 +270,14 @@ public void testThriftExceptionConcurrentCommit() throws TException, Interrupted HiveTableOperations spyOps = spy(ops); - AtomicReference lock = new AtomicReference<>(); - doAnswer(l -> { - lock.set(ops.createLock()); - return lock.get(); - }).when(spyOps).createLock(); + AtomicReference lock = new AtomicReference<>(); + doAnswer( + l -> { + lock.set(ops.lockObject()); + return lock.get(); + }) + .when(spyOps) + .lockObject(); concurrentCommitAndThrowException(ops, spyOps, table, lock); @@ -316,8 +320,11 @@ private void commitAndThrowException(HiveTableOperations realOperations, HiveTab }).when(spyOperations).persistTable(any(), anyBoolean()); } - private void concurrentCommitAndThrowException(HiveTableOperations realOperations, HiveTableOperations spyOperations, - Table table, AtomicReference lockId) + private void concurrentCommitAndThrowException( + HiveTableOperations realOperations, + HiveTableOperations spyOperations, + Table table, + AtomicReference lock) throws TException, InterruptedException { // Simulate a communication error after a successful commit doAnswer(i -> { @@ -325,7 +332,7 @@ private void concurrentCommitAndThrowException(HiveTableOperations realOperation i.getArgument(0, org.apache.hadoop.hive.metastore.api.Table.class); realOperations.persistTable(tbl, true); // Simulate lock expiration or removal - realOperations.doUnlock(lockId.get()); + lock.get().unlock(); table.refresh(); table.updateSchema().addColumn("newCol", Types.IntegerType.get()).commit(); throw new TException("Datacenter on fire"); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java index 694c54cf13a6..d946531d58f4 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java @@ -83,9 +83,9 @@ import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hive.CachedClientPool; -import org.apache.iceberg.hive.HiveCommitLock; import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.hive.HiveTableOperations; +import org.apache.iceberg.hive.MetastoreLock; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.mapping.NameMapping; @@ -147,7 +147,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook { private Transaction transaction; private AlterTableType currentAlterTableOp; private boolean createHMSTableInHook = false; - private HiveCommitLock commitLock; + private MetastoreLock commitLock; private enum FileFormat { ORC("orc"), PARQUET("parquet"), AVRO("avro"); @@ -324,15 +324,15 @@ public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, E context.getProperties().get(OLD_TABLE_NAME)).toString()); } if (commitLock == null) { - commitLock = new HiveCommitLock(conf, new CachedClientPool(conf, Maps.fromProperties(catalogProperties)), + commitLock = new MetastoreLock(conf, new CachedClientPool(conf, Maps.fromProperties(catalogProperties)), catalogProperties.getProperty(Catalogs.NAME), hmsTable.getDbName(), hmsTable.getTableName()); } try { - commitLock.acquire(); + commitLock.lock(); doPreAlterTable(hmsTable, context); } catch (Exception e) { - commitLock.release(); + commitLock.unlock(); throw new MetaException(StringUtils.stringifyException(e)); } } @@ -526,7 +526,7 @@ public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable if (commitLock == null) { throw new IllegalStateException("Hive commit lock should already be set"); } - commitLock.release(); + commitLock.unlock(); if (isTableMigration) { catalogProperties = getCatalogProperties(hmsTable); catalogProperties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(preAlterTableProperties.schema)); @@ -566,7 +566,7 @@ public void rollbackAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTab if (commitLock == null) { throw new IllegalStateException("Hive commit lock should already be set"); } - commitLock.release(); + commitLock.unlock(); if (Boolean.parseBoolean(context.getProperties().getOrDefault(MIGRATE_HIVE_TO_ICEBERG, "false"))) { LOG.debug("Initiating rollback for table {} at location {}", hmsTable.getTableName(), hmsTable.getSd().getLocation()); From 1c1a6fb0b264dfb681024e094055f18d812991c7 Mon Sep 17 00:00:00 2001 From: zhouyifan279 <88070094+zhouyifan279@users.noreply.github.com> Date: Wed, 8 Mar 2023 02:24:57 +0800 Subject: [PATCH 11/35] Use UGI shortUserName as the default owner of Hive objects (#6955) --- .../apache/iceberg/hive/HiveHadoopUtil.java | 9 +++++++- .../apache/iceberg/hive/TestHiveCatalog.java | 22 +++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java index e63d1999e645..fb13cb318eb9 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java @@ -32,10 +32,17 @@ private HiveHadoopUtil() { } public static String currentUser() { + String username = null; try { - return UserGroupInformation.getCurrentUser().getUserName(); + username = UserGroupInformation.getCurrentUser().getShortUserName(); } catch (IOException e) { LOG.warn("Failed to get Hadoop user", e); + } + + if (username != null) { + return username; + } else { + LOG.warn("Hadoop user is null, defaulting to user.name"); return System.getProperty("user.name"); } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 4bfdb91bc918..6bceeded4791 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -250,7 +250,7 @@ public void testCreateTableWithOwner() throws Exception { DB_NAME, "tbl_default_owner", ImmutableMap.of(), - UserGroupInformation.getCurrentUser().getUserName()); + UserGroupInformation.getCurrentUser().getShortUserName()); } private void createTableAndVerifyOwner( @@ -351,7 +351,7 @@ public void testCreateNamespaceWithOwnership() throws Exception { createNamespaceAndVerifyOwnership( "default_ownership_1", ImmutableMap.of(), - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER); createNamespaceAndVerifyOwnership( @@ -359,7 +359,7 @@ public void testCreateNamespaceWithOwnership() throws Exception { ImmutableMap.of( "non_owner_prop1", "value1", "non_owner_prop2", "value2"), - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER); createNamespaceAndVerifyOwnership( @@ -657,9 +657,9 @@ public void testSetNamespaceOwnershipNoop() throws TException, IOException { "set_ownership_noop_3", ImmutableMap.of(), ImmutableMap.of(), - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER, - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER); setNamespaceOwnershipAndVerify( @@ -721,7 +721,7 @@ public void testRemoveNamespaceOwnership() throws TException, IOException { ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), "some_owner", PrincipalType.USER, - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( @@ -734,25 +734,25 @@ public void testRemoveNamespaceOwnership() throws TException, IOException { ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), "some_group_owner", PrincipalType.GROUP, - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( "remove_ownership_on_default_noop_1", ImmutableMap.of(), ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER, - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( "remove_ownership_on_default_noop_2", ImmutableMap.of(), ImmutableSet.of(), - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER, - UserGroupInformation.getCurrentUser().getUserName(), + UserGroupInformation.getCurrentUser().getShortUserName(), PrincipalType.USER); removeNamespaceOwnershipAndVerify( From 77f7d02d05ca29fe974648a6c981322b7005e745 Mon Sep 17 00:00:00 2001 From: pvary Date: Tue, 25 Apr 2023 09:13:20 +0200 Subject: [PATCH 12/35] Hive: Use EnvironmentContext instead of Hive Locks to provide transactional commits after HIVE-26882 (#6570) --- .../iceberg/hive/HiveTableOperations.java | 87 +++- .../apache/iceberg/hive/MetastoreUtil.java | 31 +- .../java/org/apache/iceberg/hive/NoLock.java | 45 +++ .../iceberg/hive/TestHiveCommitLocks.java | 41 ++ .../apache/iceberg/hive/TestHiveCommits.java | 74 +++- iceberg/patched-iceberg-core/pom.xml | 2 + .../org/apache/iceberg/TableProperties.java | 375 ++++++++++++++++++ .../iceberg/hadoop/ConfigProperties.java | 30 ++ 8 files changed, 654 insertions(+), 31 deletions(-) create mode 100644 iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/NoLock.java create mode 100644 iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java create mode 100644 iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 4f45438ae858..1bffb22a2f35 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -81,6 +81,8 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { // characters, see https://issues.apache.org/jira/browse/HIVE-12274 // set to 0 to not expose Iceberg metadata in HMS Table properties. private static final String HIVE_TABLE_PROPERTY_MAX_SIZE = "iceberg.hive.table-property-max-size"; + private static final String NO_LOCK_EXPECTED_KEY = "expected_parameter_key"; + private static final String NO_LOCK_EXPECTED_VALUE = "expected_parameter_value"; private static final long HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT = 32672; private static final BiMap ICEBERG_TO_HMS_TRANSLATION = ImmutableBiMap.of( @@ -177,7 +179,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { CommitStatus commitStatus = CommitStatus.FAILURE; boolean updateHiveTable = false; - HiveLock lock = lockObject(); + HiveLock lock = lockObject(metadata); try { lock.lock(); @@ -226,8 +228,8 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { lock.ensureActive(); try { - persistTable(tbl, updateHiveTable); - + persistTable( + tbl, updateHiveTable, hiveLockEnabled(metadata, conf) ? null : baseMetadataLocation); lock.ensureActive(); commitStatus = CommitStatus.SUCCESS; @@ -248,10 +250,21 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { throw e; } catch (Throwable e) { + if (e.getMessage() + .contains( + "The table has been modified. The parameter value for key '" + + HiveTableOperations.METADATA_LOCATION_PROP + + "' is")) { + throw new CommitFailedException( + e, "The table %s.%s has been modified concurrently", database, tableName); + } + if (e.getMessage() != null && e.getMessage().contains("Table/View 'HIVE_LOCKS' does not exist")) { - throw new RuntimeException("Failed to acquire locks from metastore because the underlying metastore " + - "table 'HIVE_LOCKS' does not exist. This can occur when using an embedded metastore which does not " + - "support transactions. To fix this use an alternative metastore.", e); + throw new RuntimeException( + "Failed to acquire locks from metastore because the underlying metastore " + + "table 'HIVE_LOCKS' does not exist. This can occur when using an embedded metastore which does not " + + "support transactions. To fix this use an alternative metastore.", + e); } LOG.error("Cannot tell if commit to {}.{} succeeded, attempting to reconnect and check.", @@ -284,12 +297,25 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { } @VisibleForTesting - void persistTable(Table hmsTable, boolean updateHiveTable) throws TException, InterruptedException { + void persistTable(Table hmsTable, boolean updateHiveTable, String expectedMetadataLocation) + throws TException, InterruptedException { if (updateHiveTable) { - metaClients.run(client -> { - MetastoreUtil.alterTable(client, database, tableName, hmsTable); - return null; - }); + metaClients.run( + client -> { + MetastoreUtil.alterTable( + client, + database, + tableName, + hmsTable, + expectedMetadataLocation != null ? + ImmutableMap.of( + NO_LOCK_EXPECTED_KEY, + METADATA_LOCATION_PROP, + NO_LOCK_EXPECTED_VALUE, + expectedMetadataLocation) : + ImmutableMap.of()); + return null; + }); } else { metaClients.run(client -> { client.createTable(hmsTable); @@ -531,6 +557,43 @@ private static boolean hiveEngineEnabled(TableMetadata metadata, Configuration c return metadata.propertyAsBoolean(TableProperties.ENGINE_HIVE_ENABLED, false); } - return conf.getBoolean(ConfigProperties.ENGINE_HIVE_ENABLED, TableProperties.ENGINE_HIVE_ENABLED_DEFAULT); + return conf.getBoolean( + ConfigProperties.ENGINE_HIVE_ENABLED, TableProperties.ENGINE_HIVE_ENABLED_DEFAULT); + } + + /** + * Returns if the hive locking should be enabled on the table, or not. + * + *

The decision is made like this: + * + *

    + *
  1. Table property value {@link TableProperties#HIVE_LOCK_ENABLED} + *
  2. If the table property is not set then check the hive-site.xml property value {@link + * ConfigProperties#LOCK_HIVE_ENABLED} + *
  3. If none of the above is enabled then use the default value {@link + * TableProperties#HIVE_LOCK_ENABLED_DEFAULT} + *
+ * + * @param metadata Table metadata to use + * @param conf The hive configuration to use + * @return if the hive engine related values should be enabled or not + */ + private static boolean hiveLockEnabled(TableMetadata metadata, Configuration conf) { + if (metadata.properties().get(TableProperties.HIVE_LOCK_ENABLED) != null) { + // We know that the property is set, so default value will not be used, + return metadata.propertyAsBoolean(TableProperties.HIVE_LOCK_ENABLED, false); + } + + return conf.getBoolean( + ConfigProperties.LOCK_HIVE_ENABLED, TableProperties.HIVE_LOCK_ENABLED_DEFAULT); + } + + @VisibleForTesting + HiveLock lockObject(TableMetadata metadata) { + if (hiveLockEnabled(metadata, conf)) { + return new MetastoreLock(conf, metaClients, catalogName, database, tableName); + } else { + return new NoLock(); + } } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index f2d19d2ce59b..ee62b64ed23e 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -19,12 +19,14 @@ package org.apache.iceberg.hive; +import java.util.Map; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.iceberg.common.DynMethods; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; public class MetastoreUtil { private static final DynMethods.UnboundMethod ALTER_TABLE = @@ -50,13 +52,28 @@ private MetastoreUtil() { } /** - * Calls alter_table method using the metastore client. If possible, an environmental context will be used that - * turns off stats updates to avoid recursive listing. + * Calls alter_table method using the metastore client. If the HMS supports it, environmental + * context will be set in a way that turns off stats updates to avoid recursive file listing. */ - public static void alterTable(IMetaStoreClient client, String databaseName, String tblName, Table table) { - EnvironmentContext envContext = new EnvironmentContext( - ImmutableMap.of(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE) - ); - ALTER_TABLE.invoke(client, databaseName, tblName, table, envContext); + public static void alterTable( + IMetaStoreClient client, String databaseName, String tblName, Table table) { + alterTable(client, databaseName, tblName, table, ImmutableMap.of()); + } + + /** + * Calls alter_table method using the metastore client. If the HMS supports it, environmental + * context will be set in a way that turns off stats updates to avoid recursive file listing. + */ + public static void alterTable( + IMetaStoreClient client, + String databaseName, + String tblName, + Table table, + Map extraEnv) { + Map env = Maps.newHashMapWithExpectedSize(extraEnv.size() + 1); + env.putAll(extraEnv); + env.put(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); + + ALTER_TABLE.invoke(client, databaseName, tblName, table, new EnvironmentContext(env)); } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/NoLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/NoLock.java new file mode 100644 index 000000000000..bc59f0ae358f --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/NoLock.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public class NoLock implements HiveLock { + public NoLock() { + Preconditions.checkArgument( + HiveVersion.min(HiveVersion.HIVE_2), + "Minimally Hive 2 HMS client is needed to use HIVE-26882 based locking"); + } + + @Override + public void lock() throws LockException { + // no-op + } + + @Override + public void ensureActive() throws LockException { + // no-op + } + + @Override + public void unlock() { + // no-op + } +} diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java index 93ad36f58a9d..52e70c84dc05 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java @@ -20,6 +20,7 @@ package org.apache.iceberg.hive; import java.util.Collections; +import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -29,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.LockRequest; import org.apache.hadoop.hive.metastore.api.LockResponse; import org.apache.hadoop.hive.metastore.api.LockState; @@ -39,6 +41,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.hadoop.ConfigProperties; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; @@ -482,4 +485,42 @@ public void testLockHeartbeatFailureDuringCommit() throws TException, Interrupte "Failed to heartbeat for hive lock. Failed to heart beat.", () -> spyOps.doCommit(metadataV2, metadataV1)); } + + @Test + public void testNoLockCallsWithNoLock() throws TException { + Configuration confWithLock = new Configuration(overriddenHiveConf); + confWithLock.setBoolean(ConfigProperties.LOCK_HIVE_ENABLED, false); + + HiveTableOperations noLockSpyOps = + spy( + new HiveTableOperations( + confWithLock, + spyCachedClientPool, + ops.io(), + catalog.name(), + TABLE_IDENTIFIER.namespace().level(0), + TABLE_IDENTIFIER.name())); + + ArgumentCaptor contextCaptor = + ArgumentCaptor.forClass(EnvironmentContext.class); + + doNothing() + .when(spyClient) + .alter_table_with_environmentContext(any(), any(), any(), contextCaptor.capture()); + + noLockSpyOps.doCommit(metadataV2, metadataV1); + + // Make sure that the locking is not used + verify(spyClient, never()).lock(any(LockRequest.class)); + verify(spyClient, never()).checkLock(any(Long.class)); + verify(spyClient, never()).heartbeat(any(Long.class), any(Long.class)); + verify(spyClient, never()).unlock(any(Long.class)); + + // Make sure that the expected parameter context values are set + Map context = contextCaptor.getValue().getProperties(); + Assert.assertEquals(3, context.size()); + Assert.assertEquals( + context.get("expected_parameter_key"), HiveTableOperations.METADATA_LOCATION_PROP); + Assert.assertEquals(context.get("expected_parameter_value"), metadataV2.metadataFileLocation()); + } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java index b5802fa5cdea..d4afb3b320af 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java @@ -20,7 +20,6 @@ package org.apache.iceberg.hive; import java.io.File; -import java.net.UnknownHostException; import java.util.concurrent.atomic.AtomicReference; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.HasTableOperations; @@ -29,6 +28,7 @@ import org.apache.iceberg.TableMetadata; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.types.Types; @@ -39,6 +39,7 @@ import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyBoolean; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; @@ -46,7 +47,7 @@ public class TestHiveCommits extends HiveTableBaseTest { @Test - public void testSuppressUnlockExceptions() throws TException, InterruptedException, UnknownHostException { + public void testSuppressUnlockExceptions() { Table table = catalog.loadTable(TABLE_IDENTIFIER); HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations(); @@ -66,7 +67,7 @@ public void testSuppressUnlockExceptions() throws TException, InterruptedExcepti AtomicReference lockRef = new AtomicReference<>(); - when(spyOps.lockObject()) + when(spyOps.lockObject(metadataV1)) .thenAnswer( i -> { HiveLock lock = (HiveLock) i.callRealMethod(); @@ -252,7 +253,7 @@ public void testThriftExceptionsUnknownSuccessCommit() throws TException, Interr * current one during the recheck phase. */ @Test - public void testThriftExceptionConcurrentCommit() throws TException, InterruptedException, UnknownHostException { + public void testThriftExceptionConcurrentCommit() throws TException, InterruptedException { Table table = catalog.loadTable(TABLE_IDENTIFIER); HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations(); @@ -273,11 +274,11 @@ public void testThriftExceptionConcurrentCommit() throws TException, Interrupted AtomicReference lock = new AtomicReference<>(); doAnswer( l -> { - lock.set(ops.lockObject()); + lock.set(ops.lockObject(metadataV1)); return lock.get(); }) .when(spyOps) - .lockObject(); + .lockObject(metadataV1); concurrentCommitAndThrowException(ops, spyOps, table, lock); @@ -309,15 +310,61 @@ public void testAlreadyExistsException() { () -> catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned())); } - private void commitAndThrowException(HiveTableOperations realOperations, HiveTableOperations spyOperations) + /** Uses NoLock and pretends we throw an error because of a concurrent commit */ + @Test + public void testNoLockThriftExceptionConcurrentCommit() throws TException, InterruptedException { + Table table = catalog.loadTable(TABLE_IDENTIFIER); + HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations(); + + TableMetadata metadataV1 = ops.current(); + + table.updateSchema().addColumn("n", Types.IntegerType.get()).commit(); + + ops.refresh(); + + TableMetadata metadataV2 = ops.current(); + + Assert.assertEquals(2, ops.current().schema().columns().size()); + + HiveTableOperations spyOps = spy(ops); + + // Sets NoLock + doReturn(new NoLock()).when(spyOps).lockObject(any()); + + // Simulate a concurrent table modification error + doThrow( + new RuntimeException( + "MetaException(message:The table has been modified. " + + "The parameter value for key 'metadata_location' is")) + .when(spyOps) + .persistTable(any(), anyBoolean(), any()); + + // Should throw a CommitFailedException so the commit could be retried + AssertHelpers.assertThrows( + "Should throw CommitFailedException since the table has been modified concurrently", + CommitFailedException.class, + "has been modified concurrently", + () -> spyOps.commit(metadataV2, metadataV1)); + + ops.refresh(); + Assert.assertEquals("Current metadata should not have changed", metadataV2, ops.current()); + Assert.assertTrue("Current metadata should still exist", metadataFileExists(metadataV2)); + Assert.assertEquals("New metadata files should not exist", 2, metadataFileCount(ops.current())); + } + + private void commitAndThrowException( + HiveTableOperations realOperations, HiveTableOperations spyOperations) throws TException, InterruptedException { // Simulate a communication error after a successful commit doAnswer(i -> { org.apache.hadoop.hive.metastore.api.Table tbl = i.getArgument(0, org.apache.hadoop.hive.metastore.api.Table.class); - realOperations.persistTable(tbl, true); + String location = i.getArgument(2, String.class); + realOperations.persistTable(tbl, true, location); throw new TException("Datacenter on fire"); - }).when(spyOperations).persistTable(any(), anyBoolean()); + }) + .when(spyOperations) + .persistTable(any(), anyBoolean(), any()); } private void concurrentCommitAndThrowException( @@ -330,19 +377,22 @@ private void concurrentCommitAndThrowException( doAnswer(i -> { org.apache.hadoop.hive.metastore.api.Table tbl = i.getArgument(0, org.apache.hadoop.hive.metastore.api.Table.class); - realOperations.persistTable(tbl, true); + String location = i.getArgument(2, String.class); + realOperations.persistTable(tbl, true, location); // Simulate lock expiration or removal lock.get().unlock(); table.refresh(); table.updateSchema().addColumn("newCol", Types.IntegerType.get()).commit(); throw new TException("Datacenter on fire"); - }).when(spyOperations).persistTable(any(), anyBoolean()); + }) + .when(spyOperations) + .persistTable(any(), anyBoolean(), any()); } private void failCommitAndThrowException(HiveTableOperations spyOperations) throws TException, InterruptedException { doThrow(new TException("Datacenter on fire")) .when(spyOperations) - .persistTable(any(), anyBoolean()); + .persistTable(any(), anyBoolean(), any()); } private void breakFallbackCatalogCommitCheck(HiveTableOperations spyOperations) { diff --git a/iceberg/patched-iceberg-core/pom.xml b/iceberg/patched-iceberg-core/pom.xml index 3654b60f2e88..6726f196e506 100644 --- a/iceberg/patched-iceberg-core/pom.xml +++ b/iceberg/patched-iceberg-core/pom.xml @@ -76,6 +76,8 @@ ${project.build.directory}/classes **/HadoopInputFile.class + **/TableProperties.class + **/ConfigProperties.class **/SerializableTable.class diff --git a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java b/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java new file mode 100644 index 000000000000..360f0540730c --- /dev/null +++ b/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java @@ -0,0 +1,375 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; + +public class TableProperties { + + private TableProperties() { + } + + /** + * Reserved table property for table format version. + * + *

Iceberg will default a new table's format version to the latest stable and recommended + * version. This reserved property keyword allows users to override the Iceberg format version of + * the table metadata. + * + *

If this table property exists when creating a table, the table will use the specified format + * version. If a table updates this property, it will try to upgrade to the specified format + * version. + * + *

Note: incomplete or unstable versions cannot be selected using this property. + */ + public static final String FORMAT_VERSION = "format-version"; + + /** Reserved table property for table UUID. */ + public static final String UUID = "uuid"; + + /** Reserved table property for the total number of snapshots. */ + public static final String SNAPSHOT_COUNT = "snapshot-count"; + + /** Reserved table property for current snapshot summary. */ + public static final String CURRENT_SNAPSHOT_SUMMARY = "current-snapshot-summary"; + + /** Reserved table property for current snapshot id. */ + public static final String CURRENT_SNAPSHOT_ID = "current-snapshot-id"; + + /** Reserved table property for current snapshot timestamp. */ + public static final String CURRENT_SNAPSHOT_TIMESTAMP = "current-snapshot-timestamp-ms"; + + /** Reserved table property for the JSON representation of current schema. */ + public static final String CURRENT_SCHEMA = "current-schema"; + + /** Reserved table property for the JSON representation of current(default) partition spec. */ + public static final String DEFAULT_PARTITION_SPEC = "default-partition-spec"; + + /** Reserved table property for the JSON representation of current(default) sort order. */ + public static final String DEFAULT_SORT_ORDER = "default-sort-order"; + + /** + * Reserved Iceberg table properties list. + * + *

Reserved table properties are only used to control behaviors when creating or updating a + * table. The value of these properties are not persisted as a part of the table metadata. + */ + public static final Set RESERVED_PROPERTIES = + ImmutableSet.of( + FORMAT_VERSION, + UUID, + SNAPSHOT_COUNT, + CURRENT_SNAPSHOT_ID, + CURRENT_SNAPSHOT_SUMMARY, + CURRENT_SNAPSHOT_TIMESTAMP, + CURRENT_SCHEMA, + DEFAULT_PARTITION_SPEC, + DEFAULT_SORT_ORDER); + + public static final String COMMIT_NUM_RETRIES = "commit.retry.num-retries"; + public static final int COMMIT_NUM_RETRIES_DEFAULT = 4; + + public static final String COMMIT_MIN_RETRY_WAIT_MS = "commit.retry.min-wait-ms"; + public static final int COMMIT_MIN_RETRY_WAIT_MS_DEFAULT = 100; + + public static final String COMMIT_MAX_RETRY_WAIT_MS = "commit.retry.max-wait-ms"; + public static final int COMMIT_MAX_RETRY_WAIT_MS_DEFAULT = 60 * 1000; // 1 minute + + public static final String COMMIT_TOTAL_RETRY_TIME_MS = "commit.retry.total-timeout-ms"; + public static final int COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT = 30 * 60 * 1000; // 30 minutes + + public static final String COMMIT_NUM_STATUS_CHECKS = "commit.status-check.num-retries"; + public static final int COMMIT_NUM_STATUS_CHECKS_DEFAULT = 3; + + public static final String COMMIT_STATUS_CHECKS_MIN_WAIT_MS = "commit.status-check.min-wait-ms"; + public static final long COMMIT_STATUS_CHECKS_MIN_WAIT_MS_DEFAULT = 1000; // 1 second + + public static final String COMMIT_STATUS_CHECKS_MAX_WAIT_MS = "commit.status-check.max-wait-ms"; + public static final long COMMIT_STATUS_CHECKS_MAX_WAIT_MS_DEFAULT = 60 * 1000; // 1 minute + + public static final String COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS = + "commit.status-check.total-timeout-ms"; + public static final long COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS_DEFAULT = + 30 * 60 * 1000; // 30 minutes + + public static final String MANIFEST_TARGET_SIZE_BYTES = "commit.manifest.target-size-bytes"; + public static final long MANIFEST_TARGET_SIZE_BYTES_DEFAULT = 8 * 1024 * 1024; // 8 MB + + public static final String MANIFEST_MIN_MERGE_COUNT = "commit.manifest.min-count-to-merge"; + public static final int MANIFEST_MIN_MERGE_COUNT_DEFAULT = 100; + + public static final String MANIFEST_MERGE_ENABLED = "commit.manifest-merge.enabled"; + public static final boolean MANIFEST_MERGE_ENABLED_DEFAULT = true; + + public static final String DEFAULT_FILE_FORMAT = "write.format.default"; + public static final String DELETE_DEFAULT_FILE_FORMAT = "write.delete.format.default"; + public static final String DEFAULT_FILE_FORMAT_DEFAULT = "parquet"; + + public static final String PARQUET_ROW_GROUP_SIZE_BYTES = "write.parquet.row-group-size-bytes"; + public static final String DELETE_PARQUET_ROW_GROUP_SIZE_BYTES = + "write.delete.parquet.row-group-size-bytes"; + public static final int PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT = 128 * 1024 * 1024; // 128 MB + + public static final String PARQUET_PAGE_SIZE_BYTES = "write.parquet.page-size-bytes"; + public static final String DELETE_PARQUET_PAGE_SIZE_BYTES = + "write.delete.parquet.page-size-bytes"; + public static final int PARQUET_PAGE_SIZE_BYTES_DEFAULT = 1024 * 1024; // 1 MB + + public static final String PARQUET_PAGE_ROW_LIMIT = "write.parquet.page-row-limit"; + public static final String DELETE_PARQUET_PAGE_ROW_LIMIT = "write.delete.parquet.page-row-limit"; + public static final int PARQUET_PAGE_ROW_LIMIT_DEFAULT = 20_000; + + public static final String PARQUET_DICT_SIZE_BYTES = "write.parquet.dict-size-bytes"; + public static final String DELETE_PARQUET_DICT_SIZE_BYTES = + "write.delete.parquet.dict-size-bytes"; + public static final int PARQUET_DICT_SIZE_BYTES_DEFAULT = 2 * 1024 * 1024; // 2 MB + + public static final String PARQUET_COMPRESSION = "write.parquet.compression-codec"; + public static final String DELETE_PARQUET_COMPRESSION = "write.delete.parquet.compression-codec"; + public static final String PARQUET_COMPRESSION_DEFAULT = "gzip"; + + public static final String PARQUET_COMPRESSION_LEVEL = "write.parquet.compression-level"; + public static final String DELETE_PARQUET_COMPRESSION_LEVEL = + "write.delete.parquet.compression-level"; + public static final String PARQUET_COMPRESSION_LEVEL_DEFAULT = null; + + public static final String PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT = + "write.parquet.row-group-check-min-record-count"; + public static final String DELETE_PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT = + "write.delete.parquet.row-group-check-min-record-count"; + public static final int PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT_DEFAULT = 100; + + public static final String PARQUET_ROW_GROUP_CHECK_MAX_RECORD_COUNT = + "write.parquet.row-group-check-max-record-count"; + public static final String DELETE_PARQUET_ROW_GROUP_CHECK_MAX_RECORD_COUNT = + "write.delete.parquet.row-group-check-max-record-count"; + public static final int PARQUET_ROW_GROUP_CHECK_MAX_RECORD_COUNT_DEFAULT = 10000; + + public static final String PARQUET_BLOOM_FILTER_MAX_BYTES = + "write.parquet.bloom-filter-max-bytes"; + public static final int PARQUET_BLOOM_FILTER_MAX_BYTES_DEFAULT = 1024 * 1024; + + public static final String PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX = + "write.parquet.bloom-filter-enabled.column."; + + public static final String AVRO_COMPRESSION = "write.avro.compression-codec"; + public static final String DELETE_AVRO_COMPRESSION = "write.delete.avro.compression-codec"; + public static final String AVRO_COMPRESSION_DEFAULT = "gzip"; + + public static final String AVRO_COMPRESSION_LEVEL = "write.avro.compression-level"; + public static final String DELETE_AVRO_COMPRESSION_LEVEL = "write.delete.avro.compression-level"; + public static final String AVRO_COMPRESSION_LEVEL_DEFAULT = null; + + public static final String ORC_STRIPE_SIZE_BYTES = "write.orc.stripe-size-bytes"; + + public static final String ORC_BLOOM_FILTER_COLUMNS = "write.orc.bloom.filter.columns"; + public static final String ORC_BLOOM_FILTER_COLUMNS_DEFAULT = ""; + + public static final String ORC_BLOOM_FILTER_FPP = "write.orc.bloom.filter.fpp"; + public static final double ORC_BLOOM_FILTER_FPP_DEFAULT = 0.05; + + public static final String DELETE_ORC_STRIPE_SIZE_BYTES = "write.delete.orc.stripe-size-bytes"; + public static final long ORC_STRIPE_SIZE_BYTES_DEFAULT = 64L * 1024 * 1024; // 64 MB + + public static final String ORC_BLOCK_SIZE_BYTES = "write.orc.block-size-bytes"; + public static final String DELETE_ORC_BLOCK_SIZE_BYTES = "write.delete.orc.block-size-bytes"; + public static final long ORC_BLOCK_SIZE_BYTES_DEFAULT = 256L * 1024 * 1024; // 256 MB + + public static final String ORC_WRITE_BATCH_SIZE = "write.orc.vectorized.batch-size"; + public static final String DELETE_ORC_WRITE_BATCH_SIZE = "write.delete.orc.vectorized.batch-size"; + public static final int ORC_WRITE_BATCH_SIZE_DEFAULT = 1024; + + public static final String ORC_COMPRESSION = "write.orc.compression-codec"; + public static final String DELETE_ORC_COMPRESSION = "write.delete.orc.compression-codec"; + public static final String ORC_COMPRESSION_DEFAULT = "zlib"; + + public static final String ORC_COMPRESSION_STRATEGY = "write.orc.compression-strategy"; + public static final String DELETE_ORC_COMPRESSION_STRATEGY = + "write.delete.orc.compression-strategy"; + public static final String ORC_COMPRESSION_STRATEGY_DEFAULT = "speed"; + + public static final String SPLIT_SIZE = "read.split.target-size"; + public static final long SPLIT_SIZE_DEFAULT = 128 * 1024 * 1024; // 128 MB + + public static final String METADATA_SPLIT_SIZE = "read.split.metadata-target-size"; + public static final long METADATA_SPLIT_SIZE_DEFAULT = 32 * 1024 * 1024; // 32 MB + + public static final String SPLIT_LOOKBACK = "read.split.planning-lookback"; + public static final int SPLIT_LOOKBACK_DEFAULT = 10; + + public static final String SPLIT_OPEN_FILE_COST = "read.split.open-file-cost"; + public static final long SPLIT_OPEN_FILE_COST_DEFAULT = 4 * 1024 * 1024; // 4MB + + public static final String PARQUET_VECTORIZATION_ENABLED = "read.parquet.vectorization.enabled"; + public static final boolean PARQUET_VECTORIZATION_ENABLED_DEFAULT = true; + + public static final String PARQUET_BATCH_SIZE = "read.parquet.vectorization.batch-size"; + public static final int PARQUET_BATCH_SIZE_DEFAULT = 5000; + + public static final String ORC_VECTORIZATION_ENABLED = "read.orc.vectorization.enabled"; + public static final boolean ORC_VECTORIZATION_ENABLED_DEFAULT = false; + + public static final String ORC_BATCH_SIZE = "read.orc.vectorization.batch-size"; + public static final int ORC_BATCH_SIZE_DEFAULT = 5000; + + public static final String OBJECT_STORE_ENABLED = "write.object-storage.enabled"; + public static final boolean OBJECT_STORE_ENABLED_DEFAULT = false; + + /** + * @deprecated Use {@link #WRITE_DATA_LOCATION} instead. + * */ + @Deprecated public static final String OBJECT_STORE_PATH = "write.object-storage.path"; + + public static final String WRITE_LOCATION_PROVIDER_IMPL = "write.location-provider.impl"; + + /** + * @deprecated Use {@link #WRITE_DATA_LOCATION} instead. + * */ + @Deprecated + public static final String WRITE_FOLDER_STORAGE_LOCATION = "write.folder-storage.path"; + + // This only applies to files written after this property is set. Files previously written aren't + // relocated to reflect this parameter. + // If not set, defaults to a "data" folder underneath the root path of the table. + public static final String WRITE_DATA_LOCATION = "write.data.path"; + + // This only applies to files written after this property is set. Files previously written aren't + // relocated to reflect this parameter. + // If not set, defaults to a "metadata" folder underneath the root path of the table. + public static final String WRITE_METADATA_LOCATION = "write.metadata.path"; + + public static final String WRITE_PARTITION_SUMMARY_LIMIT = "write.summary.partition-limit"; + public static final int WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT = 0; + + /** + * @deprecated will be removed in 2.0.0, writing manifest lists is always enabled + * */ + @Deprecated public static final String MANIFEST_LISTS_ENABLED = "write.manifest-lists.enabled"; + + /** + * @deprecated will be removed in 2.0.0, writing manifest lists is always enabled + * */ + @Deprecated public static final boolean MANIFEST_LISTS_ENABLED_DEFAULT = true; + + public static final String METADATA_COMPRESSION = "write.metadata.compression-codec"; + public static final String METADATA_COMPRESSION_DEFAULT = "none"; + + public static final String METADATA_PREVIOUS_VERSIONS_MAX = + "write.metadata.previous-versions-max"; + public static final int METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT = 100; + + // This enables to delete the oldest metadata file after commit. + public static final String METADATA_DELETE_AFTER_COMMIT_ENABLED = + "write.metadata.delete-after-commit.enabled"; + public static final boolean METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT = false; + + public static final String METRICS_MAX_INFERRED_COLUMN_DEFAULTS = + "write.metadata.metrics.max-inferred-column-defaults"; + public static final int METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT = 100; + + public static final String METRICS_MODE_COLUMN_CONF_PREFIX = "write.metadata.metrics.column."; + public static final String DEFAULT_WRITE_METRICS_MODE = "write.metadata.metrics.default"; + public static final String DEFAULT_WRITE_METRICS_MODE_DEFAULT = "truncate(16)"; + + public static final String DEFAULT_NAME_MAPPING = "schema.name-mapping.default"; + + public static final String WRITE_AUDIT_PUBLISH_ENABLED = "write.wap.enabled"; + public static final String WRITE_AUDIT_PUBLISH_ENABLED_DEFAULT = "false"; + + public static final String WRITE_TARGET_FILE_SIZE_BYTES = "write.target-file-size-bytes"; + public static final long WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT = 512 * 1024 * 1024; // 512 MB + + public static final String DELETE_TARGET_FILE_SIZE_BYTES = "write.delete.target-file-size-bytes"; + public static final long DELETE_TARGET_FILE_SIZE_BYTES_DEFAULT = 64 * 1024 * 1024; // 64 MB + + public static final String SPARK_WRITE_PARTITIONED_FANOUT_ENABLED = "write.spark.fanout.enabled"; + public static final boolean SPARK_WRITE_PARTITIONED_FANOUT_ENABLED_DEFAULT = false; + + public static final String SPARK_WRITE_ACCEPT_ANY_SCHEMA = "write.spark.accept-any-schema"; + public static final boolean SPARK_WRITE_ACCEPT_ANY_SCHEMA_DEFAULT = false; + + public static final String SNAPSHOT_ID_INHERITANCE_ENABLED = + "compatibility.snapshot-id-inheritance.enabled"; + public static final boolean SNAPSHOT_ID_INHERITANCE_ENABLED_DEFAULT = false; + + public static final String ENGINE_HIVE_ENABLED = "engine.hive.enabled"; + public static final boolean ENGINE_HIVE_ENABLED_DEFAULT = false; + + public static final String HIVE_LOCK_ENABLED = "engine.hive.lock-enabled"; + public static final boolean HIVE_LOCK_ENABLED_DEFAULT = true; + + public static final String WRITE_DISTRIBUTION_MODE = "write.distribution-mode"; + public static final String WRITE_DISTRIBUTION_MODE_NONE = "none"; + public static final String WRITE_DISTRIBUTION_MODE_HASH = "hash"; + public static final String WRITE_DISTRIBUTION_MODE_RANGE = "range"; + + public static final String GC_ENABLED = "gc.enabled"; + public static final boolean GC_ENABLED_DEFAULT = true; + + public static final String MAX_SNAPSHOT_AGE_MS = "history.expire.max-snapshot-age-ms"; + public static final long MAX_SNAPSHOT_AGE_MS_DEFAULT = 5 * 24 * 60 * 60 * 1000; // 5 days + + public static final String MIN_SNAPSHOTS_TO_KEEP = "history.expire.min-snapshots-to-keep"; + public static final int MIN_SNAPSHOTS_TO_KEEP_DEFAULT = 1; + + public static final String MAX_REF_AGE_MS = "history.expire.max-ref-age-ms"; + public static final long MAX_REF_AGE_MS_DEFAULT = Long.MAX_VALUE; + + public static final String DELETE_ISOLATION_LEVEL = "write.delete.isolation-level"; + public static final String DELETE_ISOLATION_LEVEL_DEFAULT = "serializable"; + + public static final String DELETE_MODE = "write.delete.mode"; + public static final String DELETE_MODE_DEFAULT = RowLevelOperationMode.COPY_ON_WRITE.modeName(); + + public static final String DELETE_DISTRIBUTION_MODE = "write.delete.distribution-mode"; + + public static final String UPDATE_ISOLATION_LEVEL = "write.update.isolation-level"; + public static final String UPDATE_ISOLATION_LEVEL_DEFAULT = "serializable"; + + public static final String UPDATE_MODE = "write.update.mode"; + public static final String UPDATE_MODE_DEFAULT = RowLevelOperationMode.COPY_ON_WRITE.modeName(); + + public static final String UPDATE_DISTRIBUTION_MODE = "write.update.distribution-mode"; + + public static final String MERGE_ISOLATION_LEVEL = "write.merge.isolation-level"; + public static final String MERGE_ISOLATION_LEVEL_DEFAULT = "serializable"; + + public static final String MERGE_MODE = "write.merge.mode"; + public static final String MERGE_MODE_DEFAULT = RowLevelOperationMode.COPY_ON_WRITE.modeName(); + + /** + * @deprecated will be removed once Spark 3.1 support is dropped, the cardinality check is always + * performed starting from 0.13.0. + */ + @Deprecated + public static final String MERGE_CARDINALITY_CHECK_ENABLED = + "write.merge.cardinality-check.enabled"; + /** + * @deprecated will be removed once Spark 3.1 support is dropped, the cardinality check is always + * performed starting from 0.13.0. + */ + @Deprecated public static final boolean MERGE_CARDINALITY_CHECK_ENABLED_DEFAULT = true; + + public static final String MERGE_DISTRIBUTION_MODE = "write.merge.distribution-mode"; + + public static final String UPSERT_ENABLED = "write.upsert.enabled"; + public static final boolean UPSERT_ENABLED_DEFAULT = false; +} diff --git a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java b/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java new file mode 100644 index 000000000000..3fd5a72c0788 --- /dev/null +++ b/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hadoop; + +public class ConfigProperties { + + private ConfigProperties() { + } + + public static final String ENGINE_HIVE_ENABLED = "iceberg.engine.hive.enabled"; + public static final String LOCK_HIVE_ENABLED = "iceberg.engine.hive.lock-enabled"; + public static final String KEEP_HIVE_STATS = "iceberg.hive.keep.stats"; +} From 896f094656d433dc29957279b9772c886b9118f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduard=20Tudenh=C3=B6fner?= Date: Mon, 14 Jun 2021 23:33:02 +0200 Subject: [PATCH 13/35] Nessie: Use AssertJ assertions (#2684) This also adds AssertJ to testCompile in all modules so assertions can be used elsewhere. --- .../org/apache/iceberg/AssertHelpers.java | 60 ++++--------------- 1 file changed, 13 insertions(+), 47 deletions(-) diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java index 68137034f977..3f8c7fc0cb24 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java @@ -22,7 +22,6 @@ import java.util.concurrent.Callable; import org.assertj.core.api.AbstractThrowableAssert; import org.assertj.core.api.Assertions; -import org.junit.Assert; public class AssertHelpers { @@ -41,12 +40,11 @@ public static void assertThrows(String message, Class expected, String containedInMessage, Callable callable) { - try { - callable.call(); - Assert.fail("No exception was thrown (" + message + "), expected: " + - expected.getName()); - } catch (Exception actual) { - handleException(message, expected, containedInMessage, actual); + AbstractThrowableAssert check = Assertions.assertThatThrownBy(callable::call) + .withFailMessage(message) + .isInstanceOf(expected); + if (null != containedInMessage) { + check.hasMessageContaining(containedInMessage); } } @@ -62,18 +60,12 @@ public static void assertThrows(String message, Class expected, String containedInMessage, Runnable runnable) { - AbstractThrowableAssert check = - Assertions.assertThatThrownBy(runnable::run).as(message).isInstanceOf(expected); + AbstractThrowableAssert check = Assertions.assertThatThrownBy(runnable::run) + .withFailMessage(message) + .isInstanceOf(expected); if (null != containedInMessage) { check.hasMessageContaining(containedInMessage); } -// try { -// runnable.run(); -// Assert.fail("No exception was thrown (" + message + "), expected: " + -// expected.getName()); -// } catch (Exception actual) { -// handleException(message, expected, containedInMessage, actual); -// } } /** @@ -112,36 +104,10 @@ public static void assertThrowsCause(String message, Class expected, String containedInMessage, Runnable runnable) { - try { - runnable.run(); - Assert.fail("No exception was thrown (" + message + "), expected: " + - expected.getName()); - } catch (Exception actual) { - Throwable cause = actual.getCause(); - if (cause instanceof Exception) { - handleException(message, expected, containedInMessage, (Exception) actual.getCause()); - } else { - Assert.fail("Occur non-exception cause: " + cause); - } - } - } - - private static void handleException(String message, - Class expected, - String containedInMessage, - Exception actual) { - try { - Assert.assertEquals(message, expected, actual.getClass()); - if (containedInMessage != null) { - Assert.assertTrue( - "Expected exception message (" + containedInMessage + ") missing: " + - actual.getMessage(), - actual.getMessage().contains(containedInMessage) - ); - } - } catch (AssertionError e) { - e.addSuppressed(actual); - throw e; - } + Assertions.assertThatThrownBy(runnable::run) + .withFailMessage(message) + .getCause() + .isInstanceOf(expected) + .hasMessageContaining(containedInMessage); } } From 9176d7230eb6c70f524a4880ec49e4bd356b3b82 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Wed, 17 May 2023 11:00:33 +0200 Subject: [PATCH 14/35] Fix compile error --- .../src/main/java/org/apache/iceberg/hive/MetastoreLock.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java index 37fe25900fa0..506915ae7f42 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java @@ -332,7 +332,8 @@ private LockInfo createLock() throws LockException { Thread.currentThread().interrupt(); interrupted.set(true); LOG.warn("Interrupted while creating lock on table {}.{}", databaseName, tableName, e); - throw new LockException("Interrupted while creating lock", e); + throw new LockException( + e, "Interrupted while creating lock on table %s.%s", databaseName, tableName); } }, LockException.class); From 7ce6433e2f8111206198e44ac3fb053a4e36db91 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Mon, 29 May 2023 18:26:29 +0200 Subject: [PATCH 15/35] Try EXCL_WRITE lock --- .../src/main/java/org/apache/iceberg/hive/MetastoreLock.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java index 506915ae7f42..77678f5100fb 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java @@ -277,7 +277,7 @@ private LockInfo createLock() throws LockException { } LockComponent lockComponent = - new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, databaseName); + new LockComponent(LockType.EXCL_WRITE, LockLevel.TABLE, databaseName); lockComponent.setTablename(tableName); LockRequest lockRequest = new LockRequest( From 1dcbd848a828e9c5fc8d149ed377de62e45ca6b1 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Thu, 1 Jun 2023 15:03:00 +0200 Subject: [PATCH 16/35] Pass txnid to MetaStoreLock --- iceberg/iceberg-catalog/pom.xml | 1 - .../iceberg/hive/HiveTableOperations.java | 19 ++++++++++----- .../apache/iceberg/hive/MetastoreLock.java | 7 ++---- .../iceberg/mr/hive/HiveIcebergMetaHook.java | 24 ++++++++++++++++++- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index ccefd619e25c..67a86112b87e 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -56,7 +56,6 @@ org.apache.hive hive-exec - test org.apache.hive diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 1bffb22a2f35..5514815a0bb1 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -36,6 +36,8 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hive.iceberg.com.fasterxml.jackson.core.JsonProcessingException; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.ClientPool; @@ -502,11 +504,6 @@ private StorageDescriptor storageDescriptor(TableMetadata metadata, boolean hive return storageDescriptor; } - @VisibleForTesting - HiveLock lockObject() { - return new MetastoreLock(conf, metaClients, catalogName, database, tableName); - } - private void cleanupMetadataAndUnlock(CommitStatus commitStatus, String metadataLocation, HiveLock lock) { try { @@ -591,7 +588,17 @@ private static boolean hiveLockEnabled(TableMetadata metadata, Configuration con @VisibleForTesting HiveLock lockObject(TableMetadata metadata) { if (hiveLockEnabled(metadata, conf)) { - return new MetastoreLock(conf, metaClients, catalogName, database, tableName); + Optional txnId = Optional.empty(); + + SessionState sessionState = SessionState.get(); + if (sessionState != null) { + HiveTxnManager txnMgr = sessionState.getTxnMgr(); + if (txnMgr != null) { + txnId = Optional.of(txnMgr.getCurrentTxnId()); + } + } + + return new MetastoreLock(conf, metaClients, catalogName, database, tableName, txnId); } else { return new NoLock(); } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java index 77678f5100fb..92c28c5d9d58 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java @@ -97,11 +97,12 @@ public class MetastoreLock implements HiveLock { private Heartbeat heartbeat = null; public MetastoreLock(Configuration conf, ClientPool metaClients, - String catalogName, String databaseName, String tableName) { + String catalogName, String databaseName, String tableName, Optional txnId) { this.metaClients = metaClients; this.fullName = catalogName + "." + databaseName + "." + tableName; this.databaseName = databaseName; this.tableName = tableName; + this.hmsLockId = txnId; this.lockAcquireTimeout = conf.getLong(HIVE_ACQUIRE_LOCK_TIMEOUT_MS, HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT); @@ -179,10 +180,6 @@ public void unlock() { // TODO add lock heart beating for cases where default lock timeout is too low. @SuppressWarnings("checkstyle:CyclomaticComplexity") private long acquireLock() throws LockException { - if (hmsLockId.isPresent()) { - throw new IllegalArgumentException(String.format("HMS lock ID=%s already acquired for table %s.%s", - hmsLockId.get(), databaseName, tableName)); - } LockInfo lockInfo = createLock(); final long start = System.currentTimeMillis(); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java index d946531d58f4..d9c4561561a2 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.ddl.table.AlterTableType; import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.PartitionTransform; @@ -102,6 +103,7 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.util.Pair; import org.apache.thrift.TException; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -324,8 +326,11 @@ public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, E context.getProperties().get(OLD_TABLE_NAME)).toString()); } if (commitLock == null) { + + Optional txnId = getTxnId(); + commitLock = new MetastoreLock(conf, new CachedClientPool(conf, Maps.fromProperties(catalogProperties)), - catalogProperties.getProperty(Catalogs.NAME), hmsTable.getDbName(), hmsTable.getTableName()); + catalogProperties.getProperty(Catalogs.NAME), hmsTable.getDbName(), hmsTable.getTableName(), txnId); } try { @@ -337,6 +342,23 @@ public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, E } } + @NotNull + private static Optional getTxnId() { + Optional txnId; + txnId = Optional.empty(); + + SessionState sessionState = SessionState.get(); + + if (sessionState != null) { + HiveTxnManager txnMgr = sessionState.getTxnMgr(); + if (txnMgr != null) { + txnId = Optional.of(txnMgr.getCurrentTxnId()); + } + } + + return txnId; + } + private void doPreAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context) throws MetaException { try { From 8acbfd3c38ae09c3c0a7d0a3eee22ad3745421b5 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Wed, 7 Jun 2023 15:59:37 +0200 Subject: [PATCH 17/35] Upgrade Iceberg dependency from 1.2.1 to 1.3.0 --- .../org/apache/iceberg/TableProperties.java | 375 ------------------ .../iceberg/hadoop/ConfigProperties.java | 30 -- iceberg/pom.xml | 2 +- 3 files changed, 1 insertion(+), 406 deletions(-) delete mode 100644 iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java delete mode 100644 iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java diff --git a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java b/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java deleted file mode 100644 index 360f0540730c..000000000000 --- a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/TableProperties.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.iceberg; - -import java.util.Set; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; - -public class TableProperties { - - private TableProperties() { - } - - /** - * Reserved table property for table format version. - * - *

Iceberg will default a new table's format version to the latest stable and recommended - * version. This reserved property keyword allows users to override the Iceberg format version of - * the table metadata. - * - *

If this table property exists when creating a table, the table will use the specified format - * version. If a table updates this property, it will try to upgrade to the specified format - * version. - * - *

Note: incomplete or unstable versions cannot be selected using this property. - */ - public static final String FORMAT_VERSION = "format-version"; - - /** Reserved table property for table UUID. */ - public static final String UUID = "uuid"; - - /** Reserved table property for the total number of snapshots. */ - public static final String SNAPSHOT_COUNT = "snapshot-count"; - - /** Reserved table property for current snapshot summary. */ - public static final String CURRENT_SNAPSHOT_SUMMARY = "current-snapshot-summary"; - - /** Reserved table property for current snapshot id. */ - public static final String CURRENT_SNAPSHOT_ID = "current-snapshot-id"; - - /** Reserved table property for current snapshot timestamp. */ - public static final String CURRENT_SNAPSHOT_TIMESTAMP = "current-snapshot-timestamp-ms"; - - /** Reserved table property for the JSON representation of current schema. */ - public static final String CURRENT_SCHEMA = "current-schema"; - - /** Reserved table property for the JSON representation of current(default) partition spec. */ - public static final String DEFAULT_PARTITION_SPEC = "default-partition-spec"; - - /** Reserved table property for the JSON representation of current(default) sort order. */ - public static final String DEFAULT_SORT_ORDER = "default-sort-order"; - - /** - * Reserved Iceberg table properties list. - * - *

Reserved table properties are only used to control behaviors when creating or updating a - * table. The value of these properties are not persisted as a part of the table metadata. - */ - public static final Set RESERVED_PROPERTIES = - ImmutableSet.of( - FORMAT_VERSION, - UUID, - SNAPSHOT_COUNT, - CURRENT_SNAPSHOT_ID, - CURRENT_SNAPSHOT_SUMMARY, - CURRENT_SNAPSHOT_TIMESTAMP, - CURRENT_SCHEMA, - DEFAULT_PARTITION_SPEC, - DEFAULT_SORT_ORDER); - - public static final String COMMIT_NUM_RETRIES = "commit.retry.num-retries"; - public static final int COMMIT_NUM_RETRIES_DEFAULT = 4; - - public static final String COMMIT_MIN_RETRY_WAIT_MS = "commit.retry.min-wait-ms"; - public static final int COMMIT_MIN_RETRY_WAIT_MS_DEFAULT = 100; - - public static final String COMMIT_MAX_RETRY_WAIT_MS = "commit.retry.max-wait-ms"; - public static final int COMMIT_MAX_RETRY_WAIT_MS_DEFAULT = 60 * 1000; // 1 minute - - public static final String COMMIT_TOTAL_RETRY_TIME_MS = "commit.retry.total-timeout-ms"; - public static final int COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT = 30 * 60 * 1000; // 30 minutes - - public static final String COMMIT_NUM_STATUS_CHECKS = "commit.status-check.num-retries"; - public static final int COMMIT_NUM_STATUS_CHECKS_DEFAULT = 3; - - public static final String COMMIT_STATUS_CHECKS_MIN_WAIT_MS = "commit.status-check.min-wait-ms"; - public static final long COMMIT_STATUS_CHECKS_MIN_WAIT_MS_DEFAULT = 1000; // 1 second - - public static final String COMMIT_STATUS_CHECKS_MAX_WAIT_MS = "commit.status-check.max-wait-ms"; - public static final long COMMIT_STATUS_CHECKS_MAX_WAIT_MS_DEFAULT = 60 * 1000; // 1 minute - - public static final String COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS = - "commit.status-check.total-timeout-ms"; - public static final long COMMIT_STATUS_CHECKS_TOTAL_WAIT_MS_DEFAULT = - 30 * 60 * 1000; // 30 minutes - - public static final String MANIFEST_TARGET_SIZE_BYTES = "commit.manifest.target-size-bytes"; - public static final long MANIFEST_TARGET_SIZE_BYTES_DEFAULT = 8 * 1024 * 1024; // 8 MB - - public static final String MANIFEST_MIN_MERGE_COUNT = "commit.manifest.min-count-to-merge"; - public static final int MANIFEST_MIN_MERGE_COUNT_DEFAULT = 100; - - public static final String MANIFEST_MERGE_ENABLED = "commit.manifest-merge.enabled"; - public static final boolean MANIFEST_MERGE_ENABLED_DEFAULT = true; - - public static final String DEFAULT_FILE_FORMAT = "write.format.default"; - public static final String DELETE_DEFAULT_FILE_FORMAT = "write.delete.format.default"; - public static final String DEFAULT_FILE_FORMAT_DEFAULT = "parquet"; - - public static final String PARQUET_ROW_GROUP_SIZE_BYTES = "write.parquet.row-group-size-bytes"; - public static final String DELETE_PARQUET_ROW_GROUP_SIZE_BYTES = - "write.delete.parquet.row-group-size-bytes"; - public static final int PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT = 128 * 1024 * 1024; // 128 MB - - public static final String PARQUET_PAGE_SIZE_BYTES = "write.parquet.page-size-bytes"; - public static final String DELETE_PARQUET_PAGE_SIZE_BYTES = - "write.delete.parquet.page-size-bytes"; - public static final int PARQUET_PAGE_SIZE_BYTES_DEFAULT = 1024 * 1024; // 1 MB - - public static final String PARQUET_PAGE_ROW_LIMIT = "write.parquet.page-row-limit"; - public static final String DELETE_PARQUET_PAGE_ROW_LIMIT = "write.delete.parquet.page-row-limit"; - public static final int PARQUET_PAGE_ROW_LIMIT_DEFAULT = 20_000; - - public static final String PARQUET_DICT_SIZE_BYTES = "write.parquet.dict-size-bytes"; - public static final String DELETE_PARQUET_DICT_SIZE_BYTES = - "write.delete.parquet.dict-size-bytes"; - public static final int PARQUET_DICT_SIZE_BYTES_DEFAULT = 2 * 1024 * 1024; // 2 MB - - public static final String PARQUET_COMPRESSION = "write.parquet.compression-codec"; - public static final String DELETE_PARQUET_COMPRESSION = "write.delete.parquet.compression-codec"; - public static final String PARQUET_COMPRESSION_DEFAULT = "gzip"; - - public static final String PARQUET_COMPRESSION_LEVEL = "write.parquet.compression-level"; - public static final String DELETE_PARQUET_COMPRESSION_LEVEL = - "write.delete.parquet.compression-level"; - public static final String PARQUET_COMPRESSION_LEVEL_DEFAULT = null; - - public static final String PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT = - "write.parquet.row-group-check-min-record-count"; - public static final String DELETE_PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT = - "write.delete.parquet.row-group-check-min-record-count"; - public static final int PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT_DEFAULT = 100; - - public static final String PARQUET_ROW_GROUP_CHECK_MAX_RECORD_COUNT = - "write.parquet.row-group-check-max-record-count"; - public static final String DELETE_PARQUET_ROW_GROUP_CHECK_MAX_RECORD_COUNT = - "write.delete.parquet.row-group-check-max-record-count"; - public static final int PARQUET_ROW_GROUP_CHECK_MAX_RECORD_COUNT_DEFAULT = 10000; - - public static final String PARQUET_BLOOM_FILTER_MAX_BYTES = - "write.parquet.bloom-filter-max-bytes"; - public static final int PARQUET_BLOOM_FILTER_MAX_BYTES_DEFAULT = 1024 * 1024; - - public static final String PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX = - "write.parquet.bloom-filter-enabled.column."; - - public static final String AVRO_COMPRESSION = "write.avro.compression-codec"; - public static final String DELETE_AVRO_COMPRESSION = "write.delete.avro.compression-codec"; - public static final String AVRO_COMPRESSION_DEFAULT = "gzip"; - - public static final String AVRO_COMPRESSION_LEVEL = "write.avro.compression-level"; - public static final String DELETE_AVRO_COMPRESSION_LEVEL = "write.delete.avro.compression-level"; - public static final String AVRO_COMPRESSION_LEVEL_DEFAULT = null; - - public static final String ORC_STRIPE_SIZE_BYTES = "write.orc.stripe-size-bytes"; - - public static final String ORC_BLOOM_FILTER_COLUMNS = "write.orc.bloom.filter.columns"; - public static final String ORC_BLOOM_FILTER_COLUMNS_DEFAULT = ""; - - public static final String ORC_BLOOM_FILTER_FPP = "write.orc.bloom.filter.fpp"; - public static final double ORC_BLOOM_FILTER_FPP_DEFAULT = 0.05; - - public static final String DELETE_ORC_STRIPE_SIZE_BYTES = "write.delete.orc.stripe-size-bytes"; - public static final long ORC_STRIPE_SIZE_BYTES_DEFAULT = 64L * 1024 * 1024; // 64 MB - - public static final String ORC_BLOCK_SIZE_BYTES = "write.orc.block-size-bytes"; - public static final String DELETE_ORC_BLOCK_SIZE_BYTES = "write.delete.orc.block-size-bytes"; - public static final long ORC_BLOCK_SIZE_BYTES_DEFAULT = 256L * 1024 * 1024; // 256 MB - - public static final String ORC_WRITE_BATCH_SIZE = "write.orc.vectorized.batch-size"; - public static final String DELETE_ORC_WRITE_BATCH_SIZE = "write.delete.orc.vectorized.batch-size"; - public static final int ORC_WRITE_BATCH_SIZE_DEFAULT = 1024; - - public static final String ORC_COMPRESSION = "write.orc.compression-codec"; - public static final String DELETE_ORC_COMPRESSION = "write.delete.orc.compression-codec"; - public static final String ORC_COMPRESSION_DEFAULT = "zlib"; - - public static final String ORC_COMPRESSION_STRATEGY = "write.orc.compression-strategy"; - public static final String DELETE_ORC_COMPRESSION_STRATEGY = - "write.delete.orc.compression-strategy"; - public static final String ORC_COMPRESSION_STRATEGY_DEFAULT = "speed"; - - public static final String SPLIT_SIZE = "read.split.target-size"; - public static final long SPLIT_SIZE_DEFAULT = 128 * 1024 * 1024; // 128 MB - - public static final String METADATA_SPLIT_SIZE = "read.split.metadata-target-size"; - public static final long METADATA_SPLIT_SIZE_DEFAULT = 32 * 1024 * 1024; // 32 MB - - public static final String SPLIT_LOOKBACK = "read.split.planning-lookback"; - public static final int SPLIT_LOOKBACK_DEFAULT = 10; - - public static final String SPLIT_OPEN_FILE_COST = "read.split.open-file-cost"; - public static final long SPLIT_OPEN_FILE_COST_DEFAULT = 4 * 1024 * 1024; // 4MB - - public static final String PARQUET_VECTORIZATION_ENABLED = "read.parquet.vectorization.enabled"; - public static final boolean PARQUET_VECTORIZATION_ENABLED_DEFAULT = true; - - public static final String PARQUET_BATCH_SIZE = "read.parquet.vectorization.batch-size"; - public static final int PARQUET_BATCH_SIZE_DEFAULT = 5000; - - public static final String ORC_VECTORIZATION_ENABLED = "read.orc.vectorization.enabled"; - public static final boolean ORC_VECTORIZATION_ENABLED_DEFAULT = false; - - public static final String ORC_BATCH_SIZE = "read.orc.vectorization.batch-size"; - public static final int ORC_BATCH_SIZE_DEFAULT = 5000; - - public static final String OBJECT_STORE_ENABLED = "write.object-storage.enabled"; - public static final boolean OBJECT_STORE_ENABLED_DEFAULT = false; - - /** - * @deprecated Use {@link #WRITE_DATA_LOCATION} instead. - * */ - @Deprecated public static final String OBJECT_STORE_PATH = "write.object-storage.path"; - - public static final String WRITE_LOCATION_PROVIDER_IMPL = "write.location-provider.impl"; - - /** - * @deprecated Use {@link #WRITE_DATA_LOCATION} instead. - * */ - @Deprecated - public static final String WRITE_FOLDER_STORAGE_LOCATION = "write.folder-storage.path"; - - // This only applies to files written after this property is set. Files previously written aren't - // relocated to reflect this parameter. - // If not set, defaults to a "data" folder underneath the root path of the table. - public static final String WRITE_DATA_LOCATION = "write.data.path"; - - // This only applies to files written after this property is set. Files previously written aren't - // relocated to reflect this parameter. - // If not set, defaults to a "metadata" folder underneath the root path of the table. - public static final String WRITE_METADATA_LOCATION = "write.metadata.path"; - - public static final String WRITE_PARTITION_SUMMARY_LIMIT = "write.summary.partition-limit"; - public static final int WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT = 0; - - /** - * @deprecated will be removed in 2.0.0, writing manifest lists is always enabled - * */ - @Deprecated public static final String MANIFEST_LISTS_ENABLED = "write.manifest-lists.enabled"; - - /** - * @deprecated will be removed in 2.0.0, writing manifest lists is always enabled - * */ - @Deprecated public static final boolean MANIFEST_LISTS_ENABLED_DEFAULT = true; - - public static final String METADATA_COMPRESSION = "write.metadata.compression-codec"; - public static final String METADATA_COMPRESSION_DEFAULT = "none"; - - public static final String METADATA_PREVIOUS_VERSIONS_MAX = - "write.metadata.previous-versions-max"; - public static final int METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT = 100; - - // This enables to delete the oldest metadata file after commit. - public static final String METADATA_DELETE_AFTER_COMMIT_ENABLED = - "write.metadata.delete-after-commit.enabled"; - public static final boolean METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT = false; - - public static final String METRICS_MAX_INFERRED_COLUMN_DEFAULTS = - "write.metadata.metrics.max-inferred-column-defaults"; - public static final int METRICS_MAX_INFERRED_COLUMN_DEFAULTS_DEFAULT = 100; - - public static final String METRICS_MODE_COLUMN_CONF_PREFIX = "write.metadata.metrics.column."; - public static final String DEFAULT_WRITE_METRICS_MODE = "write.metadata.metrics.default"; - public static final String DEFAULT_WRITE_METRICS_MODE_DEFAULT = "truncate(16)"; - - public static final String DEFAULT_NAME_MAPPING = "schema.name-mapping.default"; - - public static final String WRITE_AUDIT_PUBLISH_ENABLED = "write.wap.enabled"; - public static final String WRITE_AUDIT_PUBLISH_ENABLED_DEFAULT = "false"; - - public static final String WRITE_TARGET_FILE_SIZE_BYTES = "write.target-file-size-bytes"; - public static final long WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT = 512 * 1024 * 1024; // 512 MB - - public static final String DELETE_TARGET_FILE_SIZE_BYTES = "write.delete.target-file-size-bytes"; - public static final long DELETE_TARGET_FILE_SIZE_BYTES_DEFAULT = 64 * 1024 * 1024; // 64 MB - - public static final String SPARK_WRITE_PARTITIONED_FANOUT_ENABLED = "write.spark.fanout.enabled"; - public static final boolean SPARK_WRITE_PARTITIONED_FANOUT_ENABLED_DEFAULT = false; - - public static final String SPARK_WRITE_ACCEPT_ANY_SCHEMA = "write.spark.accept-any-schema"; - public static final boolean SPARK_WRITE_ACCEPT_ANY_SCHEMA_DEFAULT = false; - - public static final String SNAPSHOT_ID_INHERITANCE_ENABLED = - "compatibility.snapshot-id-inheritance.enabled"; - public static final boolean SNAPSHOT_ID_INHERITANCE_ENABLED_DEFAULT = false; - - public static final String ENGINE_HIVE_ENABLED = "engine.hive.enabled"; - public static final boolean ENGINE_HIVE_ENABLED_DEFAULT = false; - - public static final String HIVE_LOCK_ENABLED = "engine.hive.lock-enabled"; - public static final boolean HIVE_LOCK_ENABLED_DEFAULT = true; - - public static final String WRITE_DISTRIBUTION_MODE = "write.distribution-mode"; - public static final String WRITE_DISTRIBUTION_MODE_NONE = "none"; - public static final String WRITE_DISTRIBUTION_MODE_HASH = "hash"; - public static final String WRITE_DISTRIBUTION_MODE_RANGE = "range"; - - public static final String GC_ENABLED = "gc.enabled"; - public static final boolean GC_ENABLED_DEFAULT = true; - - public static final String MAX_SNAPSHOT_AGE_MS = "history.expire.max-snapshot-age-ms"; - public static final long MAX_SNAPSHOT_AGE_MS_DEFAULT = 5 * 24 * 60 * 60 * 1000; // 5 days - - public static final String MIN_SNAPSHOTS_TO_KEEP = "history.expire.min-snapshots-to-keep"; - public static final int MIN_SNAPSHOTS_TO_KEEP_DEFAULT = 1; - - public static final String MAX_REF_AGE_MS = "history.expire.max-ref-age-ms"; - public static final long MAX_REF_AGE_MS_DEFAULT = Long.MAX_VALUE; - - public static final String DELETE_ISOLATION_LEVEL = "write.delete.isolation-level"; - public static final String DELETE_ISOLATION_LEVEL_DEFAULT = "serializable"; - - public static final String DELETE_MODE = "write.delete.mode"; - public static final String DELETE_MODE_DEFAULT = RowLevelOperationMode.COPY_ON_WRITE.modeName(); - - public static final String DELETE_DISTRIBUTION_MODE = "write.delete.distribution-mode"; - - public static final String UPDATE_ISOLATION_LEVEL = "write.update.isolation-level"; - public static final String UPDATE_ISOLATION_LEVEL_DEFAULT = "serializable"; - - public static final String UPDATE_MODE = "write.update.mode"; - public static final String UPDATE_MODE_DEFAULT = RowLevelOperationMode.COPY_ON_WRITE.modeName(); - - public static final String UPDATE_DISTRIBUTION_MODE = "write.update.distribution-mode"; - - public static final String MERGE_ISOLATION_LEVEL = "write.merge.isolation-level"; - public static final String MERGE_ISOLATION_LEVEL_DEFAULT = "serializable"; - - public static final String MERGE_MODE = "write.merge.mode"; - public static final String MERGE_MODE_DEFAULT = RowLevelOperationMode.COPY_ON_WRITE.modeName(); - - /** - * @deprecated will be removed once Spark 3.1 support is dropped, the cardinality check is always - * performed starting from 0.13.0. - */ - @Deprecated - public static final String MERGE_CARDINALITY_CHECK_ENABLED = - "write.merge.cardinality-check.enabled"; - /** - * @deprecated will be removed once Spark 3.1 support is dropped, the cardinality check is always - * performed starting from 0.13.0. - */ - @Deprecated public static final boolean MERGE_CARDINALITY_CHECK_ENABLED_DEFAULT = true; - - public static final String MERGE_DISTRIBUTION_MODE = "write.merge.distribution-mode"; - - public static final String UPSERT_ENABLED = "write.upsert.enabled"; - public static final boolean UPSERT_ENABLED_DEFAULT = false; -} diff --git a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java b/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java deleted file mode 100644 index 3fd5a72c0788..000000000000 --- a/iceberg/patched-iceberg-core/src/main/java/org/apache/iceberg/hadoop/ConfigProperties.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.iceberg.hadoop; - -public class ConfigProperties { - - private ConfigProperties() { - } - - public static final String ENGINE_HIVE_ENABLED = "iceberg.engine.hive.enabled"; - public static final String LOCK_HIVE_ENABLED = "iceberg.engine.hive.lock-enabled"; - public static final String KEEP_HIVE_STATS = "iceberg.hive.keep.stats"; -} diff --git a/iceberg/pom.xml b/iceberg/pom.xml index e59b01379c08..1c6015ea32b8 100644 --- a/iceberg/pom.xml +++ b/iceberg/pom.xml @@ -25,7 +25,7 @@ .. . - 1.2.1 + 1.3.0 4.0.2 3.4.4 1.11.1 From 210bb9335abf86272d1b894ff41c7e11f6af6f72 Mon Sep 17 00:00:00 2001 From: Liu Xiao <42756849+liuxiaocs7@users.noreply.github.com> Date: Wed, 22 Mar 2023 00:34:43 +0800 Subject: [PATCH 18/35] MR: Remove deprecated AssertHelpers (#7159) --- .../org/apache/iceberg/mr/TestCatalogs.java | 101 +++---- .../iceberg/mr/TestIcebergInputFormats.java | 31 +++ .../TestHiveIcebergStorageHandlerNoScan.java | 246 ++++++++++-------- 3 files changed, 225 insertions(+), 153 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index e9f0bd24de4f..dbbd2a96fa85 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -24,7 +24,6 @@ import java.util.Optional; import java.util.Properties; import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.PartitionSpec; @@ -65,10 +64,11 @@ public void before() { @Test public void testLoadTableFromLocation() throws IOException { - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); - AssertHelpers.assertThrows( - "Should complain about table location not set", IllegalArgumentException.class, - "location not set", () -> Catalogs.loadTable(conf)); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); + + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Table location not set"); HadoopTables tables = new HadoopTables(); Table hadoopTable = tables.create(SCHEMA, temp.newFolder("hadoop_tables").toString()); @@ -84,9 +84,9 @@ public void testLoadTableFromCatalog() throws IOException { String warehouseLocation = temp.newFolder("hadoop", "warehouse").toString(); setCustomCatalogProperties(defaultCatalogName, warehouseLocation); - AssertHelpers.assertThrows( - "Should complain about table identifier not set", IllegalArgumentException.class, - "identifier not set", () -> Catalogs.loadTable(conf)); + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Table identifier not set"); HadoopCatalog catalog = new CustomHadoopCatalog(conf, warehouseLocation); Table hadoopCatalogTable = catalog.createTable(TableIdentifier.of("table"), SCHEMA); @@ -100,16 +100,18 @@ public void testLoadTableFromCatalog() throws IOException { public void testCreateDropTableToLocation() throws IOException { Properties missingSchema = new Properties(); missingSchema.put("location", temp.newFolder("hadoop_tables").toString()); - AssertHelpers.assertThrows( - "Should complain about table schema not set", NullPointerException.class, - "schema not set", () -> Catalogs.createTable(conf, missingSchema)); + + Assertions.assertThatThrownBy(() -> Catalogs.createTable(conf, missingSchema)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table schema not set"); conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); Properties missingLocation = new Properties(); missingLocation.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); - AssertHelpers.assertThrows( - "Should complain about table location not set", NullPointerException.class, - "location not set", () -> Catalogs.createTable(conf, missingLocation)); + + Assertions.assertThatThrownBy(() -> Catalogs.createTable(conf, missingLocation)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table location not set"); Properties properties = new Properties(); properties.put("location", temp.getRoot() + "/hadoop_tables"); @@ -127,17 +129,17 @@ public void testCreateDropTableToLocation() throws IOException { Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); - AssertHelpers.assertThrows( - "Should complain about table location not set", NullPointerException.class, - "location not set", () -> Catalogs.dropTable(conf, new Properties())); + Assertions.assertThatThrownBy(() -> Catalogs.dropTable(conf, new Properties())) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table location not set"); Properties dropProperties = new Properties(); dropProperties.put("location", temp.getRoot() + "/hadoop_tables"); Catalogs.dropTable(conf, dropProperties); - AssertHelpers.assertThrows( - "Should complain about table not found", NoSuchTableException.class, - "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties)); + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf, dropProperties)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist at location: " + properties.getProperty("location")); } @Test @@ -151,16 +153,17 @@ public void testCreateDropTableToCatalog() throws IOException { Properties missingSchema = new Properties(); missingSchema.put("name", identifier.toString()); missingSchema.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName); - AssertHelpers.assertThrows( - "Should complain about table schema not set", NullPointerException.class, - "schema not set", () -> Catalogs.createTable(conf, missingSchema)); + + Assertions.assertThatThrownBy(() -> Catalogs.createTable(conf, missingSchema)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table schema not set"); Properties missingIdentifier = new Properties(); missingIdentifier.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); missingIdentifier.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName); - AssertHelpers.assertThrows( - "Should complain about table identifier not set", NullPointerException.class, - "identifier not set", () -> Catalogs.createTable(conf, missingIdentifier)); + Assertions.assertThatThrownBy(() -> Catalogs.createTable(conf, missingIdentifier)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table identifier not set"); Properties properties = new Properties(); properties.put("name", identifier.toString()); @@ -178,18 +181,18 @@ public void testCreateDropTableToCatalog() throws IOException { Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); - AssertHelpers.assertThrows( - "Should complain about table identifier not set", NullPointerException.class, - "identifier not set", () -> Catalogs.dropTable(conf, new Properties())); + Assertions.assertThatThrownBy(() -> Catalogs.dropTable(conf, new Properties())) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table identifier not set"); Properties dropProperties = new Properties(); dropProperties.put("name", identifier.toString()); dropProperties.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName); Catalogs.dropTable(conf, dropProperties); - AssertHelpers.assertThrows( - "Should complain about table not found", NoSuchTableException.class, - "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties)); + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf, dropProperties)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: test.table"); } @Test @@ -238,9 +241,10 @@ public void testLegacyLoadCatalogLocation() { @Test public void testLegacyLoadCatalogUnknown() { conf.set(InputFormatConfig.CATALOG, "fooType"); - AssertHelpers.assertThrows( - "should complain about catalog not supported", UnsupportedOperationException.class, - "Unknown catalog type", () -> Catalogs.loadCatalog(conf, null)); + + Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, null)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unknown catalog type"); } @Test @@ -274,8 +278,10 @@ public void testLegacyLoadCustomCatalogWithHiveCatalogTypeSet() { CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE); conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName()); conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class, - "both type and catalog-impl are set", () -> Catalogs.loadCatalog(conf, catalogName)); + + Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, catalogName)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("both type and catalog-impl are set"); } @Test @@ -332,10 +338,13 @@ public void testLoadCatalogLocation() { @Test public void testLoadCatalogUnknown() { String catalogName = "barCatalog"; - conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), "fooType"); - AssertHelpers.assertThrows( - "should complain about catalog not supported", UnsupportedOperationException.class, - "Unknown catalog type:", () -> Catalogs.loadCatalog(conf, catalogName)); + conf.set( + InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), + "fooType"); + + Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, catalogName)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unknown catalog type: fooType"); } @Test @@ -347,16 +356,18 @@ public void testDefaultCatalogProperties() { HiveCatalog defaultCatalog = (HiveCatalog) Catalogs.loadCatalog(conf, null).get(); Assert.assertEquals("true", defaultCatalog.properties().get(catalogProperty)); Assert.assertEquals("true", - defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")).io().properties().get(catalogProperty)); + defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")) + .io().properties().get(catalogProperty)); // set property at catalog level, and that should take precedence over the global property. conf.setBoolean( - String.format("%s%s.%s", InputFormatConfig.CATALOG_CONFIG_PREFIX, Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, - catalogProperty), false); + String.format("%s%s.%s", InputFormatConfig.CATALOG_CONFIG_PREFIX, Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, + catalogProperty), false); defaultCatalog = (HiveCatalog) Catalogs.loadCatalog(conf, null).get(); Assert.assertEquals("false", defaultCatalog.properties().get(catalogProperty)); Assert.assertEquals("false", - defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")).io().properties().get(catalogProperty)); + defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")) + .io().properties().get(catalogProperty)); } public static class CustomHadoopCatalog extends HadoopCatalog { diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java index 0f749f59b249..d3b5a4ca9f25 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java @@ -64,6 +64,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; @@ -202,6 +203,36 @@ public void testResiduals() throws Exception { testInputFormat.create(builder.conf()).validate(writeRecords); } + @Test + public void testFailedResidualFiltering() throws Exception { + helper.createTable(); + + List expectedRecords = helper.generateRandomRecords(2, 0L); + expectedRecords.get(0).set(2, "2020-03-20"); + expectedRecords.get(1).set(2, "2020-03-20"); + + helper.appendToTable(Row.of("2020-03-20", 0), expectedRecords); + + builder + .useHiveRows() + .filter( + Expressions.and(Expressions.equal("date", "2020-03-20"), Expressions.equal("id", 0))); + + Assertions.assertThatThrownBy(() -> testInputFormat.create(builder.conf())) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage( + "Filter expression ref(name=\"id\") == 0 is not completely satisfied. Additional rows can be returned " + + "not satisfied by the filter expression"); + + builder.usePigTuples(); + + Assertions.assertThatThrownBy(() -> testInputFormat.create(builder.conf())) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage( + "Filter expression ref(name=\"id\") == 0 is not completely satisfied. Additional rows can be returned " + + "not satisfied by the filter expression"); + } + @Test public void testProjection() throws Exception { helper.createTable(); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index 294ba9aba689..3d317ff70295 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -48,7 +48,6 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.BaseTable; import org.apache.iceberg.FileFormat; @@ -81,6 +80,7 @@ import org.apache.iceberg.types.Types; import org.apache.parquet.hadoop.ParquetOutputFormat; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -451,11 +451,9 @@ public void testCreateDropTable() throws TException, IOException, InterruptedExc shell.executeStatement("DROP TABLE customers"); // Check if the table was really dropped even from the Catalog - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessageStartingWith("Table does not exist"); } else { Path hmsTableLocation = new Path(hmsTable.getSd().getLocation()); @@ -463,11 +461,9 @@ public void testCreateDropTable() throws TException, IOException, InterruptedExc shell.executeStatement("DROP TABLE customers"); // Check if we drop an exception when trying to load the table - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: default.customers"); // Check if the files are removed FileSystem fs = Util.getFs(hmsTableLocation, shell.getHiveConf()); @@ -500,11 +496,9 @@ public void testCreateDropTableNonDefaultCatalog() { shell.executeStatement("DROP TABLE default.customers"); // Check if the table was really dropped even from the Catalog - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessageStartingWith("Table does not exist"); } @Test @@ -599,11 +593,9 @@ public void testDeleteBackingTable() throws TException, IOException, Interrupted shell.executeStatement("DROP TABLE customers"); // Check if we drop an exception when trying to drop the table - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: default.customers"); // Check if the files are kept FileSystem fs = Util.getFs(hmsTableLocation, shell.getHiveConf()); @@ -633,11 +625,9 @@ public void testDropTableWithCorruptedMetadata() throws TException, IOException, // check if HMS table is nonetheless still droppable shell.executeStatement(String.format("DROP TABLE %s", identifier)); - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: default.customers"); } @Test @@ -645,37 +635,55 @@ public void testCreateTableError() { TableIdentifier identifier = TableIdentifier.of("default", "withShell2"); // Wrong schema - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Unrecognized token 'WrongSchema'", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE withShell2 " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(identifier) + - "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='WrongSchema'" + - ",'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')"); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE withShell2 " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL(identifier) + + "TBLPROPERTIES ('" + + InputFormatConfig.TABLE_SCHEMA + + "'='WrongSchema'" + + ",'" + + InputFormatConfig.CATALOG_NAME + + "'='" + + testTables.catalogName() + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("Unrecognized token 'WrongSchema'"); // Missing schema, we try to get the schema from the table and fail - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Please provide ", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE withShell2 " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(identifier) + - testTables.propertiesForCreateTableSQL(ImmutableMap.of())); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE withShell2 " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL(identifier) + + testTables.propertiesForCreateTableSQL(ImmutableMap.of()))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("Please provide an existing table or a valid schema"); if (!testTables.locationForCreateTableSQL(identifier).isEmpty()) { // Only test this if the location is required - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Table location not set", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE withShell2 " + - "STORED BY ICEBERG " + - "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + - SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "','" + - InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')"); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE withShell2 " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + "TBLPROPERTIES ('" + + InputFormatConfig.TABLE_SCHEMA + + "'='" + + SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + + "','" + + InputFormatConfig.CATALOG_NAME + + "'='" + + testTables.catalogName() + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageEndingWith("Table location not set"); } } @@ -687,15 +695,23 @@ public void testCreateTableAboveExistingTable() throws IOException { if (testTableType == TestTables.TestTableType.HIVE_CATALOG) { // In HiveCatalog we just expect an exception since the table is already exists - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "customers already exists", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE customers " + - "STORED BY ICEBERG " + - "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + - SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "',' " + - InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')"); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE customers " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + "TBLPROPERTIES ('" + + InputFormatConfig.TABLE_SCHEMA + + "'='" + + SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + + "',' " + + InputFormatConfig.CATALOG_NAME + + "'='" + + testTables.catalogName() + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("customers already exists"); } else { // With other catalogs, table creation should succeed shell.executeStatement("CREATE EXTERNAL TABLE customers " + @@ -727,16 +743,24 @@ public void testCreatePartitionedTableWithPropertiesAndWithColumnSpecification() PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name").build(); - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Provide only one of the following", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE customers (customer_id BIGINT) " + - "PARTITIONED BY (first_name STRING) " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(TableIdentifier.of("default", "customers")) + - testTables.propertiesForCreateTableSQL( - ImmutableMap.of(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec)))); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE customers (customer_id BIGINT) " + + "PARTITIONED BY (first_name STRING) " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL( + TableIdentifier.of("default", "customers")) + + " TBLPROPERTIES ('" + + InputFormatConfig.PARTITION_SPEC + + "'='" + + PartitionSpecParser.toJson(spec) + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageEndingWith( + "Provide only one of the following: Hive partition specification, " + + "or the iceberg.mr.table.partition.spec property"); } @Test @@ -799,15 +823,19 @@ public void testCreateTableWithNotSupportedTypes() { "CHAR(1)", Types.StringType.get()); for (String notSupportedType : notSupportedTypes.keySet()) { - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Unsupported Hive type", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE not_supported_types " + - "(not_supported " + notSupportedType + ") " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(identifier) + - testTables.propertiesForCreateTableSQL(ImmutableMap.of())); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE not_supported_types " + + "(not_supported " + + notSupportedType + + ") " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL(identifier) + + testTables.propertiesForCreateTableSQL(ImmutableMap.of()))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("Unsupported Hive type"); } } @@ -1344,15 +1372,17 @@ public void testAlterTableReplaceColumnsFailsWhenNotOnlyDropping() { }; for (String command : commands) { - AssertHelpers.assertThrows("", IllegalArgumentException.class, - "Unsupported operation to use REPLACE COLUMNS", () -> shell.executeStatement(command)); + Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported operation to use REPLACE COLUMNS"); } // check no-op case too String command = "ALTER TABLE default.customers REPLACE COLUMNS (customer_id int, first_name string COMMENT 'This" + " is first name', last_name string COMMENT 'This is last name', address struct)"; - AssertHelpers.assertThrows("", IllegalArgumentException.class, - "No schema change detected", () -> shell.executeStatement(command)); + Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("No schema change detected"); } @Test @@ -1445,9 +1475,9 @@ public void testCommandsWithPartitionClauseThrow() { }; for (String command : commands) { - AssertHelpers.assertThrows("Should throw unsupported operation exception for queries with partition spec", - IllegalArgumentException.class, "Using partition spec in query is unsupported", - () -> shell.executeStatement(command)); + Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Using partition spec in query is unsupported"); } } @@ -1597,12 +1627,12 @@ public void testAlterTableWithMetadataLocationFromAnotherTable() throws IOExcept TableIdentifier targetIdentifier = TableIdentifier.of("default", "target"); testTables.createTable(shell, targetIdentifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, PartitionSpec.unpartitioned(), FileFormat.PARQUET, Collections.emptyList(), 1, Collections.emptyMap()); - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Cannot change iceberg table", - () -> { - shell.executeStatement("ALTER TABLE " + targetIdentifier.name() + " SET TBLPROPERTIES('metadata_location'='" + + Assertions.assertThatThrownBy(() -> { + shell.executeStatement("ALTER TABLE " + targetIdentifier.name() + " SET TBLPROPERTIES('metadata_location'='" + metadataLocation + "')"); - }); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot change iceberg table"); } @Test @@ -1612,12 +1642,12 @@ public void testAlterTableToIcebergAndMetadataLocation() throws IOException { testTables.locationForCreateTableSQL(TableIdentifier.of("default", tableName)) + testTables.propertiesForCreateTableSQL(ImmutableMap.of()); shell.executeStatement(createQuery); - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Cannot perform table migration to Iceberg and setting the snapshot location in one step.", - () -> { - shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES(" + + Assertions.assertThatThrownBy(() -> { + shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES(" + "'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler','metadata_location'='asdf')"); - }); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot perform table migration to Iceberg and setting the snapshot location in one step."); } @Test @@ -1658,12 +1688,13 @@ public void testCTLTHiveCatalogValidation() throws TException, InterruptedExcept shell.executeStatement("insert into source values(1)"); // Run a CTLT query. - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - " CTLT target table must be a HiveCatalog table", () -> { - shell.executeStatement(String.format("CREATE TABLE dest LIKE source STORED BY ICEBERG %s %s", + Assertions.assertThatThrownBy(() -> { + shell.executeStatement(String.format("CREATE TABLE dest LIKE source STORED BY ICEBERG %s %s", testTables.locationForCreateTableSQL(TableIdentifier.of("default", "dest")), testTables.propertiesForCreateTableSQL(ImmutableMap.of()))); - }); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage(" CTLT target table must be a HiveCatalog table"); } @Test @@ -1779,14 +1810,13 @@ public void testCreateTableWithMetadataLocationWithoutSchema() throws IOExceptio testTables.propertiesForCreateTableSQL(Collections.singletonMap("metadata_location", metadataLocation)); // Try the query with columns also specified, it should throw exception. - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Column names can not be provided along with metadata location.", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE target (id int) STORED BY ICEBERG " + + Assertions.assertThatThrownBy(() -> { + shell.executeStatement("CREATE EXTERNAL TABLE target (id int) STORED BY ICEBERG " + testTables.locationForCreateTableSQL(targetIdentifier) + tblProps); - }); - shell.executeStatement( - "CREATE EXTERNAL TABLE target STORED BY ICEBERG " + testTables.locationForCreateTableSQL(targetIdentifier) + - tblProps); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("CREATE EXTERNAL TABLE target STORED BY ICEBERG " + + testTables.locationForCreateTableSQL(targetIdentifier) + tblProps); // Check the partition and the schema are preserved. Table targetIcebergTable = From 47f42e76958e3e33567c8e0eafd7f5cfa0563c75 Mon Sep 17 00:00:00 2001 From: Rui Li Date: Sat, 25 Mar 2023 00:55:32 +0800 Subject: [PATCH 19/35] Hive: Support customizable ClientPool (#6698) (cherry picked from commit ef5c7318aa98c016cc32c4595cc65d70ec1ab7be) --- iceberg/iceberg-catalog/pom.xml | 5 + .../apache/iceberg/hive/CachedClientPool.java | 130 +++++++++++++++++- .../org/apache/iceberg/hive/HiveCatalog.java | 5 + .../iceberg/hive/TestCachedClientPool.java | 85 +++++++++++- .../iceberg/hive/TestLoadHiveCatalog.java | 106 ++++++++++++++ iceberg/pom.xml | 11 ++ 6 files changed, 333 insertions(+), 9 deletions(-) create mode 100644 iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestLoadHiveCatalog.java diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index 67a86112b87e..9633e5b1cb83 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -57,6 +57,11 @@ org.apache.hive hive-exec + + org.immutables + value + provided + org.apache.hive hive-standalone-metastore-server diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java index e2dc990383e2..2ff0495e7ac0 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java @@ -21,25 +21,60 @@ import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Comparator; +import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.ClientPool; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.PropertyUtil; import org.apache.thrift.TException; +import org.immutables.value.Value; +/** + * A ClientPool that caches the underlying HiveClientPool instances. + * + *

The following key elements are supported and can be specified via {@link + * CatalogProperties#CLIENT_POOL_CACHE_KEYS}: + * + *

    + *
  • ugi - the Hadoop UserGroupInformation instance that represents the current user using the + * cache. + *
  • user_name - similar to UGI but only includes the user's name determined by + * UserGroupInformation#getUserName. + *
  • conf - name of an arbitrary configuration. The value of the configuration will be extracted + * from catalog properties and added to the cache key. A conf element should start with a + * "conf:" prefix which is followed by the configuration name. E.g. specifying + * "conf:metastore.catalog.default" will add "metastore.catalog.default" to the key, and so + * that configurations with different default catalog wouldn't share the same client pool. + * Multiple conf elements can be specified. + *
+ */ public class CachedClientPool implements ClientPool { - private static Cache clientPoolCache; + private static final String CONF_ELEMENT_PREFIX = "conf:"; + + private static Cache clientPoolCache; private final Configuration conf; private final String metastoreUri; private final int clientPoolSize; private final long evictionInterval; + private final Key key; public CachedClientPool(Configuration conf, Map properties) { this.conf = conf; @@ -50,24 +85,25 @@ public CachedClientPool(Configuration conf, Map properties) { this.evictionInterval = PropertyUtil.propertyAsLong(properties, CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS_DEFAULT); + this.key = extractKey(properties.get(CatalogProperties.CLIENT_POOL_CACHE_KEYS), conf); init(); } @VisibleForTesting HiveClientPool clientPool() { - return clientPoolCache.get(metastoreUri, k -> new HiveClientPool(clientPoolSize, conf)); + return clientPoolCache.get(key, k -> new HiveClientPool(clientPoolSize, conf)); } private synchronized void init() { if (clientPoolCache == null) { clientPoolCache = Caffeine.newBuilder().expireAfterAccess(evictionInterval, TimeUnit.MILLISECONDS) - .removalListener((key, value, cause) -> ((HiveClientPool) value).close()) + .removalListener((invisible, value, cause) -> ((HiveClientPool) value).close()) .build(); } } @VisibleForTesting - static Cache clientPoolCache() { + static Cache clientPoolCache() { return clientPoolCache; } @@ -81,4 +117,90 @@ public R run(Action action, boolean retry) throws TException, InterruptedException { return clientPool().run(action, retry); } + + @VisibleForTesting + static Key extractKey(String cacheKeys, Configuration conf) { + // generate key elements in a certain order, so that the Key instances are comparable + List elements = Lists.newArrayList(); + elements.add(conf.get(HiveConf.ConfVars.METASTOREURIS.varname, "")); + if (cacheKeys == null || cacheKeys.isEmpty()) { + return Key.of(elements); + } + + Set types = Sets.newTreeSet(Comparator.comparingInt(Enum::ordinal)); + Map confElements = Maps.newTreeMap(); + for (String element : cacheKeys.split(",", -1)) { + String trimmed = element.trim(); + if (trimmed.toLowerCase(Locale.ROOT).startsWith(CONF_ELEMENT_PREFIX)) { + String key = trimmed.substring(CONF_ELEMENT_PREFIX.length()); + ValidationException.check( + !confElements.containsKey(key), "Conf key element %s already specified", key); + confElements.put(key, conf.get(key)); + } else { + KeyElementType type = KeyElementType.valueOf(trimmed.toUpperCase()); + switch (type) { + case UGI: + case USER_NAME: + ValidationException.check( + !types.contains(type), "%s key element already specified", type.name()); + types.add(type); + break; + default: + throw new ValidationException("Unknown key element %s", trimmed); + } + } + } + for (KeyElementType type : types) { + switch (type) { + case UGI: + try { + elements.add(UserGroupInformation.getCurrentUser()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + break; + case USER_NAME: + try { + elements.add(UserGroupInformation.getCurrentUser().getUserName()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + break; + default: + throw new RuntimeException("Unexpected key element " + type.name()); + } + } + for (String key : confElements.keySet()) { + elements.add(ConfElement.of(key, confElements.get(key))); + } + return Key.of(elements); + } + + @Value.Immutable + abstract static class Key { + + abstract List elements(); + + private static Key of(Iterable elements) { + return ImmutableKey.builder().elements(elements).build(); + } + } + + @Value.Immutable + abstract static class ConfElement { + abstract String key(); + + @Nullable + abstract String value(); + + static ConfElement of(String key, String value) { + return ImmutableConfElement.builder().key(key).value(value).build(); + } + } + + private enum KeyElementType { + UGI, + USER_NAME, + CONF + } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 469b1a2c9cab..5951473e8638 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -583,4 +583,9 @@ public Map properties() { void setListAllTables(boolean listAllTables) { this.listAllTables = listAllTables; } + + @VisibleForTesting + ClientPool clientPool() { + return clients; + } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java index 7c1f3c4028fb..a5f624753664 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java @@ -19,9 +19,14 @@ package org.apache.iceberg.hive; +import java.security.PrivilegedAction; import java.util.Collections; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.hive.CachedClientPool.Key; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; @@ -29,15 +34,85 @@ public class TestCachedClientPool extends HiveMetastoreTest { @Test public void testClientPoolCleaner() throws InterruptedException { - String metastoreUri = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); CachedClientPool clientPool = new CachedClientPool(hiveConf, Collections.emptyMap()); HiveClientPool clientPool1 = clientPool.clientPool(); - Assert.assertTrue(CachedClientPool.clientPoolCache().getIfPresent(metastoreUri) == clientPool1); + Assertions.assertThat( + CachedClientPool.clientPoolCache() + .getIfPresent(CachedClientPool.extractKey(null, hiveConf))) + .isSameAs(clientPool1); TimeUnit.MILLISECONDS.sleep(EVICTION_INTERVAL - TimeUnit.SECONDS.toMillis(2)); HiveClientPool clientPool2 = clientPool.clientPool(); - Assert.assertTrue(clientPool1 == clientPool2); + Assert.assertSame(clientPool1, clientPool2); TimeUnit.MILLISECONDS.sleep(EVICTION_INTERVAL + TimeUnit.SECONDS.toMillis(5)); - Assert.assertNull(CachedClientPool.clientPoolCache().getIfPresent(metastoreUri)); + Assert.assertNull( + CachedClientPool.clientPoolCache() + .getIfPresent(CachedClientPool.extractKey(null, hiveConf))); + } + + @Test + public void testCacheKey() throws Exception { + UserGroupInformation current = UserGroupInformation.getCurrentUser(); + UserGroupInformation foo1 = UserGroupInformation.createProxyUser("foo", current); + UserGroupInformation foo2 = UserGroupInformation.createProxyUser("foo", current); + UserGroupInformation bar = UserGroupInformation.createProxyUser("bar", current); + + Key key1 = + foo1.doAs( + (PrivilegedAction) + () -> CachedClientPool.extractKey("user_name,conf:key1", hiveConf)); + Key key2 = + foo2.doAs( + (PrivilegedAction) + () -> CachedClientPool.extractKey("conf:key1,user_name", hiveConf)); + Assert.assertEquals("Key elements order shouldn't matter", key1, key2); + + key1 = foo1.doAs((PrivilegedAction) () -> CachedClientPool.extractKey("ugi", hiveConf)); + key2 = bar.doAs((PrivilegedAction) () -> CachedClientPool.extractKey("ugi", hiveConf)); + Assert.assertNotEquals("Different users are not supposed to be equivalent", key1, key2); + + key2 = foo2.doAs((PrivilegedAction) () -> CachedClientPool.extractKey("ugi", hiveConf)); + Assert.assertNotEquals("Different UGI instances are not supposed to be equivalent", key1, key2); + + key1 = CachedClientPool.extractKey("ugi", hiveConf); + key2 = CachedClientPool.extractKey("ugi,conf:key1", hiveConf); + Assert.assertNotEquals( + "Keys with different number of elements are not supposed to be equivalent", key1, key2); + + Configuration conf1 = new Configuration(hiveConf); + Configuration conf2 = new Configuration(hiveConf); + + conf1.set("key1", "val"); + key1 = CachedClientPool.extractKey("conf:key1", conf1); + key2 = CachedClientPool.extractKey("conf:key1", conf2); + Assert.assertNotEquals( + "Config with different values are not supposed to be equivalent", key1, key2); + + conf2.set("key1", "val"); + conf2.set("key2", "val"); + key2 = CachedClientPool.extractKey("conf:key2", conf2); + Assert.assertNotEquals( + "Config with different keys are not supposed to be equivalent", key1, key2); + + key1 = CachedClientPool.extractKey("conf:key1,ugi", conf1); + key2 = CachedClientPool.extractKey("ugi,conf:key1", conf2); + Assert.assertEquals("Config with same key/value should be equivalent", key1, key2); + + conf1.set("key2", "val"); + key1 = CachedClientPool.extractKey("conf:key2 ,conf:key1", conf1); + key2 = CachedClientPool.extractKey("conf:key2,conf:key1", conf2); + Assert.assertEquals("Config with same key/value should be equivalent", key1, key2); + + Assertions.assertThatThrownBy( + () -> CachedClientPool.extractKey("ugi,ugi", hiveConf), + "Duplicate key elements should result in an error") + .isInstanceOf(ValidationException.class) + .hasMessageContaining("UGI key element already specified"); + + Assertions.assertThatThrownBy( + () -> CachedClientPool.extractKey("conf:k1,conf:k2,CONF:k1", hiveConf), + "Duplicate conf key elements should result in an error") + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Conf key element k1 already specified"); } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestLoadHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestLoadHiveCatalog.java new file mode 100644 index 000000000000..7311432a54fc --- /dev/null +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestLoadHiveCatalog.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import java.util.Collections; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestLoadHiveCatalog { + + private static TestHiveMetastore metastore; + + @BeforeClass + public static void startMetastore() throws Exception { + HiveConf hiveConf = new HiveConf(TestLoadHiveCatalog.class); + metastore = new TestHiveMetastore(); + metastore.start(hiveConf); + } + + @AfterClass + public static void stopMetastore() throws Exception { + if (metastore != null) { + metastore.stop(); + metastore = null; + } + } + + @Test + public void testCustomCacheKeys() throws Exception { + HiveCatalog hiveCatalog1 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + Collections.emptyMap(), + metastore.hiveConf()); + HiveCatalog hiveCatalog2 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + Collections.emptyMap(), + metastore.hiveConf()); + + CachedClientPool clientPool1 = (CachedClientPool) hiveCatalog1.clientPool(); + CachedClientPool clientPool2 = (CachedClientPool) hiveCatalog2.clientPool(); + Assert.assertSame(clientPool1.clientPool(), clientPool2.clientPool()); + + Configuration conf1 = new Configuration(metastore.hiveConf()); + Configuration conf2 = new Configuration(metastore.hiveConf()); + conf1.set("any.key", "any.value"); + conf2.set("any.key", "any.value"); + hiveCatalog1 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of(CatalogProperties.CLIENT_POOL_CACHE_KEYS, "conf:any.key"), + conf1); + hiveCatalog2 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of(CatalogProperties.CLIENT_POOL_CACHE_KEYS, "conf:any.key"), + conf2); + clientPool1 = (CachedClientPool) hiveCatalog1.clientPool(); + clientPool2 = (CachedClientPool) hiveCatalog2.clientPool(); + Assert.assertSame(clientPool1.clientPool(), clientPool2.clientPool()); + + conf2.set("any.key", "any.value2"); + hiveCatalog2 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of(CatalogProperties.CLIENT_POOL_CACHE_KEYS, "conf:any.key"), + conf2); + clientPool2 = (CachedClientPool) hiveCatalog2.clientPool(); + Assert.assertNotSame(clientPool1.clientPool(), clientPool2.clientPool()); + } +} diff --git a/iceberg/pom.xml b/iceberg/pom.xml index 1c6015ea32b8..3d6999165d93 100644 --- a/iceberg/pom.xml +++ b/iceberg/pom.xml @@ -36,6 +36,7 @@ 2.5.1 3.19.0 5.7.2 + 2.9.2 false @@ -208,6 +209,11 @@ mockito-inline ${iceberg.mockito-core.version} + + org.immutables + value + ${immutables.value.version} + @@ -301,6 +307,11 @@ error_prone_core ${google.errorprone.version} + + org.immutables + value + ${immutables.value.version} + From 5a028946238d955e4aff0c617b62d567459e854e Mon Sep 17 00:00:00 2001 From: frankliee <885626704@qq.com> Date: Wed, 26 Apr 2023 23:18:10 +0800 Subject: [PATCH 20/35] Hive: Clean up expired metastore clients (#7310) --- .../org/apache/iceberg/hive/CachedClientPool.java | 13 +++++++++++-- .../apache/iceberg/hive/TestCachedClientPool.java | 4 ++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java index 2ff0495e7ac0..99b93d632a30 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java @@ -21,6 +21,7 @@ import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.Scheduler; import java.io.IOException; import java.io.UncheckedIOException; import java.util.Comparator; @@ -42,6 +43,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.PropertyUtil; +import org.apache.iceberg.util.ThreadPools; import org.apache.thrift.TException; import org.immutables.value.Value; @@ -96,8 +98,15 @@ HiveClientPool clientPool() { private synchronized void init() { if (clientPoolCache == null) { - clientPoolCache = Caffeine.newBuilder().expireAfterAccess(evictionInterval, TimeUnit.MILLISECONDS) - .removalListener((invisible, value, cause) -> ((HiveClientPool) value).close()) + // Since Caffeine does not ensure that removalListener will be involved after expiration + // We use a scheduler with one thread to clean up expired clients. + clientPoolCache = + Caffeine.newBuilder() + .expireAfterAccess(evictionInterval, TimeUnit.MILLISECONDS) + .removalListener((ignored, value, cause) -> ((HiveClientPool) value).close()) + .scheduler( + Scheduler.forScheduledExecutorService( + ThreadPools.newScheduledPool("hive-metastore-cleaner", 1))) .build(); } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java index a5f624753664..eb8103f60a95 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java @@ -47,6 +47,10 @@ public void testClientPoolCleaner() throws InterruptedException { Assert.assertNull( CachedClientPool.clientPoolCache() .getIfPresent(CachedClientPool.extractKey(null, hiveConf))); + + // The client has been really closed. + Assert.assertTrue(clientPool1.isClosed()); + Assert.assertTrue(clientPool2.isClosed()); } @Test From a9d56cd2e7584594d9696e2dc238d4b7c2cfe07a Mon Sep 17 00:00:00 2001 From: Szehon Ho Date: Mon, 1 May 2023 15:28:28 -0700 Subject: [PATCH 21/35] Hive: Support connecting to multiple HMS-Catalog on same HMS URL (#7441) (cherry picked from commit 79c88a1775c4e2019fff00de7520826388158424) --- .../apache/iceberg/hive/CachedClientPool.java | 8 +-- .../org/apache/iceberg/hive/HiveCatalog.java | 3 ++ .../iceberg/hive/TestCachedClientPool.java | 50 +++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java index 99b93d632a30..061d071d4e50 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java @@ -60,10 +60,9 @@ * UserGroupInformation#getUserName. *
  • conf - name of an arbitrary configuration. The value of the configuration will be extracted * from catalog properties and added to the cache key. A conf element should start with a - * "conf:" prefix which is followed by the configuration name. E.g. specifying - * "conf:metastore.catalog.default" will add "metastore.catalog.default" to the key, and so - * that configurations with different default catalog wouldn't share the same client pool. - * Multiple conf elements can be specified. + * "conf:" prefix which is followed by the configuration name. E.g. specifying "conf:a.b.c" + * will add "a.b.c" to the key, and so that configurations with different default catalog + * wouldn't share the same client pool. Multiple conf elements can be specified. * */ public class CachedClientPool implements ClientPool { @@ -132,6 +131,7 @@ static Key extractKey(String cacheKeys, Configuration conf) { // generate key elements in a certain order, so that the Key instances are comparable List elements = Lists.newArrayList(); elements.add(conf.get(HiveConf.ConfVars.METASTOREURIS.varname, "")); + elements.add(conf.get(HiveCatalog.HIVE_CONF_CATALOG, "hive")); if (cacheKeys == null || cacheKeys.isEmpty()) { return Key.of(elements); } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 5951473e8638..4d60a2cbb933 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -70,6 +70,9 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa public static final String HMS_DB_OWNER = "hive.metastore.database.owner"; public static final String HMS_DB_OWNER_TYPE = "hive.metastore.database.owner-type"; + // MetastoreConf is not available with current Hive version + static final String HIVE_CONF_CATALOG = "metastore.catalog.default"; + private static final Logger LOG = LoggerFactory.getLogger(HiveCatalog.class); private String name; diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java index eb8103f60a95..fd9e5f569e4f 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java @@ -21,15 +21,21 @@ import java.security.PrivilegedAction; import java.util.Collections; +import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.hive.CachedClientPool.Key; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; +import static org.apache.iceberg.CatalogUtil.ICEBERG_CATALOG_TYPE; +import static org.apache.iceberg.CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE; + public class TestCachedClientPool extends HiveMetastoreTest { @Test @@ -119,4 +125,48 @@ public void testCacheKey() throws Exception { .hasMessageContaining("Conf key element k1 already specified"); } + @Test + public void testHmsCatalog() { + Map properties = + ImmutableMap.of( + String.valueOf(EVICTION_INTERVAL), + String.valueOf(Integer.MAX_VALUE), + ICEBERG_CATALOG_TYPE, + ICEBERG_CATALOG_TYPE_HIVE); + + Configuration conf1 = new Configuration(); + conf1.set(HiveCatalog.HIVE_CONF_CATALOG, "foo"); + + Configuration conf2 = new Configuration(); + conf2.set(HiveCatalog.HIVE_CONF_CATALOG, "foo"); + + Configuration conf3 = new Configuration(); + conf3.set(HiveCatalog.HIVE_CONF_CATALOG, "bar"); + + HiveCatalog catalog1 = (HiveCatalog) CatalogUtil.buildIcebergCatalog("1", properties, conf1); + HiveCatalog catalog2 = (HiveCatalog) CatalogUtil.buildIcebergCatalog("2", properties, conf2); + HiveCatalog catalog3 = (HiveCatalog) CatalogUtil.buildIcebergCatalog("3", properties, conf3); + HiveCatalog catalog4 = + (HiveCatalog) CatalogUtil.buildIcebergCatalog("4", properties, new Configuration()); + + HiveClientPool pool1 = ((CachedClientPool) catalog1.clientPool()).clientPool(); + HiveClientPool pool2 = ((CachedClientPool) catalog2.clientPool()).clientPool(); + HiveClientPool pool3 = ((CachedClientPool) catalog3.clientPool()).clientPool(); + HiveClientPool pool4 = ((CachedClientPool) catalog4.clientPool()).clientPool(); + + Assert.assertSame(pool1, pool2); + Assert.assertNotSame(pool3, pool1); + Assert.assertNotSame(pool3, pool2); + Assert.assertNotSame(pool3, pool4); + Assert.assertNotSame(pool4, pool1); + Assert.assertNotSame(pool4, pool2); + + Assert.assertEquals("foo", pool1.hiveConf().get(HiveCatalog.HIVE_CONF_CATALOG)); + Assert.assertEquals("bar", pool3.hiveConf().get(HiveCatalog.HIVE_CONF_CATALOG)); + Assert.assertNull(pool4.hiveConf().get(HiveCatalog.HIVE_CONF_CATALOG)); + + pool1.close(); + pool3.close(); + pool4.close(); + } } From 7cba254c4971c26aa4cd97786c19d5ae506e4759 Mon Sep 17 00:00:00 2001 From: Liu Xiao <42756849+liuxiaocs7@users.noreply.github.com> Date: Tue, 2 May 2023 15:22:47 +0800 Subject: [PATCH 22/35] Hive: Remove deprecated AssertHelpers (#7482) (cherry picked from commit cad9c6e795ddd5d3e7b98e1b19940daf1fdb1450) --- .../hive/HiveCreateReplaceTableTest.java | 38 ++-- .../apache/iceberg/hive/HiveTableTest.java | 105 +++++++++- .../apache/iceberg/hive/TestHiveCatalog.java | 188 +++++++----------- .../iceberg/hive/TestHiveClientPool.java | 15 +- .../iceberg/hive/TestHiveCommitLocks.java | 84 ++++---- .../apache/iceberg/hive/TestHiveCommits.java | 28 ++- .../iceberg/hive/TestHiveSchemaUtil.java | 11 +- .../mr/hive/TestHiveIcebergFilterFactory.java | 11 +- 8 files changed, 263 insertions(+), 217 deletions(-) diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java index 9aef3d4b128f..4165346a673a 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.iceberg.AppendFiles; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.PartitionSpec; @@ -35,6 +34,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -102,11 +102,9 @@ public void testCreateTableTxnTableCreatedConcurrently() { catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC); Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER)); - AssertHelpers.assertThrows( - "Create table txn should fail", - AlreadyExistsException.class, - "Table already exists: hivedb.tbl", - txn::commitTransaction); + Assertions.assertThatThrownBy(txn::commitTransaction) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.tbl"); } @Test @@ -139,11 +137,12 @@ public void testCreateTableTxnTableAlreadyExists() { catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC); Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER)); - AssertHelpers.assertThrows( - "Should not be possible to start a new create table txn", - AlreadyExistsException.class, - "Table already exists: hivedb.tbl", - () -> catalog.newCreateTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap())); + Assertions.assertThatThrownBy( + () -> + catalog.newCreateTableTransaction( + TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap())) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.tbl"); } @Test @@ -165,11 +164,10 @@ public void testReplaceTableTxn() { @Test public void testReplaceTableTxnTableNotExists() { - AssertHelpers.assertThrows( - "Should not be possible to start a new replace table txn", - NoSuchTableException.class, - "Table does not exist: hivedb.tbl", - () -> catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, false)); + Assertions.assertThatThrownBy( + () -> catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, false)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: hivedb.tbl"); } @Test @@ -185,11 +183,9 @@ public void testReplaceTableTxnTableDeletedConcurrently() { .set("prop", "value") .commit(); - AssertHelpers.assertThrows( - "Replace table txn should fail", - NoSuchTableException.class, - "No such table: hivedb.tbl", - txn::commitTransaction); + Assertions.assertThatThrownBy(txn::commitTransaction) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("No such table: hivedb.tbl"); } @Test diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java index 777e7ea590aa..cabf21ac1f98 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java @@ -21,27 +21,33 @@ import java.io.File; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hive.iceberg.org.apache.avro.generic.GenericData; import org.apache.hive.iceberg.org.apache.avro.generic.GenericRecordBuilder; -import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.BaseTable; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileScanTask; import org.apache.iceberg.Files; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; +import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.TableProperties; import org.apache.iceberg.avro.Avro; import org.apache.iceberg.avro.AvroSchemaUtil; @@ -49,13 +55,16 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.hadoop.ConfigProperties; +import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -68,6 +77,7 @@ import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; import static org.apache.iceberg.BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP; import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP; +import static org.apache.iceberg.TableMetadataParser.getFileExtension; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; @@ -404,6 +414,84 @@ public void testRegisterTable() throws TException { Assert.assertEquals(originalTable.getSd(), newTable.getSd()); } + @Test + public void testRegisterHadoopTableToHiveCatalog() throws IOException, TException { + // create a hadoop catalog + String tableLocation = tempFolder.newFolder().toString(); + HadoopCatalog hadoopCatalog = new HadoopCatalog(new Configuration(), tableLocation); + // create table using hadoop catalog + TableIdentifier identifier = TableIdentifier.of(DB_NAME, "table1"); + Table table = + hadoopCatalog.createTable( + identifier, schema, PartitionSpec.unpartitioned(), Maps.newHashMap()); + // insert some data + String file1Location = appendData(table, "file1"); + List tasks = Lists.newArrayList(table.newScan().planFiles()); + Assert.assertEquals("Should scan 1 file", 1, tasks.size()); + Assert.assertEquals(tasks.get(0).file().path(), file1Location); + + // collect metadata file + List metadataFiles = + Arrays.stream(new File(table.location() + "/metadata").listFiles()) + .map(File::getAbsolutePath) + .filter(f -> f.endsWith(getFileExtension(TableMetadataParser.Codec.NONE))) + .collect(Collectors.toList()); + Assert.assertEquals(2, metadataFiles.size()); + + Assertions.assertThatThrownBy(() -> metastoreClient.getTable(DB_NAME, "table1")) + .isInstanceOf(NoSuchObjectException.class) + .hasMessage("hivedb.table1 table not found"); + Assertions.assertThatThrownBy(() -> catalog.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: hivedb.table1"); + + // register the table to hive catalog using the latest metadata file + String latestMetadataFile = ((BaseTable) table).operations().current().metadataFileLocation(); + catalog.registerTable(identifier, "file:" + latestMetadataFile); + Assert.assertNotNull(metastoreClient.getTable(DB_NAME, "table1")); + + // load the table in hive catalog + table = catalog.loadTable(identifier); + Assert.assertNotNull(table); + + // insert some data + String file2Location = appendData(table, "file2"); + tasks = Lists.newArrayList(table.newScan().planFiles()); + Assert.assertEquals("Should scan 2 files", 2, tasks.size()); + Set files = + tasks.stream().map(task -> task.file().path().toString()).collect(Collectors.toSet()); + Assert.assertTrue(files.contains(file1Location) && files.contains(file2Location)); + } + + private String appendData(Table table, String fileName) throws IOException { + GenericRecordBuilder recordBuilder = + new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test")); + List records = + Lists.newArrayList( + recordBuilder.set("id", 1L).build(), + recordBuilder.set("id", 2L).build(), + recordBuilder.set("id", 3L).build()); + + String fileLocation = table.location().replace("file:", "") + "/data/" + fileName + ".avro"; + try (FileAppender writer = + Avro.write(Files.localOutput(fileLocation)).schema(schema).named("test").build()) { + for (GenericData.Record rec : records) { + writer.add(rec); + } + } + + DataFile file = + DataFiles.builder(table.spec()) + .withRecordCount(3) + .withPath(fileLocation) + .withFileSizeInBytes(Files.localInput(fileLocation).getLength()) + .build(); + + table.newAppend().appendFile(file).commit(); + + return fileLocation; + } + @Test public void testRegisterExistingTable() throws TException { org.apache.hadoop.hive.metastore.api.Table originalTable = metastoreClient.getTable(DB_NAME, TABLE_NAME); @@ -417,10 +505,10 @@ public void testRegisterExistingTable() throws TException { Assert.assertEquals(1, metadataVersionFiles.size()); // Try to register an existing table - AssertHelpers.assertThrows( - "Should complain that the table already exists", AlreadyExistsException.class, - "Table already exists", - () -> catalog.registerTable(TABLE_IDENTIFIER, "file:" + metadataVersionFiles.get(0))); + Assertions.assertThatThrownBy( + () -> catalog.registerTable(TABLE_IDENTIFIER, "file:" + metadataVersionFiles.get(0))) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.tbl"); } @Test @@ -496,10 +584,9 @@ public void testMissingMetadataWontCauseHang() { File fakeLocation = new File(metadataLocation(TABLE_NAME) + "_dummy"); Assert.assertTrue(realLocation.renameTo(fakeLocation)); - AssertHelpers.assertThrows( - "HiveTableOperations shouldn't hang indefinitely when a missing metadata file is encountered", - NotFoundException.class, - () -> catalog.loadTable(TABLE_IDENTIFIER)); + Assertions.assertThatThrownBy(() -> catalog.loadTable(TABLE_IDENTIFIER)) + .isInstanceOf(NotFoundException.class) + .hasMessageStartingWith("Failed to open input stream for file"); Assert.assertTrue(fakeLocation.renameTo(realLocation)); } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 6bceeded4791..6c60c71d069e 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -24,14 +24,11 @@ import java.util.Map; import java.util.Set; import java.util.UUID; -import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CachingCatalog; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; @@ -82,6 +79,7 @@ import static org.apache.iceberg.TableProperties.DEFAULT_SORT_ORDER; import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -327,10 +325,9 @@ public void testCreateNamespace() throws Exception { Assert.assertEquals("There no same location for db and namespace", database1.getLocationUri(), defaultUri(namespace1)); - AssertHelpers.assertThrows("Should fail to create when namespace already exist " + namespace1, - AlreadyExistsException.class, "Namespace '" + namespace1 + "' already exists!", () -> { - catalog.createNamespace(namespace1); - }); + assertThatThrownBy(() -> catalog.createNamespace(namespace1)) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Namespace '" + namespace1 + "' already exists!"); String hiveLocalDir = temp.newFolder().toURI().toString(); // remove the trailing slash of the URI hiveLocalDir = hiveLocalDir.substring(0, hiveLocalDir.length() - 1); @@ -388,39 +385,30 @@ public void testCreateNamespaceWithOwnership() throws Exception { "iceberg", PrincipalType.GROUP); - AssertHelpers.assertThrows( - String.format( - "Create namespace setting %s without setting %s is not allowed", - HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), - IllegalArgumentException.class, - () -> { - try { + assertThatThrownBy( + () -> createNamespaceAndVerifyOwnership( "create_with_owner_type_alone", ImmutableMap.of(HiveCatalog.HMS_DB_OWNER_TYPE, PrincipalType.USER.name()), "no_post_create_expectation_due_to_exception_thrown", - null); - } catch (TException e) { - throw new RuntimeException("Unexpected Exception", e); - } - }); - - AssertHelpers.assertThrows( - "No enum constant " + PrincipalType.class.getCanonicalName(), - IllegalArgumentException.class, - () -> { - try { + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Create namespace setting %s without setting %s is not allowed", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> createNamespaceAndVerifyOwnership( "create_with_invalid_owner_type", ImmutableMap.of( HiveCatalog.HMS_DB_OWNER, "iceberg", HiveCatalog.HMS_DB_OWNER_TYPE, "invalidOwnerType"), "no_post_create_expectation_due_to_exception_thrown", - null); - } catch (TException e) { - throw new RuntimeException("Unexpected Exception", e); - } - }); + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("No enum constant " + PrincipalType.class.getCanonicalName()); } private void createNamespaceAndVerifyOwnership( @@ -492,10 +480,10 @@ public void testSetNamespaceProperties() throws TException { Assert.assertEquals(database.getParameters().get("owner"), "alter_apache"); Assert.assertEquals(database.getParameters().get("test"), "test"); Assert.assertEquals(database.getParameters().get("group"), "iceberg"); - AssertHelpers.assertThrows("Should fail to namespace not exist" + namespace, - NoSuchNamespaceException.class, "Namespace does not exist: ", () -> { - catalog.setProperties(Namespace.of("db2", "db2", "ns2"), meta); - }); + assertThatThrownBy( + () -> catalog.setProperties(Namespace.of("db2", "db2", "ns2"), ImmutableMap.of())) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessage("Namespace does not exist: db2.db2.ns2"); } @Test @@ -556,13 +544,8 @@ public void testSetNamespaceOwnership() throws TException { "some_individual_owner", PrincipalType.USER); - AssertHelpers.assertThrows( - String.format( - "Setting %s and %s has to be performed together or not at all", - HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), - IllegalArgumentException.class, - () -> { - try { + assertThatThrownBy( + () -> setNamespaceOwnershipAndVerify( "set_owner_without_setting_owner_type", ImmutableMap.of(), @@ -570,19 +553,15 @@ public void testSetNamespaceOwnership() throws TException { System.getProperty("user.name"), PrincipalType.USER, "no_post_setting_expectation_due_to_exception_thrown", - null); - } catch (TException e) { - throw new RuntimeException("Unexpected Exception", e); - } - }); - - AssertHelpers.assertThrows( - String.format( - "Setting %s and %s has to be performed together or not at all", - HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), - IllegalArgumentException.class, - () -> { - try { + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Setting %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> setNamespaceOwnershipAndVerify( "set_owner_type_without_setting_owner", ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), @@ -590,21 +569,15 @@ public void testSetNamespaceOwnership() throws TException { "some_owner", PrincipalType.USER, "no_post_setting_expectation_due_to_exception_thrown", - null); - } catch (TException e) { - throw new RuntimeException("Unexpected Exception", e); - } - }); - - AssertHelpers.assertThrows( - HiveCatalog.HMS_DB_OWNER_TYPE + - " has an invalid value of: " + - meta.get(HiveCatalog.HMS_DB_OWNER_TYPE) + - ". Acceptable values are: " + - Stream.of(PrincipalType.values()).map(Enum::name).collect(Collectors.joining(", ")), - IllegalArgumentException.class, - () -> { - try { + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Setting %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> setNamespaceOwnershipAndVerify( "set_invalid_owner_type", ImmutableMap.of(), @@ -614,11 +587,10 @@ public void testSetNamespaceOwnership() throws TException { System.getProperty("user.name"), PrincipalType.USER, "no_post_setting_expectation_due_to_exception_thrown", - null); - } catch (TException e) { - throw new RuntimeException("Unexpected Exception", e); - } - }); + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "No enum constant org.apache.hadoop.hive.metastore.api.PrincipalType.invalidOwnerType"); } @Test @@ -707,10 +679,13 @@ public void testRemoveNamespaceProperties() throws TException { Assert.assertEquals(database.getParameters().get("owner"), null); Assert.assertEquals(database.getParameters().get("group"), "iceberg"); - AssertHelpers.assertThrows("Should fail to namespace not exist" + namespace, - NoSuchNamespaceException.class, "Namespace does not exist: ", () -> { - catalog.removeProperties(Namespace.of("db2", "db2", "ns2"), ImmutableSet.of("comment", "owner")); - }); + + assertThatThrownBy( + () -> + catalog.removeProperties( + Namespace.of("db2", "db2", "ns2"), ImmutableSet.of("comment", "owner"))) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessage("Namespace does not exist: db2.db2.ns2"); } @Test @@ -777,13 +752,8 @@ public void testRemoveNamespaceOwnership() throws TException, IOException { "some_group_owner", PrincipalType.GROUP); - AssertHelpers.assertThrows( - String.format( - "Removing %s and %s has to be performed together or not at all", - HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), - IllegalArgumentException.class, - () -> { - try { + assertThatThrownBy( + () -> removeNamespaceOwnershipAndVerify( "remove_owner_without_removing_owner_type", ImmutableMap.of( @@ -795,19 +765,15 @@ public void testRemoveNamespaceOwnership() throws TException, IOException { "some_individual_owner", PrincipalType.USER, "no_post_remove_expectation_due_to_exception_thrown", - null); - } catch (TException e) { - throw new RuntimeException("Unexpected Exception", e); - } - }); - - AssertHelpers.assertThrows( - String.format( - "Removing %s and %s has to be performed together or not at all", - HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER), - IllegalArgumentException.class, - () -> { - try { + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Removing %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> removeNamespaceOwnershipAndVerify( "remove_owner_type_without_removing_owner", ImmutableMap.of( @@ -819,11 +785,12 @@ public void testRemoveNamespaceOwnership() throws TException, IOException { "some_group_owner", PrincipalType.GROUP, "no_post_remove_expectation_due_to_exception_thrown", - null); - } catch (TException e) { - throw new RuntimeException("Unexpected Exception", e); - } - }); + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Removing %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); } private void removeNamespaceOwnershipAndVerify( @@ -858,20 +825,17 @@ public void testDropNamespace() throws TException { Assert.assertTrue(nameMata.get("owner").equals("apache")); Assert.assertTrue(nameMata.get("group").equals("iceberg")); - AssertHelpers.assertThrows("Should fail to drop namespace is not empty" + namespace, - NamespaceNotEmptyException.class, - "Namespace dbname_drop is not empty. One or more tables exist.", () -> { - catalog.dropNamespace(namespace); - }); + assertThatThrownBy(() -> catalog.dropNamespace(namespace)) + .isInstanceOf(NamespaceNotEmptyException.class) + .hasMessage("Namespace dbname_drop is not empty. One or more tables exist."); Assert.assertTrue(catalog.dropTable(identifier, true)); Assert.assertTrue("Should fail to drop namespace if it is not empty", catalog.dropNamespace(namespace)); Assert.assertFalse("Should fail to drop when namespace doesn't exist", catalog.dropNamespace(Namespace.of("db.ns1"))); - AssertHelpers.assertThrows("Should fail to drop namespace exist" + namespace, - NoSuchNamespaceException.class, "Namespace does not exist: ", () -> { - catalog.loadNamespaceMetadata(namespace); - }); + assertThatThrownBy(() -> catalog.loadNamespaceMetadata(namespace)) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessage("Namespace does not exist: dbname_drop"); } @Test diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java index 36996e33e3c6..208e95e0e432 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java @@ -32,9 +32,9 @@ import org.apache.hadoop.hive.metastore.api.GetAllFunctionsResponse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.PrincipalType; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.thrift.transport.TTransportException; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -97,8 +97,9 @@ private HiveConf createHiveConf() { @Test public void testNewClientFailure() { Mockito.doThrow(new RuntimeException("Connection exception")).when(clients).newClient(); - AssertHelpers.assertThrows("Should throw exception", RuntimeException.class, - "Connection exception", () -> clients.run(Object::toString)); + Assertions.assertThatThrownBy(() -> clients.run(Object::toString)) + .isInstanceOf(RuntimeException.class) + .hasMessage("Connection exception"); } @Test @@ -106,9 +107,11 @@ public void testGetTablesFailsForNonReconnectableException() throws Exception { HiveMetaStoreClient hmsClient = Mockito.mock(HiveMetaStoreClient.class); Mockito.doReturn(hmsClient).when(clients).newClient(); Mockito.doThrow(new MetaException("Another meta exception")) - .when(hmsClient).getTables(Mockito.anyString(), Mockito.anyString()); - AssertHelpers.assertThrows("Should throw exception", MetaException.class, - "Another meta exception", () -> clients.run(client -> client.getTables("default", "t"))); + .when(hmsClient) + .getTables(Mockito.anyString(), Mockito.anyString()); + Assertions.assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t"))) + .isInstanceOf(MetaException.class) + .hasMessage("Another meta exception"); } @Test diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java index 52e70c84dc05..f4f9a861034b 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java @@ -36,7 +36,6 @@ import org.apache.hadoop.hive.metastore.api.LockState; import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; @@ -46,6 +45,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; @@ -283,11 +283,11 @@ public void testUnLockAfterInterruptedLock() throws TException { doNothing().when(spyClient).unlock(eq(dummyLockId)); doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); - AssertHelpers.assertThrows( - "Expected an exception", - RuntimeException.class, - "Interrupted while creating lock", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Interrupted while creating lock on table hivedb.tbl"); verify(spyClient, times(1)).unlock(eq(dummyLockId)); // Make sure that we exit the lock loop on InterruptedException @@ -307,11 +307,11 @@ public void testUnLockAfterInterruptedLockCheck() throws TException { doNothing().when(spyClient).unlock(eq(dummyLockId)); doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); - AssertHelpers.assertThrows( - "Expected an exception", - RuntimeException.class, - "Could not acquire the lock on", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state WAITING"); verify(spyClient, times(1)).unlock(eq(dummyLockId)); // Make sure that we exit the checkLock loop on InterruptedException @@ -331,11 +331,9 @@ public void testUnLockAfterInterruptedGetTable() throws TException { doNothing().when(spyClient).unlock(eq(dummyLockId)); doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); - AssertHelpers.assertThrows( - "Expected an exception", - RuntimeException.class, - "Interrupted during commit", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage("Interrupted during commit"); verify(spyClient, times(1)).unlock(eq(dummyLockId)); } @@ -368,10 +366,11 @@ public long getLockid() { public void testLockFailureAtFirstTime() throws TException { doReturn(notAcquiredLockResponse).when(spyClient).lock(any()); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Could not acquire the lock on", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state NOT_ACQUIRED"); } @Test @@ -385,10 +384,11 @@ public void testLockFailureAfterRetries() throws TException { .when(spyClient) .checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Could not acquire the lock on", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state NOT_ACQUIRED"); } @Test @@ -396,10 +396,11 @@ public void testLockTimeoutAfterRetries() throws TException { doReturn(waitLockResponse).when(spyClient).lock(any()); doReturn(waitLockResponse).when(spyClient).checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Timed out after", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessageStartingWith("org.apache.iceberg.hive.LockException") + .hasMessageContaining("Timed out after") + .hasMessageEndingWith("waiting for lock on hivedb.tbl"); } @Test @@ -408,10 +409,10 @@ public void testPassThroughThriftExceptions() throws TException { doReturn(waitLockResponse).doThrow(new TException("Test Thrift Exception")) .when(spyClient).checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - RuntimeException.class, - "Metastore operation failed for", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: Metastore operation failed for hivedb.tbl"); } @Test @@ -423,10 +424,11 @@ public void testPassThroughInterruptions() throws TException { return waitLockResponse; }).when(spyClient).checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Could not acquire the lock on", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state WAITING"); } @Test @@ -479,11 +481,11 @@ public void testLockHeartbeatFailureDuringCommit() throws TException, Interrupte .when(spyClient) .heartbeat(eq(0L), eq(dummyLockId)); - AssertHelpers.assertThrows( - "Expected commit failure due to failure in heartbeat.", - CommitFailedException.class, - "Failed to heartbeat for hive lock. Failed to heart beat.", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Failed to heartbeat for hive lock. Failed to heart beat."); } @Test diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java index d4afb3b320af..60d4fa2f1bdf 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java @@ -21,7 +21,6 @@ import java.io.File; import java.util.concurrent.atomic.AtomicReference; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; @@ -33,6 +32,7 @@ import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; @@ -114,9 +114,9 @@ public void testThriftExceptionUnknownStateIfNotInHistoryFailureOnCommit() throw failCommitAndThrowException(spyOps); - AssertHelpers.assertThrows("We should assume commit state is unknown if the " + - "new location is not found in history in commit state check", CommitStateUnknownException.class, - "Datacenter on fire", () -> spyOps.commit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitStateUnknownException.class) + .hasMessageStartingWith("Datacenter on fire"); ops.refresh(); Assert.assertEquals("Current metadata should not have changed", metadataV2, ops.current()); @@ -186,9 +186,9 @@ public void testThriftExceptionUnknownFailedCommit() throws TException, Interrup failCommitAndThrowException(spyOps); breakFallbackCatalogCommitCheck(spyOps); - AssertHelpers.assertThrows("Should throw CommitStateUnknownException since the catalog check was blocked", - CommitStateUnknownException.class, "Datacenter on fire", - () -> spyOps.commit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitStateUnknownException.class) + .hasMessageStartingWith("Datacenter on fire"); ops.refresh(); @@ -224,9 +224,9 @@ public void testThriftExceptionsUnknownSuccessCommit() throws TException, Interr commitAndThrowException(ops, spyOps); breakFallbackCatalogCommitCheck(spyOps); - AssertHelpers.assertThrows("Should throw CommitStateUnknownException since the catalog check was blocked", - CommitStateUnknownException.class, "Datacenter on fire", - () -> spyOps.commit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitStateUnknownException.class) + .hasMessageStartingWith("Datacenter on fire"); ops.refresh(); @@ -340,11 +340,9 @@ public void testNoLockThriftExceptionConcurrentCommit() throws TException, Inter .persistTable(any(), anyBoolean(), any()); // Should throw a CommitFailedException so the commit could be retried - AssertHelpers.assertThrows( - "Should throw CommitFailedException since the table has been modified concurrently", - CommitFailedException.class, - "has been modified concurrently", - () -> spyOps.commit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage("The table hivedb.tbl has been modified concurrently"); ops.refresh(); Assert.assertEquals("Current metadata should not have changed", metadataV2, ops.current()); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java index a23e1f30ac12..8c0ef6b5adbb 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java @@ -26,12 +26,12 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; @@ -118,11 +118,10 @@ public void testSchemaConvertToIcebergSchemaForEveryPrimitiveType() { @Test public void testNotSupportedTypes() { for (FieldSchema notSupportedField : getNotSupportedFieldSchemas()) { - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Unsupported Hive type", () -> { - HiveSchemaUtil.convert(Lists.newArrayList(Arrays.asList(notSupportedField))); - } - ); + Assertions.assertThatThrownBy( + () -> HiveSchemaUtil.convert(Lists.newArrayList(Arrays.asList(notSupportedField)))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Unsupported Hive type"); } } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java index af1a30405b66..1614d937c37f 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.expressions.And; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.Literal; @@ -40,6 +39,7 @@ import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.DateTimeUtil; +import org.assertj.core.api.Assertions; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -148,12 +148,9 @@ public void testUnsupportedBetweenOperandEmptyLeaves() { .between("salary", PredicateLeaf.Type.LONG, 9000L, 15000L) .end() .build()); - - AssertHelpers.assertThrows( - "must throw if leaves are empty in between operator", - UnsupportedOperationException.class, - "Missing leaf literals", - () -> HiveIcebergFilterFactory.generateFilterExpression(arg)); + Assertions.assertThatThrownBy(() -> HiveIcebergFilterFactory.generateFilterExpression(arg)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Missing leaf literals: Leaf[empty]"); } @Test From a32dcb648cd7a5a3689eee6ca30362b79c9e2210 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 May 2023 12:16:19 -0500 Subject: [PATCH 23/35] Build: Bump com.esotericsoftware:kryo-shaded from 4.0.2 to 4.0.3 (#7669) Bumps com.esotericsoftware:kryo-shaded from 4.0.2 to 4.0.3. --- updated-dependencies: - dependency-name: com.esotericsoftware:kryo-shaded dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit be6235cf753aa2518404177b1b6f884bff60bf04) --- iceberg/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iceberg/pom.xml b/iceberg/pom.xml index 3d6999165d93..3a4658b9d349 100644 --- a/iceberg/pom.xml +++ b/iceberg/pom.xml @@ -26,7 +26,7 @@ .. . 1.3.0 - 4.0.2 + 4.0.3 3.4.4 1.11.1 5.2.0 From f3ee8f3d231b7affc717e57e15d9c9acd450faae Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Tue, 13 Jun 2023 16:09:04 +0200 Subject: [PATCH 24/35] "Fix failing tests - part I" --- .../apache/iceberg/hive/HiveTableTest.java | 2 +- .../org/apache/iceberg/mr/TestCatalogs.java | 6 +++--- .../TestHiveIcebergStorageHandlerNoScan.java | 21 +++++++++---------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java index cabf21ac1f98..e23d03effed1 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java @@ -440,7 +440,7 @@ public void testRegisterHadoopTableToHiveCatalog() throws IOException, TExceptio Assertions.assertThatThrownBy(() -> metastoreClient.getTable(DB_NAME, "table1")) .isInstanceOf(NoSuchObjectException.class) - .hasMessage("hivedb.table1 table not found"); + .hasMessage("hive.hivedb.table1 table not found"); Assertions.assertThatThrownBy(() -> catalog.loadTable(identifier)) .isInstanceOf(NoSuchTableException.class) .hasMessage("Table does not exist: hivedb.table1"); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index dbbd2a96fa85..89b957d4967e 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -68,7 +68,7 @@ public void testLoadTableFromLocation() throws IOException { Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Table location not set"); + .hasMessageContaining("Table location not set"); HadoopTables tables = new HadoopTables(); Table hadoopTable = tables.create(SCHEMA, temp.newFolder("hadoop_tables").toString()); @@ -244,7 +244,7 @@ public void testLegacyLoadCatalogUnknown() { Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, null)) .isInstanceOf(UnsupportedOperationException.class) - .hasMessage("Unknown catalog type"); + .hasMessageContaining("Unknown catalog type"); } @Test @@ -281,7 +281,7 @@ public void testLegacyLoadCustomCatalogWithHiveCatalogTypeSet() { Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, catalogName)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("both type and catalog-impl are set"); + .hasMessageContaining("both type and catalog-impl are set"); } @Test diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index 3d317ff70295..719ccafdfad7 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -759,7 +759,7 @@ public void testCreatePartitionedTableWithPropertiesAndWithColumnSpecification() .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Failed to execute Hive query") .hasMessageEndingWith( - "Provide only one of the following: Hive partition specification, " + + "Provide only one of the following: Hive partition transform specification, " + "or the iceberg.mr.table.partition.spec property"); } @@ -1374,7 +1374,7 @@ public void testAlterTableReplaceColumnsFailsWhenNotOnlyDropping() { for (String command : commands) { Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Unsupported operation to use REPLACE COLUMNS"); + .hasMessageContaining("Unsupported operation to use REPLACE COLUMNS"); } // check no-op case too @@ -1382,7 +1382,7 @@ public void testAlterTableReplaceColumnsFailsWhenNotOnlyDropping() { " is first name', last_name string COMMENT 'This is last name', address struct)"; Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("No schema change detected"); + .hasMessageContaining("No schema change detected"); } @Test @@ -1477,7 +1477,7 @@ public void testCommandsWithPartitionClauseThrow() { for (String command : commands) { Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Using partition spec in query is unsupported"); + .hasMessageContaining("Using partition spec in query is unsupported"); } } @@ -1632,7 +1632,7 @@ public void testAlterTableWithMetadataLocationFromAnotherTable() throws IOExcept metadataLocation + "')"); }) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot change iceberg table"); + .hasMessageContaining("Cannot change iceberg table"); } @Test @@ -1642,12 +1642,11 @@ public void testAlterTableToIcebergAndMetadataLocation() throws IOException { testTables.locationForCreateTableSQL(TableIdentifier.of("default", tableName)) + testTables.propertiesForCreateTableSQL(ImmutableMap.of()); shell.executeStatement(createQuery); - Assertions.assertThatThrownBy(() -> { - shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES(" + - "'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler','metadata_location'='asdf')"); - }) + Assertions.assertThatThrownBy(() -> shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES(" + + "'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler','metadata_location'='asdf')")) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Cannot perform table migration to Iceberg and setting the snapshot location in one step."); + .hasMessageContaining("Cannot perform table migration to Iceberg " + + "and setting the snapshot location in one step."); } @Test @@ -1694,7 +1693,7 @@ public void testCTLTHiveCatalogValidation() throws TException, InterruptedExcept testTables.propertiesForCreateTableSQL(ImmutableMap.of()))); }) .isInstanceOf(IllegalArgumentException.class) - .hasMessage(" CTLT target table must be a HiveCatalog table"); + .hasMessageContaining(" CTLT target table must be a HiveCatalog table"); } @Test From 1d7e5fd40cd6eee9455653185a8cca9ed68b1111 Mon Sep 17 00:00:00 2001 From: Daniel Weeks Date: Sun, 28 Aug 2022 15:21:17 -0700 Subject: [PATCH 25/35] MR: Remove deprecated properties for 1.0 release (#5657) --- .../java/org/apache/iceberg/mr/Catalogs.java | 84 ++++++------------- .../apache/iceberg/mr/InputFormatConfig.java | 23 ----- .../org/apache/iceberg/mr/TestCatalogs.java | 82 +----------------- .../iceberg/mr/TestIcebergInputFormats.java | 2 +- .../mr/TestInputFormatReaderDeletes.java | 3 +- .../hive/vector/TestHiveVectorizedReader.java | 3 +- 6 files changed, 31 insertions(+), 166 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java index d3bc13baedc3..e6555f9664f0 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -23,6 +23,7 @@ import java.util.Optional; import java.util.Properties; import java.util.Set; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; @@ -37,30 +38,28 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Streams; /** * Class for catalog resolution and accessing the common functions for {@link Catalog} API. - *

    - * If the catalog name is provided, get the catalog type from iceberg.catalog.catalogName.type config. - *

    - * In case the catalog name is {@link #ICEBERG_HADOOP_TABLE_NAME location_based_table}, - * type is ignored and tables will be loaded using {@link HadoopTables}. - *

    - * In case the value of catalog type is null, iceberg.catalog.catalogName.catalog-impl config - * is used to determine the catalog implementation class. - *

    - * If catalog name is null, get the catalog type from {@link InputFormatConfig#CATALOG iceberg.mr.catalog} config: + * + *

    If the catalog name is provided, get the catalog type from iceberg.catalog.catalogName + * .type config. + * + *

    In case the catalog name is {@link #ICEBERG_HADOOP_TABLE_NAME location_based_table}, type is + * ignored and tables will be loaded using {@link HadoopTables}. + * + *

    In case the value of catalog type is null, iceberg.catalog.catalogName + * .catalog-impl config is used to determine the catalog implementation class. + * + *

    If catalog name is null, get the catalog type from {@link CatalogUtil#ICEBERG_CATALOG_TYPE + * catalog type} config: + * *

      *
    • hive: HiveCatalog
    • *
    • location: HadoopTables
    • *
    • hadoop: HadoopCatalog
    • *
    - *

    - * In case the value of catalog type is null, - * {@link InputFormatConfig#CATALOG_LOADER_CLASS iceberg.mr.catalog.loader.class} is used to determine - * the catalog implementation class. - *

    - * Note: null catalog name mode is only supported for backwards compatibility. Using this mode is NOT RECOMMENDED. */ public final class Catalogs { @@ -254,48 +253,15 @@ static Optional loadCatalog(Configuration conf, String catalogName) { * @param catalogType type of the catalog * @return complete map of catalog properties */ - private static Map getCatalogProperties(Configuration conf, String catalogName, String catalogType) { - Map catalogProperties = Maps.newHashMap(); - conf.forEach(config -> { - if (config.getKey().startsWith(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX)) { - catalogProperties.putIfAbsent( - config.getKey().substring(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX.length()), - config.getValue()); - } else if (config.getKey().startsWith(InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName)) { - catalogProperties.put( - config.getKey().substring((InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName).length() + 1), - config.getValue()); - } - }); - - return addCatalogPropertiesIfMissing(conf, catalogType, catalogProperties); - } - - /** - * This method is used for backward-compatible catalog configuration. - * Collect all the catalog specific configuration from the global hive configuration. - * Note: this should be removed when the old catalog configuration is depracated. - * @param conf global hive configuration - * @param catalogType type of the catalog - * @param catalogProperties pre-populated catalog properties - * @return complete map of catalog properties - */ - private static Map addCatalogPropertiesIfMissing(Configuration conf, String catalogType, - Map catalogProperties) { - if (catalogType != null) { - catalogProperties.putIfAbsent(CatalogUtil.ICEBERG_CATALOG_TYPE, catalogType); - } - - String legacyCatalogImpl = conf.get(InputFormatConfig.CATALOG_LOADER_CLASS); - if (legacyCatalogImpl != null) { - catalogProperties.putIfAbsent(CatalogProperties.CATALOG_IMPL, legacyCatalogImpl); - } - - String legacyWarehouseLocation = conf.get(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION); - if (legacyWarehouseLocation != null) { - catalogProperties.putIfAbsent(CatalogProperties.WAREHOUSE_LOCATION, legacyWarehouseLocation); - } - return catalogProperties; + private static Map getCatalogProperties( + Configuration conf, String catalogName, String catalogType) { + String keyPrefix = InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName; + + return Streams.stream(conf.iterator()) + .filter(e -> e.getKey().startsWith(keyPrefix)) + .collect( + Collectors.toMap( + e -> e.getKey().substring(keyPrefix.length() + 1), Map.Entry::getValue)); } /** @@ -317,7 +283,7 @@ private static String getCatalogType(Configuration conf, String catalogName) { return catalogType; } } else { - String catalogType = conf.get(InputFormatConfig.CATALOG); + String catalogType = conf.get(CatalogUtil.ICEBERG_CATALOG_TYPE); if (catalogType != null && catalogType.equals(LOCATION)) { return NO_CATALOG_TYPE; } else { diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java index d1bfde2f7f8b..cf3450840a83 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java @@ -53,29 +53,6 @@ private InputFormatConfig() { public static final String TABLE_CATALOG_PREFIX = "iceberg.mr.table.catalog."; public static final String LOCALITY = "iceberg.mr.locality"; - /** - * @deprecated please use {@link #catalogPropertyConfigKey(String, String)} - * with config key {@link org.apache.iceberg.CatalogUtil#ICEBERG_CATALOG_TYPE} to specify the type of a catalog. - */ - @Deprecated - public static final String CATALOG = "iceberg.mr.catalog"; - - /** - * @deprecated please use {@link #catalogPropertyConfigKey(String, String)} - * with config key {@link org.apache.iceberg.CatalogProperties#WAREHOUSE_LOCATION} - * to specify the warehouse location of a catalog. - */ - @Deprecated - public static final String HADOOP_CATALOG_WAREHOUSE_LOCATION = "iceberg.mr.catalog.hadoop.warehouse.location"; - - /** - * @deprecated please use {@link #catalogPropertyConfigKey(String, String)} - * with config key {@link org.apache.iceberg.CatalogProperties#CATALOG_IMPL} - * to specify the implementation of a catalog. - */ - @Deprecated - public static final String CATALOG_LOADER_CLASS = "iceberg.mr.catalog.loader.class"; - public static final String CTAS_TABLE_NAME = "iceberg.mr.ctas.table.name"; public static final String SELECTED_COLUMNS = "iceberg.mr.selected.columns"; public static final String FETCH_VIRTUAL_COLUMNS = "iceberg.mr.fetch.virtual.columns"; diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index 89b957d4967e..f65f7e584d9c 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -105,7 +105,7 @@ public void testCreateDropTableToLocation() throws IOException { .isInstanceOf(NullPointerException.class) .hasMessage("Table schema not set"); - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); Properties missingLocation = new Properties(); missingLocation.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); @@ -195,58 +195,6 @@ public void testCreateDropTableToCatalog() throws IOException { .hasMessage("Table does not exist: test.table"); } - @Test - public void testLegacyLoadCatalogDefault() { - Optional defaultCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(defaultCatalog.isPresent()); - Assertions.assertThat(defaultCatalog.get()).isInstanceOf(HiveCatalog.class); - Assert.assertTrue(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogHive() { - conf.set(InputFormatConfig.CATALOG, CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE); - Optional hiveCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(hiveCatalog.isPresent()); - Assertions.assertThat(hiveCatalog.get()).isInstanceOf(HiveCatalog.class); - Assert.assertTrue(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogHadoop() { - conf.set(InputFormatConfig.CATALOG, CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - Optional hadoopCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(hadoopCatalog.isPresent()); - Assertions.assertThat(hadoopCatalog.get()).isInstanceOf(HadoopCatalog.class); - Assert.assertFalse(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogCustom() { - conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName()); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - Optional customHadoopCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(customHadoopCatalog.isPresent()); - Assertions.assertThat(customHadoopCatalog.get()).isInstanceOf(CustomHadoopCatalog.class); - Assert.assertFalse(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogLocation() { - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); - Assert.assertFalse(Catalogs.loadCatalog(conf, null).isPresent()); - } - - @Test - public void testLegacyLoadCatalogUnknown() { - conf.set(InputFormatConfig.CATALOG, "fooType"); - - Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, null)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageContaining("Unknown catalog type"); - } - @Test public void testLoadCatalogDefault() { String catalogName = "barCatalog"; @@ -271,19 +219,6 @@ public void testLoadCatalogHive() { Assert.assertTrue(Catalogs.hiveCatalog(conf, properties)); } - @Test - public void testLegacyLoadCustomCatalogWithHiveCatalogTypeSet() { - String catalogName = "barCatalog"; - conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), - CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE); - conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName()); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - - Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, catalogName)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("both type and catalog-impl are set"); - } - @Test public void testLoadCatalogHadoop() { String catalogName = "barCatalog"; @@ -300,21 +235,6 @@ public void testLoadCatalogHadoop() { Assert.assertFalse(Catalogs.hiveCatalog(conf, properties)); } - @Test - public void testLoadCatalogHadoopWithLegacyWarehouseLocation() { - String catalogName = "barCatalog"; - conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), - CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - Optional hadoopCatalog = Catalogs.loadCatalog(conf, catalogName); - Assert.assertTrue(hadoopCatalog.isPresent()); - Assertions.assertThat(hadoopCatalog.get()).isInstanceOf(HadoopCatalog.class); - Assert.assertEquals("HadoopCatalog{name=barCatalog, location=/tmp/mylocation}", hadoopCatalog.get().toString()); - Properties properties = new Properties(); - properties.put(InputFormatConfig.CATALOG_NAME, catalogName); - Assert.assertFalse(Catalogs.hiveCatalog(conf, properties)); - } - @Test public void testLoadCatalogCustom() { String catalogName = "barCatalog"; diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java index d3b5a4ca9f25..1e956171bd02 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java @@ -111,7 +111,7 @@ public class TestIcebergInputFormats { @Before public void before() throws IOException { conf = new Configuration(); - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); HadoopTables tables = new HadoopTables(conf); File location = temp.newFolder(testInputFormat.name(), fileFormat.name()); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java index 2ba4e50e8aa1..5edc299320f6 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java @@ -25,6 +25,7 @@ import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.BaseTable; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; @@ -65,7 +66,7 @@ public static Object[][] parameters() { @Before @Override public void writeTestDataFile() throws IOException { - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); super.writeTestDataFile(); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java index 9decd05ad7e8..fae2e207ba2a 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java @@ -29,6 +29,7 @@ import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Schema; import org.apache.iceberg.data.Record; @@ -76,7 +77,7 @@ public void before() throws IOException, HiveException { Assert.assertTrue(location.delete()); Configuration conf = prepareMockJob(SCHEMA, new Path(location.toString())); - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); HadoopTables tables = new HadoopTables(conf); helper = new TestHelper(conf, tables, location.toString(), SCHEMA, null, fileFormat, temp); From 4e100546cd66caecb54bb1cee3e617951de35c18 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Tue, 13 Jun 2023 16:49:41 +0200 Subject: [PATCH 26/35] Fix failing tests - part II - regenerate qtest outputs --- .../describe_iceberg_metadata_tables.q.out | 30 +++++++++--- .../positive/dynamic_partition_writes.q.out | 26 +++++------ ...ceberg_metadata_of_partitioned_table.q.out | 46 +++++++++---------- 3 files changed, 60 insertions(+), 42 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out index 8014b443e18f..dccc778cd214 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out @@ -67,6 +67,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe default.ice_meta_desc.history PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -113,8 +114,12 @@ PREHOOK: Input: default@ice_meta_desc POSTHOOK: query: describe default.ice_meta_desc.partitions POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_meta_desc -record_count bigint -file_count int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: describe default.ice_meta_desc.all_manifests PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -167,6 +172,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe default.ice_meta_desc.data_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -313,6 +319,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.history PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -363,8 +370,12 @@ POSTHOOK: query: describe formatted default.ice_meta_desc.partitions POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_meta_desc # col_name data_type comment -record_count bigint -file_count int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: describe formatted default.ice_meta_desc.all_manifests PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -420,6 +431,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.data_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -570,6 +582,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe extended default.ice_meta_desc.history PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -616,8 +629,12 @@ PREHOOK: Input: default@ice_meta_desc POSTHOOK: query: describe extended default.ice_meta_desc.partitions POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_meta_desc -record_count bigint -file_count int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: describe extended default.ice_meta_desc.all_manifests PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -670,6 +687,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.data_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc diff --git a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out index b1e7fcfe4532..037d1b439f73 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out @@ -328,19 +328,19 @@ POSTHOOK: query: select * from default.tbl_target_mixed.partitions order by `par POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_target_mixed POSTHOOK: Output: hdfs://### HDFS PATH ### -{"ccy":"CZK","c_bucket":1} 1 1 0 -{"ccy":"CZK","c_bucket":2} 1 1 0 -{"ccy":"EUR","c_bucket":0} 1 1 0 -{"ccy":"EUR","c_bucket":1} 2 1 0 -{"ccy":"EUR","c_bucket":2} 3 1 0 -{"ccy":"HUF","c_bucket":1} 2 1 0 -{"ccy":"PLN","c_bucket":0} 2 1 0 -{"ccy":"PLN","c_bucket":1} 1 1 0 -{"ccy":"PLN","c_bucket":2} 1 1 0 -{"ccy":"USD","c_bucket":0} 2 1 0 -{"ccy":"USD","c_bucket":1} 3 1 0 -{"ccy":"USD","c_bucket":2} 1 1 0 -{"ccy":null,"c_bucket":null} 2 1 0 +{"ccy":"CZK","c_bucket":1} 0 1 1 0 0 0 0 +{"ccy":"CZK","c_bucket":2} 0 1 1 0 0 0 0 +{"ccy":"EUR","c_bucket":0} 0 1 1 0 0 0 0 +{"ccy":"EUR","c_bucket":1} 0 2 1 0 0 0 0 +{"ccy":"EUR","c_bucket":2} 0 3 1 0 0 0 0 +{"ccy":"HUF","c_bucket":1} 0 2 1 0 0 0 0 +{"ccy":"PLN","c_bucket":0} 0 2 1 0 0 0 0 +{"ccy":"PLN","c_bucket":1} 0 1 1 0 0 0 0 +{"ccy":"PLN","c_bucket":2} 0 1 1 0 0 0 0 +{"ccy":"USD","c_bucket":0} 0 2 1 0 0 0 0 +{"ccy":"USD","c_bucket":1} 0 3 1 0 0 0 0 +{"ccy":"USD","c_bucket":2} 0 1 1 0 0 0 0 +{"ccy":null,"c_bucket":null} 0 2 1 0 0 0 0 PREHOOK: query: select * from default.tbl_target_mixed.files PREHOOK: type: QUERY PREHOOK: Input: default@tbl_target_mixed diff --git a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out index c663ca5688e0..efc7a74a9373 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out @@ -296,8 +296,8 @@ POSTHOOK: query: select * from default.ice_meta_2.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four"} 1 1 0 -{"b":"three"} 3 1 0 +{"b":"four"} 0 1 1 0 0 0 0 +{"b":"three"} 0 3 1 0 0 0 0 PREHOOK: query: select * from default.ice_meta_3.partitions PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -306,13 +306,13 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four","c":"Saturday"} 3 1 0 -{"b":"four","c":"Sunday"} 1 1 0 -{"b":"four","c":"Thursday"} 1 1 0 -{"b":"one","c":"Monday"} 3 1 0 -{"b":"three","c":"Wednesday"} 3 1 0 -{"b":"two","c":"Friday"} 2 1 0 -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"four","c":"Saturday"} 0 3 1 0 0 0 0 +{"b":"four","c":"Sunday"} 0 1 1 0 0 0 0 +{"b":"four","c":"Thursday"} 0 1 1 0 0 0 0 +{"b":"one","c":"Monday"} 0 3 1 0 0 0 0 +{"b":"three","c":"Wednesday"} 0 3 1 0 0 0 0 +{"b":"two","c":"Friday"} 0 2 1 0 0 0 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select `partition` from default.ice_meta_2.partitions where `partition`.b='four' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_2 @@ -330,7 +330,7 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions where `partition`.b POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select partition_summaries from default.ice_meta_3.manifests where partition_summaries[1].upper_bound='Wednesday' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -592,8 +592,8 @@ POSTHOOK: query: select * from default.ice_meta_2.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four"} 1 1 0 -{"b":"three"} 3 1 0 +{"b":"four"} 0 1 1 0 0 0 0 +{"b":"three"} 0 3 1 0 0 0 0 PREHOOK: query: select * from default.ice_meta_3.partitions PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -602,13 +602,13 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four","c":"Saturday"} 3 1 0 -{"b":"four","c":"Sunday"} 1 1 0 -{"b":"four","c":"Thursday"} 1 1 0 -{"b":"one","c":"Monday"} 3 1 0 -{"b":"three","c":"Wednesday"} 3 1 0 -{"b":"two","c":"Friday"} 2 1 0 -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"four","c":"Saturday"} 0 3 1 0 0 0 0 +{"b":"four","c":"Sunday"} 0 1 1 0 0 0 0 +{"b":"four","c":"Thursday"} 0 1 1 0 0 0 0 +{"b":"one","c":"Monday"} 0 3 1 0 0 0 0 +{"b":"three","c":"Wednesday"} 0 3 1 0 0 0 0 +{"b":"two","c":"Friday"} 0 2 1 0 0 0 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select `partition` from default.ice_meta_2.partitions where `partition`.b='four' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_2 @@ -626,7 +626,7 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions where `partition`.b POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select partition_summaries from default.ice_meta_3.manifests where partition_summaries[1].upper_bound='Wednesday' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -833,6 +833,6 @@ POSTHOOK: query: select * from default.partevv.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@partevv POSTHOOK: Output: hdfs://### HDFS PATH ### -{"id":1,"ts_day":null} 1 1 1 -{"id":2,"ts_day":null} 1 1 1 -{"id":null,"ts_day":"2022-04-29"} 1 1 2 +{"id":1,"ts_day":null} 1 1 1 0 0 0 0 +{"id":2,"ts_day":null} 1 1 1 0 0 0 0 +{"id":null,"ts_day":"2022-04-29"} 2 1 1 0 0 0 0 From 85b6e5fb22177e33447e7b046d367e99780b9b99 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Wed, 14 Jun 2023 16:14:42 +0200 Subject: [PATCH 27/35] Fix failing tests - part VI --- .../java/org/apache/iceberg/mr/Catalogs.java | 21 ++++++++++++------- .../iceberg/mr/TestIcebergInputFormats.java | 5 +++++ .../TestHiveIcebergStorageHandlerNoScan.java | 6 ++++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java index e6555f9664f0..cee702a2a0d7 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -23,7 +23,6 @@ import java.util.Optional; import java.util.Properties; import java.util.Set; -import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; @@ -38,7 +37,6 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.relocated.com.google.common.collect.Streams; /** * Class for catalog resolution and accessing the common functions for {@link Catalog} API. @@ -255,13 +253,20 @@ static Optional loadCatalog(Configuration conf, String catalogName) { */ private static Map getCatalogProperties( Configuration conf, String catalogName, String catalogType) { - String keyPrefix = InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName; + Map catalogProperties = Maps.newHashMap(); + conf.forEach(config -> { + if (config.getKey().startsWith(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX)) { + catalogProperties.putIfAbsent( + config.getKey().substring(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX.length()), + config.getValue()); + } else if (config.getKey().startsWith(InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName)) { + catalogProperties.put( + config.getKey().substring((InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName).length() + 1), + config.getValue()); + } + }); - return Streams.stream(conf.iterator()) - .filter(e -> e.getKey().startsWith(keyPrefix)) - .collect( - Collectors.toMap( - e -> e.getKey().substring(keyPrefix.length() + 1), Map.Entry::getValue)); + return catalogProperties; } /** diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java index 1e956171bd02..d060e40273d4 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java @@ -205,6 +205,11 @@ public void testResiduals() throws Exception { @Test public void testFailedResidualFiltering() throws Exception { + // Vectorization is not yet supported for AVRO + if (this.fileFormat.equals(FileFormat.AVRO)) { + return; + } + helper.createTable(); List expectedRecords = helper.generateRandomRecords(2, 0L); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index 719ccafdfad7..aedcfdff5ccc 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -1814,8 +1814,10 @@ public void testCreateTableWithMetadataLocationWithoutSchema() throws IOExceptio testTables.locationForCreateTableSQL(targetIdentifier) + tblProps); }) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("CREATE EXTERNAL TABLE target STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(targetIdentifier) + tblProps); + .hasMessageContaining("Column names can not be provided along with metadata location."); + shell.executeStatement( + "CREATE EXTERNAL TABLE target STORED BY ICEBERG " + testTables.locationForCreateTableSQL(targetIdentifier) + + tblProps); // Check the partition and the schema are preserved. Table targetIcebergTable = From 54551594f597adae4c817b8c7c24cfd76f175a00 Mon Sep 17 00:00:00 2001 From: Dmitriy Fingerman Date: Wed, 14 Jun 2023 11:23:44 -0400 Subject: [PATCH 28/35] Fix failing tests - part III - Ignore testFailedResidualFiltering. --- .../java/org/apache/iceberg/mr/TestIcebergInputFormats.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java index d060e40273d4..aff360e40acd 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java @@ -67,6 +67,7 @@ import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -204,6 +205,10 @@ public void testResiduals() throws Exception { } @Test + @Ignore + // This test is ignored because for ARVO, the vectorized IcebergInputFormat.IcebergRecordReader doesn't support AVRO + // and for ORC and PARQUET, IcebergInputFormat class ignores residuals + // '... scan.filter(filter).ignoreResiduals()' and it is not compatible with this test public void testFailedResidualFiltering() throws Exception { // Vectorization is not yet supported for AVRO if (this.fileFormat.equals(FileFormat.AVRO)) { From 1b53423eda291d0784a1a01db33070e1c4fffed3 Mon Sep 17 00:00:00 2001 From: Vikash Kumar Date: Thu, 26 Jan 2023 00:41:09 +0530 Subject: [PATCH 29/35] Core: Avoid creating new metadata file on registerTable (#6591) --- .../org/apache/iceberg/util/LocationUtil.java | 38 -------------- .../iceberg/hive/HiveTableOperations.java | 6 +-- .../java/org/apache/iceberg/TestHelpers.java | 0 .../apache/iceberg/hive/TestHiveCatalog.java | 49 +++++++++++++++++++ 4 files changed, 52 insertions(+), 41 deletions(-) delete mode 100644 core/src/main/java/org/apache/iceberg/util/LocationUtil.java rename iceberg/{iceberg-handler => iceberg-catalog}/src/test/java/org/apache/iceberg/TestHelpers.java (100%) diff --git a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java b/core/src/main/java/org/apache/iceberg/util/LocationUtil.java deleted file mode 100644 index 42c26524f28f..000000000000 --- a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.iceberg.util; - -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; - -public class LocationUtil { - private LocationUtil() { - - } - - public static String stripTrailingSlash(String path) { - Preconditions.checkArgument(path != null && path.length() > 0, "path must not be null or empty"); - - String result = path; - while (result.endsWith("/")) { - result = result.substring(0, result.length() - 1); - } - return result; - } -} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 5514815a0bb1..f6c62b147460 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -173,8 +173,8 @@ protected void doRefresh() { @SuppressWarnings("checkstyle:CyclomaticComplexity") @Override protected void doCommit(TableMetadata base, TableMetadata metadata) { - String newMetadataLocation = base == null && metadata.metadataFileLocation() != null ? - metadata.metadataFileLocation() : writeNewMetadata(metadata, currentVersion() + 1); + boolean newTable = base == null; + String newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata); boolean hiveEngineEnabled = hiveEngineEnabled(metadata, conf); boolean keepHiveStats = conf.getBoolean(ConfigProperties.KEEP_HIVE_STATS, false); @@ -189,7 +189,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { if (tbl != null) { // If we try to create the table but the metadata location is already set, then we had a concurrent commit - if (base == null && tbl.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP) != null) { + if (newTable && tbl.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP) != null) { throw new AlreadyExistsException("Table already exists: %s.%s", database, tableName); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java similarity index 100% rename from iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java rename to iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 6c60c71d069e..03a1bcf74103 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -35,6 +35,7 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.FileFormat; +import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; @@ -46,6 +47,7 @@ import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TestHelpers; import org.apache.iceberg.Transaction; import org.apache.iceberg.UpdateSchema; import org.apache.iceberg.catalog.Catalog; @@ -79,6 +81,7 @@ import static org.apache.iceberg.TableProperties.DEFAULT_SORT_ORDER; import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -1116,4 +1119,50 @@ public void testTablePropsDefinedAtCatalogLevel() { hiveCatalog.dropTable(tableIdent); } } + + @Test + public void testDatabaseLocationWithSlashInWarehouseDir() { + Configuration conf = new Configuration(); + // With a trailing slash + conf.set("hive.metastore.warehouse.dir", "s3://bucket/"); + conf.set("hive.metastore.warehouse.external.dir", "s3://bucket/"); + + HiveCatalog catalog = new HiveCatalog(); + catalog.setConf(conf); + + Database database = catalog.convertToDatabase(Namespace.of("database"), ImmutableMap.of()); + + Assert.assertEquals("s3://bucket/database.db", database.getLocationUri()); + } + + @Test + public void testRegisterTable() { + TableIdentifier identifier = TableIdentifier.of(DB_NAME, "t1"); + catalog.createTable(identifier, getTestSchema()); + Table registeringTable = catalog.loadTable(identifier); + catalog.dropTable(identifier, false); + TableOperations ops = ((HasTableOperations) registeringTable).operations(); + String metadataLocation = ((HiveTableOperations) ops).currentMetadataLocation(); + Table registeredTable = catalog.registerTable(identifier, metadataLocation); + assertThat(registeredTable).isNotNull(); + TestHelpers.assertSerializedAndLoadedMetadata(registeringTable, registeredTable); + String expectedMetadataLocation = + ((HasTableOperations) registeredTable).operations().current().metadataFileLocation(); + assertThat(metadataLocation).isEqualTo(expectedMetadataLocation); + assertThat(catalog.loadTable(identifier)).isNotNull(); + assertThat(catalog.dropTable(identifier)).isTrue(); + } + + @Test + public void testRegisterExistingTable() { + TableIdentifier identifier = TableIdentifier.of(DB_NAME, "t1"); + catalog.createTable(identifier, getTestSchema()); + Table registeringTable = catalog.loadTable(identifier); + TableOperations ops = ((HasTableOperations) registeringTable).operations(); + String metadataLocation = ((HiveTableOperations) ops).currentMetadataLocation(); + assertThatThrownBy(() -> catalog.registerTable(identifier, metadataLocation)) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.t1"); + assertThat(catalog.dropTable(identifier, true)).isTrue(); + } } From 3f003a02d62825a25ce770b3db3d66372516a7b9 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Tue, 20 Jun 2023 15:58:38 +0200 Subject: [PATCH 30/35] Remove unused field --- .../src/main/java/org/apache/iceberg/hive/CachedClientPool.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java index 061d071d4e50..c93ce5455e9f 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java @@ -72,14 +72,12 @@ public class CachedClientPool implements ClientPool clientPoolCache; private final Configuration conf; - private final String metastoreUri; private final int clientPoolSize; private final long evictionInterval; private final Key key; public CachedClientPool(Configuration conf, Map properties) { this.conf = conf; - this.metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); this.clientPoolSize = PropertyUtil.propertyAsInt(properties, CatalogProperties.CLIENT_POOL_SIZE, CatalogProperties.CLIENT_POOL_SIZE_DEFAULT); From e7a8dbd5d3b73caa4a27b68d97345f660ee030ce Mon Sep 17 00:00:00 2001 From: Dmitriy Fingerman Date: Tue, 20 Jun 2023 16:46:46 -0400 Subject: [PATCH 31/35] HIVE-27306: Code cleaning: removal of unused and outdated code. --- iceberg/iceberg-catalog/pom.xml | 5 ----- .../main/java/org/apache/iceberg/hive/MetastoreLock.java | 5 ----- .../src/main/java/org/apache/iceberg/mr/Catalogs.java | 5 +++-- .../src/test/java/org/apache/iceberg/mr/TestCatalogs.java | 4 +--- .../java/org/apache/iceberg/mr/TestIcebergInputFormats.java | 6 ++---- 5 files changed, 6 insertions(+), 19 deletions(-) diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index 9633e5b1cb83..06ddcfdd40a1 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -78,10 +78,5 @@ junit-vintage-engine test - - org.mockito - mockito-core - test - diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java index 92c28c5d9d58..73b38fe32be0 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java @@ -177,7 +177,6 @@ public void unlock() { } } - // TODO add lock heart beating for cases where default lock timeout is too low. @SuppressWarnings("checkstyle:CyclomaticComplexity") private long acquireLock() throws LockException { LockInfo lockInfo = createLock(); @@ -456,10 +455,6 @@ private static void initTableLevelLockCache(long evictionTimeout) { } } - public String getTableName() { - return tableName; - } - private static class Heartbeat implements Runnable { private final ClientPool hmsClients; private final long lockId; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java index cee702a2a0d7..d422885becda 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -254,14 +254,15 @@ static Optional loadCatalog(Configuration conf, String catalogName) { private static Map getCatalogProperties( Configuration conf, String catalogName, String catalogType) { Map catalogProperties = Maps.newHashMap(); + String keyPrefix = InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName; conf.forEach(config -> { if (config.getKey().startsWith(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX)) { catalogProperties.putIfAbsent( config.getKey().substring(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX.length()), config.getValue()); - } else if (config.getKey().startsWith(InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName)) { + } else if (config.getKey().startsWith(keyPrefix)) { catalogProperties.put( - config.getKey().substring((InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName).length() + 1), + config.getKey().substring(keyPrefix.length() + 1), config.getValue()); } }); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index f65f7e584d9c..2bd1f70c9809 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -258,9 +258,7 @@ public void testLoadCatalogLocation() { @Test public void testLoadCatalogUnknown() { String catalogName = "barCatalog"; - conf.set( - InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), - "fooType"); + conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), "fooType"); Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, catalogName)) .isInstanceOf(UnsupportedOperationException.class) diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java index aff360e40acd..dfdb27637fca 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java @@ -66,6 +66,7 @@ import org.apache.iceberg.types.Types; import org.assertj.core.api.Assertions; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Ignore; import org.junit.Rule; @@ -210,10 +211,7 @@ public void testResiduals() throws Exception { // and for ORC and PARQUET, IcebergInputFormat class ignores residuals // '... scan.filter(filter).ignoreResiduals()' and it is not compatible with this test public void testFailedResidualFiltering() throws Exception { - // Vectorization is not yet supported for AVRO - if (this.fileFormat.equals(FileFormat.AVRO)) { - return; - } + Assume.assumeTrue("Vectorization is not yet supported for AVRO", this.fileFormat != FileFormat.AVRO); helper.createTable(); From 6cb5cadfd52cb12375e7d457ebd50cf7637a4afa Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Wed, 21 Jun 2023 12:32:02 +0200 Subject: [PATCH 32/35] Revert "Pass txnid to MetaStoreLock" This reverts commit df0c309de9cbaa31c5b1e299a804de344882e248. --- iceberg/iceberg-catalog/pom.xml | 1 + .../iceberg/hive/HiveTableOperations.java | 14 +---------- .../apache/iceberg/hive/MetastoreLock.java | 7 ++++-- .../iceberg/mr/hive/HiveIcebergMetaHook.java | 24 +------------------ 4 files changed, 8 insertions(+), 38 deletions(-) diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index 06ddcfdd40a1..2d7b9447afa0 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -56,6 +56,7 @@ org.apache.hive hive-exec + test org.immutables diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index f6c62b147460..4b747ac7cfe8 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -36,8 +36,6 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; -import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hive.iceberg.com.fasterxml.jackson.core.JsonProcessingException; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.ClientPool; @@ -588,17 +586,7 @@ private static boolean hiveLockEnabled(TableMetadata metadata, Configuration con @VisibleForTesting HiveLock lockObject(TableMetadata metadata) { if (hiveLockEnabled(metadata, conf)) { - Optional txnId = Optional.empty(); - - SessionState sessionState = SessionState.get(); - if (sessionState != null) { - HiveTxnManager txnMgr = sessionState.getTxnMgr(); - if (txnMgr != null) { - txnId = Optional.of(txnMgr.getCurrentTxnId()); - } - } - - return new MetastoreLock(conf, metaClients, catalogName, database, tableName, txnId); + return new MetastoreLock(conf, metaClients, catalogName, database, tableName); } else { return new NoLock(); } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java index 73b38fe32be0..454a3a5f5e1a 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java @@ -97,12 +97,11 @@ public class MetastoreLock implements HiveLock { private Heartbeat heartbeat = null; public MetastoreLock(Configuration conf, ClientPool metaClients, - String catalogName, String databaseName, String tableName, Optional txnId) { + String catalogName, String databaseName, String tableName) { this.metaClients = metaClients; this.fullName = catalogName + "." + databaseName + "." + tableName; this.databaseName = databaseName; this.tableName = tableName; - this.hmsLockId = txnId; this.lockAcquireTimeout = conf.getLong(HIVE_ACQUIRE_LOCK_TIMEOUT_MS, HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT); @@ -179,6 +178,10 @@ public void unlock() { @SuppressWarnings("checkstyle:CyclomaticComplexity") private long acquireLock() throws LockException { + if (hmsLockId.isPresent()) { + throw new IllegalArgumentException(String.format("HMS lock ID=%s already acquired for table %s.%s", + hmsLockId.get(), databaseName, tableName)); + } LockInfo lockInfo = createLock(); final long start = System.currentTimeMillis(); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java index d9c4561561a2..d946531d58f4 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java @@ -47,7 +47,6 @@ import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.ddl.table.AlterTableType; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.PartitionTransform; @@ -103,7 +102,6 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.util.Pair; import org.apache.thrift.TException; -import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -326,11 +324,8 @@ public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, E context.getProperties().get(OLD_TABLE_NAME)).toString()); } if (commitLock == null) { - - Optional txnId = getTxnId(); - commitLock = new MetastoreLock(conf, new CachedClientPool(conf, Maps.fromProperties(catalogProperties)), - catalogProperties.getProperty(Catalogs.NAME), hmsTable.getDbName(), hmsTable.getTableName(), txnId); + catalogProperties.getProperty(Catalogs.NAME), hmsTable.getDbName(), hmsTable.getTableName()); } try { @@ -342,23 +337,6 @@ public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, E } } - @NotNull - private static Optional getTxnId() { - Optional txnId; - txnId = Optional.empty(); - - SessionState sessionState = SessionState.get(); - - if (sessionState != null) { - HiveTxnManager txnMgr = sessionState.getTxnMgr(); - if (txnMgr != null) { - txnId = Optional.of(txnMgr.getCurrentTxnId()); - } - } - - return txnId; - } - private void doPreAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context) throws MetaException { try { From 92ccda4c1770fda5f53294fbc319374f34a1e323 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Wed, 21 Jun 2023 13:17:40 +0200 Subject: [PATCH 33/35] Fix typo in Metastore --- .../org/apache/iceberg/hive/HiveCatalog.java | 20 +++++++++---------- .../metastore/TestMetaStoreInitListener.java | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 4d60a2cbb933..6c98cee6a528 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -275,12 +275,12 @@ public void createNamespace(Namespace namespace, Map meta) { namespace); } catch (TException e) { - throw new RuntimeException("Failed to create namespace " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Failed to create namespace " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to createDatabase(name) " + namespace + " in Hive Matastore", e); + "Interrupted in call to createDatabase(name) " + namespace + " in Hive Metastore", e); } } @@ -302,12 +302,12 @@ public List listNamespaces(Namespace namespace) { return namespaces; } catch (TException e) { - throw new RuntimeException("Failed to list all namespace: " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Failed to list all namespace: " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to getAllDatabases() " + namespace + " in Hive Matastore", e); + "Interrupted in call to getAllDatabases() " + namespace + " in Hive Metastore", e); } } @@ -336,12 +336,12 @@ public boolean dropNamespace(Namespace namespace) { return false; } catch (TException e) { - throw new RuntimeException("Failed to drop namespace " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Failed to drop namespace " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to drop dropDatabase(name) " + namespace + " in Hive Matastore", e); + "Interrupted in call to drop dropDatabase(name) " + namespace + " in Hive Metastore", e); } } @@ -397,11 +397,11 @@ private void alterHiveDataBase(Namespace namespace, Database database) { } catch (TException e) { throw new RuntimeException( - "Failed to list namespace under namespace: " + namespace + " in Hive Matastore", e); + "Failed to list namespace under namespace: " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - throw new RuntimeException("Interrupted in call to getDatabase(name) " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Interrupted in call to getDatabase(name) " + namespace + " in Hive Metastore", e); } } @@ -421,12 +421,12 @@ public Map loadNamespaceMetadata(Namespace namespace) { throw new NoSuchNamespaceException(e, "Namespace does not exist: %s", namespace); } catch (TException e) { - throw new RuntimeException("Failed to list namespace under namespace: " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Failed to list namespace under namespace: " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to getDatabase(name) " + namespace + " in Hive Matastore", e); + "Interrupted in call to getDatabase(name) " + namespace + " in Hive Metastore", e); } } diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java index b78076b606ff..94a01ca3f3d5 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java @@ -48,7 +48,7 @@ public void setUp() throws Exception { @Test public void testMetaStoreInitListener() throws Exception { - // DummyMataStoreInitListener's onInit will be called at HMSHandler + // DummyMetaStoreInitListener's onInit will be called at HMSHandler // initialization, and set this to true Assert.assertTrue(DummyMetaStoreInitListener.wasCalled); } From 505f33feb5a70674768f71decfbc4b88ff209fa7 Mon Sep 17 00:00:00 2001 From: Zsolt Miskolczi Date: Fri, 23 Jun 2023 11:18:04 +0200 Subject: [PATCH 34/35] Clean up patchec-iceberg-core --- iceberg/patched-iceberg-core/pom.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/iceberg/patched-iceberg-core/pom.xml b/iceberg/patched-iceberg-core/pom.xml index 6726f196e506..3654b60f2e88 100644 --- a/iceberg/patched-iceberg-core/pom.xml +++ b/iceberg/patched-iceberg-core/pom.xml @@ -76,8 +76,6 @@ ${project.build.directory}/classes **/HadoopInputFile.class - **/TableProperties.class - **/ConfigProperties.class **/SerializableTable.class From 9b2d0400a93d488f6ae4b7d30ecf31f1fe6b8218 Mon Sep 17 00:00:00 2001 From: SimhadriG Date: Tue, 27 Jun 2023 23:27:42 +0530 Subject: [PATCH 35/35] HIVE-27306: Update schema of partition metadata table --- .../apache/hadoop/hive/conf/Constants.java | 4 +- .../mr/hive/HiveIcebergStorageHandler.java | 2 +- .../queries/positive/show_partitions_test.q | 14 ++-- .../positive/show_partitions_test.q.out | 80 ++++++++++++------- 4 files changed, 62 insertions(+), 38 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java index 99d841059626..919e40bec0fc 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java +++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java @@ -108,6 +108,8 @@ public class Constants { public static final String TIME_POSTFIX_REQUEST_TRACK = "_TIME"; public static final String ICEBERG = "iceberg"; - public static final String ICEBERG_PARTITION_TABLE_SCHEMA = "partition,record_count,file_count,spec_id"; + public static final String ICEBERG_PARTITION_TABLE_SCHEMA = "partition,spec_id,record_count,file_count," + + "position_delete_record_count,position_delete_file_count,equality_delete_record_count," + + "equality_delete_file_count"; public static final String DELIMITED_JSON_SERDE = "org.apache.hadoop.hive.serde2.DelimitedJSONSerDe"; } diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 7ec03084b53a..18204a90c48e 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -168,7 +168,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H private static final Splitter TABLE_NAME_SPLITTER = Splitter.on(".."); private static final String TABLE_NAME_SEPARATOR = ".."; // Column index for partition metadata table - private static final int SPEC_IDX = 3; + private static final int SPEC_IDX = 1; private static final int PART_IDX = 0; public static final String COPY_ON_WRITE = "copy-on-write"; public static final String MERGE_ON_READ = "merge-on-read"; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q b/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q index 0d1ebef44ba2..b424fac02806 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q @@ -14,16 +14,19 @@ insert into ice1 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, --compare hive table with iceberg table show partitions hiveT1; -show partitions ice1 ; +describe default.ice1.partitions; select * from default.ice1.partitions order by `partition`; +show partitions ice1 ; + explain show partitions hiveT1; explain show partitions ice1; explain select * from default.ice1.partitions; --- Partition evolution -create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc TBLPROPERTIES("format-version"='2') ; -insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2, 10, 5); +---- Partition evolution +create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc +TBLPROPERTIES("format-version"='2') ; +insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2,10, 5); select * from default.ice2.partitions order by `partition`; show partitions ice2; @@ -32,7 +35,8 @@ ALTER TABLE ice2 SET PARTITION SPEC (c) ; select * from default.ice2.partitions order by `partition`; show partitions ice2; -insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, 10, 5); +insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, +10, 5); select * from default.ice2.partitions order by `partition`; show partitions ice2; diff --git a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out index 3d6fcd4ba676..ee3eb58f1850 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out @@ -51,15 +51,20 @@ POSTHOOK: Input: default@hivet1 d_part=10/e_part=5 d_part=2/e_part=5 d_part=3/e_part=4 -PREHOOK: query: show partitions ice1 -PREHOOK: type: SHOWPARTITIONS +PREHOOK: query: describe default.ice1.partitions +PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice1 -POSTHOOK: query: show partitions ice1 -POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: query: describe default.ice1.partitions +POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice1 -current-spec-id=0/d_part=10/e_part=5 -current-spec-id=0/d_part=2/e_part=5 -current-spec-id=0/d_part=3/e_part=4 +partition struct +spec_id int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: select * from default.ice1.partitions order by `partition` PREHOOK: type: QUERY PREHOOK: Input: default@ice1 @@ -68,9 +73,18 @@ POSTHOOK: query: select * from default.ice1.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice1 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5} 2 1 0 -{"d_part":2,"e_part":5} 1 1 0 -{"d_part":3,"e_part":4} 2 1 0 +{"d_part":10,"e_part":5} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4} 0 2 1 0 0 0 0 +PREHOOK: query: show partitions ice1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@ice1 +POSTHOOK: query: show partitions ice1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@ice1 +current-spec-id=0/d_part=10/e_part=5 +current-spec-id=0/d_part=2/e_part=5 +current-spec-id=0/d_part=3/e_part=4 PREHOOK: query: explain show partitions hiveT1 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@hivet1 @@ -109,23 +123,25 @@ Stage-0 Fetch Operator limit:-1 Select Operator [SEL_1] - Output:["_col0","_col1","_col2","_col3"] + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] TableScan [TS_0] - Output:["partition","record_count","file_count","spec_id"] + Output:["partition","spec_id","record_count","file_count","position_delete_record_count","position_delete_file_count","equality_delete_record_count","equality_delete_file_count"] -PREHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc TBLPROPERTIES("format-version"='2') +PREHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc +TBLPROPERTIES("format-version"='2') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@ice2 -POSTHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc TBLPROPERTIES("format-version"='2') +POSTHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc +TBLPROPERTIES("format-version"='2') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@ice2 -PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2, 10, 5) +PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2,10, 5) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@ice2 -POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2, 10, 5) +POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2,10, 5) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@ice2 @@ -137,9 +153,9 @@ POSTHOOK: query: select * from default.ice2.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5} 2 1 0 -{"d_part":2,"e_part":5} 1 1 0 -{"d_part":3,"e_part":4} 2 1 0 +{"d_part":10,"e_part":5} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4} 0 2 1 0 0 0 0 PREHOOK: query: show partitions ice2 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@ice2 @@ -164,9 +180,9 @@ POSTHOOK: query: select * from default.ice2.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5,"c":null} 2 1 0 -{"d_part":2,"e_part":5,"c":null} 1 1 0 -{"d_part":3,"e_part":4,"c":null} 2 1 0 +{"d_part":10,"e_part":5,"c":null} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5,"c":null} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4,"c":null} 0 2 1 0 0 0 0 PREHOOK: query: show partitions ice2 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@ice2 @@ -176,11 +192,13 @@ POSTHOOK: Input: default@ice2 spec-id=0/d_part=10/e_part=5 spec-id=0/d_part=2/e_part=5 spec-id=0/d_part=3/e_part=4 -PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, 10, 5) +PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, +10, 5) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@ice2 -POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, 10, 5) +POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, +10, 5) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@ice2 @@ -192,13 +210,13 @@ POSTHOOK: query: select * from default.ice2.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5,"c":null} 2 1 0 -{"d_part":2,"e_part":5,"c":null} 1 1 0 -{"d_part":3,"e_part":4,"c":null} 2 1 0 -{"d_part":null,"e_part":null,"c":2} 2 1 1 -{"d_part":null,"e_part":null,"c":3} 1 1 1 -{"d_part":null,"e_part":null,"c":4} 1 1 1 -{"d_part":null,"e_part":null,"c":5} 1 1 1 +{"d_part":10,"e_part":5,"c":null} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5,"c":null} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4,"c":null} 0 2 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":2} 1 2 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":3} 1 1 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":4} 1 1 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":5} 1 1 1 0 0 0 0 PREHOOK: query: show partitions ice2 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@ice2