From b8fcd8765df38f7ed1edb0bc5c0431afc1b2ca65 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 11 Feb 2022 14:14:42 -0800 Subject: [PATCH 1/4] AWS: support registerTable in GlueCatalog --- .../aws/glue/TestGlueCatalogTable.java | 74 +++++++++++++++++++ .../apache/iceberg/aws/glue/GlueCatalog.java | 31 ++++++++ .../iceberg/aws/glue/GlueTableOperations.java | 2 +- .../iceberg/aws/glue/TestGlueCatalog.java | 26 +++++++ 4 files changed, 132 insertions(+), 1 deletion(-) diff --git a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java index 4dade77a3ac5..f2a64942aa82 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java @@ -24,6 +24,7 @@ import java.util.Optional; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.BaseTable; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.PartitionSpec; @@ -34,12 +35,14 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; +import org.assertj.core.util.Lists; import org.junit.Assert; import org.junit.Test; import software.amazon.awssdk.services.glue.model.Column; @@ -364,4 +367,75 @@ public void testColumnCommentsAndParameters() { ); Assert.assertEquals("Columns do not match", expectedColumns, actualColumns); } + + @Test + public void testRegisterTable() { + String namespace = createNamespace(); + String tableName = getRandomName(); + createTable(namespace, tableName); + Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName)); + DataFile dataFile = DataFiles.builder(partitionSpec) + .withPath("/path/to/data-a.parquet") + .withFileSizeInBytes(10) + .withRecordCount(1) + .build(); + table.newAppend().appendFile(dataFile).commit(); + table.refresh(); + long v1SnapshotId = table.currentSnapshot().snapshotId(); + String v1MetadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + table.newDelete().deleteFile(dataFile).commit(); + table.refresh(); + + String registeredTableName = getRandomName(); + Table registeredTable = glueCatalog.registerTable( + TableIdentifier.of(namespace, registeredTableName), v1MetadataLocation); + + Assert.assertEquals("registered table should have v1 snapshot ID", + v1SnapshotId, registeredTable.currentSnapshot().snapshotId()); + + Assert.assertNotEquals("registered table should have a different snapshot ID from the original table", + table.currentSnapshot().snapshotId(), registeredTable.currentSnapshot().snapshotId()); + + Assert.assertEquals("Registered table should have 1 data file", + 1, Lists.newArrayList(registeredTable.newScan().planFiles()).size()); + + // check table in Glue + GetTableResponse response = glue.getTable(GetTableRequest.builder() + .databaseName(namespace).name(registeredTableName).build()); + Assert.assertEquals("Table type should be set", + GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE, response.table().tableType()); + Assert.assertNull("Storage descriptor should be empty", response.table().storageDescriptor()); + Assert.assertTrue("Partition spec should be empty", response.table().partitionKeys().isEmpty()); + Assert.assertEquals("Iceberg table type should be set", + BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), + response.table().parameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP)); + Assert.assertEquals("Metadata file location should be set", + v1MetadataLocation, response.table().parameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)); + } + + @Test + public void testRegisterTableNamespaceNotFound() { + String namespace = createNamespace(); + String tableName = getRandomName(); + createTable(namespace, tableName); + Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName)); + String metadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + AssertHelpers.assertThrows("Should fail to register to an unknown namespace", + NoSuchNamespaceException.class, + "not found in Glue", + () -> glueCatalog.registerTable(TableIdentifier.of(getRandomName(), getRandomName()), metadataLocation)); + } + + @Test + public void testRegisterTableAlreadyExists() { + String namespace = createNamespace(); + String tableName = getRandomName(); + createTable(namespace, tableName); + Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName)); + String metadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + AssertHelpers.assertThrows("Should fail to register to an existing Glue table", + AlreadyExistsException.class, + "already exists in Glue", + () -> glueCatalog.registerTable(TableIdentifier.of(namespace, tableName), metadataLocation)); + } } diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java index f878ecf2412b..307dc9354110 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java @@ -22,6 +22,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -48,6 +49,7 @@ import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.LockManagers; @@ -73,6 +75,8 @@ import software.amazon.awssdk.services.glue.model.TableInput; import software.amazon.awssdk.services.glue.model.UpdateDatabaseRequest; +import static org.apache.iceberg.aws.glue.GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE; + public class GlueCatalog extends BaseMetastoreCatalog implements Closeable, SupportsNamespaces, Configurable { @@ -431,6 +435,33 @@ protected boolean isValidIdentifier(TableIdentifier tableIdentifier) { IcebergToGlueConverter.isValidTableName(tableIdentifier.name()); } + @Override + public org.apache.iceberg.Table registerTable(TableIdentifier identifier, String metadataFileLocation) { + Preconditions.checkArgument(isValidIdentifier(identifier), "Table identifier to register is invalid: " + identifier); + Preconditions.checkArgument(metadataFileLocation != null && !metadataFileLocation.isEmpty(), + "Cannot register an empty metadata file location as a table"); + try { + glue.createTable(CreateTableRequest.builder() + .databaseName(IcebergToGlueConverter.getDatabaseName(identifier)) + .tableInput(TableInput.builder() + .name(IcebergToGlueConverter.getTableName(identifier)) + .tableType(GLUE_EXTERNAL_TABLE_TYPE) + .parameters(ImmutableMap.of( + BaseMetastoreTableOperations.TABLE_TYPE_PROP, + BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), + BaseMetastoreTableOperations.METADATA_LOCATION_PROP, + metadataFileLocation)) + .build()) + .build()); + } catch (software.amazon.awssdk.services.glue.model.AlreadyExistsException e) { + throw new AlreadyExistsException(e, "Table %s already exists in Glue", identifier); + } catch (EntityNotFoundException e) { + throw new NoSuchNamespaceException(e, "Namespace %s is not found in Glue", identifier.namespace()); + } + + return loadTable(identifier); + } + @Override public String name() { return catalogName; diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java index bd239d7afd3b..2b1d60d68a09 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java @@ -52,7 +52,7 @@ class GlueTableOperations extends BaseMetastoreTableOperations { // same as org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE // more details: https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html - private static final String GLUE_EXTERNAL_TABLE_TYPE = "EXTERNAL_TABLE"; + static final String GLUE_EXTERNAL_TABLE_TYPE = "EXTERNAL_TABLE"; private final GlueClient glue; private final AwsProperties awsProperties; diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java b/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java index b607ca6b3b23..b2c0e3f5f6fb 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java @@ -455,4 +455,30 @@ public void testRemoveProperties() { .when(glue).updateDatabase(Mockito.any(UpdateDatabaseRequest.class)); glueCatalog.removeProperties(Namespace.of("db1"), Sets.newHashSet("key")); } + + @Test + public void testRegisterTableInvalidIdentifier() { + AssertHelpers.assertThrows("Should not allow registering table with multi-level namespace", + IllegalArgumentException.class, + "Table identifier to register is invalid", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "b", "name"), "s3://path")); + + AssertHelpers.assertThrows("Should not allow registering table with unsupported table name", + IllegalArgumentException.class, + "Table identifier to register is invalid", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "$name"), "s3://path")); + } + + @Test + public void testRegisterTableWithBadLocation() { + AssertHelpers.assertThrows("Should not allow registering null location", + IllegalArgumentException.class, + "Cannot register an empty metadata file location as a table", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "name"), null)); + + AssertHelpers.assertThrows("Should not allow registering empty location", + IllegalArgumentException.class, + "Cannot register an empty metadata file location as a table", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "name"), "")); + } } From 2b43550a5050fb45251cfaf5b59e2bb3dabd7629 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 11 Feb 2022 14:51:15 -0800 Subject: [PATCH 2/4] fix checkstyle --- .../org/apache/iceberg/aws/glue/TestGlueCatalogTable.java | 2 +- .../main/java/org/apache/iceberg/aws/glue/GlueCatalog.java | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java index f2a64942aa82..427d47c210a7 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java @@ -40,9 +40,9 @@ import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; -import org.assertj.core.util.Lists; import org.junit.Assert; import org.junit.Test; import software.amazon.awssdk.services.glue.model.Column; diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java index 307dc9354110..396b419710e7 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java @@ -75,8 +75,6 @@ import software.amazon.awssdk.services.glue.model.TableInput; import software.amazon.awssdk.services.glue.model.UpdateDatabaseRequest; -import static org.apache.iceberg.aws.glue.GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE; - public class GlueCatalog extends BaseMetastoreCatalog implements Closeable, SupportsNamespaces, Configurable { @@ -437,7 +435,8 @@ protected boolean isValidIdentifier(TableIdentifier tableIdentifier) { @Override public org.apache.iceberg.Table registerTable(TableIdentifier identifier, String metadataFileLocation) { - Preconditions.checkArgument(isValidIdentifier(identifier), "Table identifier to register is invalid: " + identifier); + Preconditions.checkArgument(isValidIdentifier(identifier), + "Table identifier to register is invalid: " + identifier); Preconditions.checkArgument(metadataFileLocation != null && !metadataFileLocation.isEmpty(), "Cannot register an empty metadata file location as a table"); try { @@ -445,7 +444,7 @@ public org.apache.iceberg.Table registerTable(TableIdentifier identifier, String .databaseName(IcebergToGlueConverter.getDatabaseName(identifier)) .tableInput(TableInput.builder() .name(IcebergToGlueConverter.getTableName(identifier)) - .tableType(GLUE_EXTERNAL_TABLE_TYPE) + .tableType(GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE) .parameters(ImmutableMap.of( BaseMetastoreTableOperations.TABLE_TYPE_PROP, BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), From 3406987e2b7de14a32c1c36b6426f8fe959c1dde Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Fri, 11 Feb 2022 16:13:16 -0800 Subject: [PATCH 3/4] address comments --- .../apache/iceberg/aws/glue/GlueCatalog.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java index 396b419710e7..1a39190260bf 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java @@ -439,18 +439,23 @@ public org.apache.iceberg.Table registerTable(TableIdentifier identifier, String "Table identifier to register is invalid: " + identifier); Preconditions.checkArgument(metadataFileLocation != null && !metadataFileLocation.isEmpty(), "Cannot register an empty metadata file location as a table"); + + Map tableParameters = ImmutableMap.of( + BaseMetastoreTableOperations.TABLE_TYPE_PROP, + BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), + BaseMetastoreTableOperations.METADATA_LOCATION_PROP, + metadataFileLocation); + + TableInput tableInput = TableInput.builder() + .name(IcebergToGlueConverter.getTableName(identifier)) + .tableType(GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE) + .parameters(tableParameters) + .build(); + try { glue.createTable(CreateTableRequest.builder() .databaseName(IcebergToGlueConverter.getDatabaseName(identifier)) - .tableInput(TableInput.builder() - .name(IcebergToGlueConverter.getTableName(identifier)) - .tableType(GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE) - .parameters(ImmutableMap.of( - BaseMetastoreTableOperations.TABLE_TYPE_PROP, - BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), - BaseMetastoreTableOperations.METADATA_LOCATION_PROP, - metadataFileLocation)) - .build()) + .tableInput(tableInput) .build()); } catch (software.amazon.awssdk.services.glue.model.AlreadyExistsException e) { throw new AlreadyExistsException(e, "Table %s already exists in Glue", identifier); From 439b4747514846b71852aca7aa84617225ad05c2 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Sat, 12 Feb 2022 13:08:02 -0800 Subject: [PATCH 4/4] use hasPartitionKeys for test --- .../java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java index 427d47c210a7..437aaa6fa1d7 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java @@ -405,7 +405,7 @@ public void testRegisterTable() { Assert.assertEquals("Table type should be set", GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE, response.table().tableType()); Assert.assertNull("Storage descriptor should be empty", response.table().storageDescriptor()); - Assert.assertTrue("Partition spec should be empty", response.table().partitionKeys().isEmpty()); + Assert.assertFalse("Partition spec should be empty", response.table().hasPartitionKeys()); Assert.assertEquals("Iceberg table type should be set", BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), response.table().parameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP));