diff --git a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java index 4dade77a3ac5..437aaa6fa1d7 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java @@ -24,6 +24,7 @@ import java.util.Optional; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.BaseTable; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.PartitionSpec; @@ -34,10 +35,12 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; import org.junit.Assert; @@ -364,4 +367,75 @@ public void testColumnCommentsAndParameters() { ); Assert.assertEquals("Columns do not match", expectedColumns, actualColumns); } + + @Test + public void testRegisterTable() { + String namespace = createNamespace(); + String tableName = getRandomName(); + createTable(namespace, tableName); + Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName)); + DataFile dataFile = DataFiles.builder(partitionSpec) + .withPath("/path/to/data-a.parquet") + .withFileSizeInBytes(10) + .withRecordCount(1) + .build(); + table.newAppend().appendFile(dataFile).commit(); + table.refresh(); + long v1SnapshotId = table.currentSnapshot().snapshotId(); + String v1MetadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + table.newDelete().deleteFile(dataFile).commit(); + table.refresh(); + + String registeredTableName = getRandomName(); + Table registeredTable = glueCatalog.registerTable( + TableIdentifier.of(namespace, registeredTableName), v1MetadataLocation); + + Assert.assertEquals("registered table should have v1 snapshot ID", + v1SnapshotId, registeredTable.currentSnapshot().snapshotId()); + + Assert.assertNotEquals("registered table should have a different snapshot ID from the original table", + table.currentSnapshot().snapshotId(), registeredTable.currentSnapshot().snapshotId()); + + Assert.assertEquals("Registered table should have 1 data file", + 1, Lists.newArrayList(registeredTable.newScan().planFiles()).size()); + + // check table in Glue + GetTableResponse response = glue.getTable(GetTableRequest.builder() + .databaseName(namespace).name(registeredTableName).build()); + Assert.assertEquals("Table type should be set", + GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE, response.table().tableType()); + Assert.assertNull("Storage descriptor should be empty", response.table().storageDescriptor()); + Assert.assertFalse("Partition spec should be empty", response.table().hasPartitionKeys()); + Assert.assertEquals("Iceberg table type should be set", + BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), + response.table().parameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP)); + Assert.assertEquals("Metadata file location should be set", + v1MetadataLocation, response.table().parameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)); + } + + @Test + public void testRegisterTableNamespaceNotFound() { + String namespace = createNamespace(); + String tableName = getRandomName(); + createTable(namespace, tableName); + Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName)); + String metadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + AssertHelpers.assertThrows("Should fail to register to an unknown namespace", + NoSuchNamespaceException.class, + "not found in Glue", + () -> glueCatalog.registerTable(TableIdentifier.of(getRandomName(), getRandomName()), metadataLocation)); + } + + @Test + public void testRegisterTableAlreadyExists() { + String namespace = createNamespace(); + String tableName = getRandomName(); + createTable(namespace, tableName); + Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName)); + String metadataLocation = ((BaseTable) table).operations().current().metadataFileLocation(); + AssertHelpers.assertThrows("Should fail to register to an existing Glue table", + AlreadyExistsException.class, + "already exists in Glue", + () -> glueCatalog.registerTable(TableIdentifier.of(namespace, tableName), metadataLocation)); + } } diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java index f878ecf2412b..1a39190260bf 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java @@ -22,6 +22,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -48,6 +49,7 @@ import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.LockManagers; @@ -431,6 +433,39 @@ protected boolean isValidIdentifier(TableIdentifier tableIdentifier) { IcebergToGlueConverter.isValidTableName(tableIdentifier.name()); } + @Override + public org.apache.iceberg.Table registerTable(TableIdentifier identifier, String metadataFileLocation) { + Preconditions.checkArgument(isValidIdentifier(identifier), + "Table identifier to register is invalid: " + identifier); + Preconditions.checkArgument(metadataFileLocation != null && !metadataFileLocation.isEmpty(), + "Cannot register an empty metadata file location as a table"); + + Map tableParameters = ImmutableMap.of( + BaseMetastoreTableOperations.TABLE_TYPE_PROP, + BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH), + BaseMetastoreTableOperations.METADATA_LOCATION_PROP, + metadataFileLocation); + + TableInput tableInput = TableInput.builder() + .name(IcebergToGlueConverter.getTableName(identifier)) + .tableType(GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE) + .parameters(tableParameters) + .build(); + + try { + glue.createTable(CreateTableRequest.builder() + .databaseName(IcebergToGlueConverter.getDatabaseName(identifier)) + .tableInput(tableInput) + .build()); + } catch (software.amazon.awssdk.services.glue.model.AlreadyExistsException e) { + throw new AlreadyExistsException(e, "Table %s already exists in Glue", identifier); + } catch (EntityNotFoundException e) { + throw new NoSuchNamespaceException(e, "Namespace %s is not found in Glue", identifier.namespace()); + } + + return loadTable(identifier); + } + @Override public String name() { return catalogName; diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java index bd239d7afd3b..2b1d60d68a09 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/GlueTableOperations.java @@ -52,7 +52,7 @@ class GlueTableOperations extends BaseMetastoreTableOperations { // same as org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE // more details: https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html - private static final String GLUE_EXTERNAL_TABLE_TYPE = "EXTERNAL_TABLE"; + static final String GLUE_EXTERNAL_TABLE_TYPE = "EXTERNAL_TABLE"; private final GlueClient glue; private final AwsProperties awsProperties; diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java b/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java index b607ca6b3b23..b2c0e3f5f6fb 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java @@ -455,4 +455,30 @@ public void testRemoveProperties() { .when(glue).updateDatabase(Mockito.any(UpdateDatabaseRequest.class)); glueCatalog.removeProperties(Namespace.of("db1"), Sets.newHashSet("key")); } + + @Test + public void testRegisterTableInvalidIdentifier() { + AssertHelpers.assertThrows("Should not allow registering table with multi-level namespace", + IllegalArgumentException.class, + "Table identifier to register is invalid", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "b", "name"), "s3://path")); + + AssertHelpers.assertThrows("Should not allow registering table with unsupported table name", + IllegalArgumentException.class, + "Table identifier to register is invalid", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "$name"), "s3://path")); + } + + @Test + public void testRegisterTableWithBadLocation() { + AssertHelpers.assertThrows("Should not allow registering null location", + IllegalArgumentException.class, + "Cannot register an empty metadata file location as a table", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "name"), null)); + + AssertHelpers.assertThrows("Should not allow registering empty location", + IllegalArgumentException.class, + "Cannot register an empty metadata file location as a table", + () -> glueCatalog.registerTable(TableIdentifier.of("a", "name"), "")); + } }