Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Optional;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.PartitionSpec;
Expand All @@ -34,10 +35,12 @@
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.types.Types;
import org.junit.Assert;
Expand Down Expand Up @@ -364,4 +367,75 @@ public void testColumnCommentsAndParameters() {
);
Assert.assertEquals("Columns do not match", expectedColumns, actualColumns);
}

@Test
public void testRegisterTable() {
String namespace = createNamespace();
String tableName = getRandomName();
createTable(namespace, tableName);
Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName));
DataFile dataFile = DataFiles.builder(partitionSpec)
.withPath("/path/to/data-a.parquet")
.withFileSizeInBytes(10)
.withRecordCount(1)
.build();
table.newAppend().appendFile(dataFile).commit();
table.refresh();
long v1SnapshotId = table.currentSnapshot().snapshotId();
String v1MetadataLocation = ((BaseTable) table).operations().current().metadataFileLocation();
table.newDelete().deleteFile(dataFile).commit();
table.refresh();
Comment on lines +383 to +387
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can avoid the refresh calls.


String registeredTableName = getRandomName();
Table registeredTable = glueCatalog.registerTable(
TableIdentifier.of(namespace, registeredTableName), v1MetadataLocation);

Assert.assertEquals("registered table should have v1 snapshot ID",
v1SnapshotId, registeredTable.currentSnapshot().snapshotId());

Assert.assertNotEquals("registered table should have a different snapshot ID from the original table",
table.currentSnapshot().snapshotId(), registeredTable.currentSnapshot().snapshotId());

Assert.assertEquals("Registered table should have 1 data file",
1, Lists.newArrayList(registeredTable.newScan().planFiles()).size());

// check table in Glue
GetTableResponse response = glue.getTable(GetTableRequest.builder()
.databaseName(namespace).name(registeredTableName).build());
Assert.assertEquals("Table type should be set",
GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE, response.table().tableType());
Assert.assertNull("Storage descriptor should be empty", response.table().storageDescriptor());
Assert.assertFalse("Partition spec should be empty", response.table().hasPartitionKeys());
Assert.assertEquals("Iceberg table type should be set",
BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH),
response.table().parameters().get(BaseMetastoreTableOperations.TABLE_TYPE_PROP));
Assert.assertEquals("Metadata file location should be set",
v1MetadataLocation, response.table().parameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP));
}

@Test
public void testRegisterTableNamespaceNotFound() {
String namespace = createNamespace();
String tableName = getRandomName();
createTable(namespace, tableName);
Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName));
String metadataLocation = ((BaseTable) table).operations().current().metadataFileLocation();
AssertHelpers.assertThrows("Should fail to register to an unknown namespace",
NoSuchNamespaceException.class,
"not found in Glue",
() -> glueCatalog.registerTable(TableIdentifier.of(getRandomName(), getRandomName()), metadataLocation));
}

@Test
public void testRegisterTableAlreadyExists() {
String namespace = createNamespace();
String tableName = getRandomName();
createTable(namespace, tableName);
Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName));
String metadataLocation = ((BaseTable) table).operations().current().metadataFileLocation();
AssertHelpers.assertThrows("Should fail to register to an existing Glue table",
AlreadyExistsException.class,
"already exists in Glue",
() -> glueCatalog.registerTable(TableIdentifier.of(namespace, tableName), metadataLocation));
}
}
35 changes: 35 additions & 0 deletions aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
Expand All @@ -48,6 +49,7 @@
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.util.LockManagers;
Expand Down Expand Up @@ -431,6 +433,39 @@ protected boolean isValidIdentifier(TableIdentifier tableIdentifier) {
IcebergToGlueConverter.isValidTableName(tableIdentifier.name());
}

@Override
public org.apache.iceberg.Table registerTable(TableIdentifier identifier, String metadataFileLocation) {
Copy link
Contributor

@rdblue rdblue Feb 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to do this more generically in BaseMetastoreCatalog similar to a regular create?

    public Table registerTable(TableIdentifier identifier, String metadataFileLocation) {
      TableOperations ops = newTableOps(identifier);
      if (ops.current() != null) {
        throw new AlreadyExistsException("Table already exists: %s", identifier);
      }

      FileIO io = ops.io();
      TableMetadata metadata = TableMetadataParser.read(io, metadataFileLocation);

      try {
        // use temporary ops to pick up the table metadata settings
        ops.temp(metadata).commit(null, metadata);
      } catch (CommitFailedException ignored) {
        throw new AlreadyExistsException("Table was created concurrently: %s", identifier);
      }

      return new BaseTable(ops, fullTableName(name(), identifier));
    }

That will rewrite the metadata file rather than using it directly, but it seems like it would work in most cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jackye1995, what do you think about this suggestion?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rdblue I have considered this particular suggestion and implemented it under #5037, please do have a look at the implementation...

Preconditions.checkArgument(isValidIdentifier(identifier),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these can likely be generalized to the base metastore class, will do that after we have some other implementations to see how much common code we can extract

Copy link
Contributor

@singhpk234 singhpk234 Feb 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, looks like we are missing pre-conditions on metadataFileLocation in HiveCatalog. CodePointer

Adding it at BaseMetaStoreClass will unify this stuff.

"Table identifier to register is invalid: " + identifier);
Preconditions.checkArgument(metadataFileLocation != null && !metadataFileLocation.isEmpty(),
"Cannot register an empty metadata file location as a table");

Map<String, String> tableParameters = ImmutableMap.of(
BaseMetastoreTableOperations.TABLE_TYPE_PROP,
BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toLowerCase(Locale.ENGLISH),
BaseMetastoreTableOperations.METADATA_LOCATION_PROP,
metadataFileLocation);

TableInput tableInput = TableInput.builder()
.name(IcebergToGlueConverter.getTableName(identifier))
.tableType(GlueTableOperations.GLUE_EXTERNAL_TABLE_TYPE)
.parameters(tableParameters)
.build();

try {
glue.createTable(CreateTableRequest.builder()
.databaseName(IcebergToGlueConverter.getDatabaseName(identifier))
.tableInput(tableInput)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we save awsProperties.glueCatalogId() as well?

.build());
} catch (software.amazon.awssdk.services.glue.model.AlreadyExistsException e) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the full class path here? Is there another AlreadyExistsException?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, this is rethrown as Iceberg's AlreadyExistsException

throw new AlreadyExistsException(e, "Table %s already exists in Glue", identifier);
} catch (EntityNotFoundException e) {
throw new NoSuchNamespaceException(e, "Namespace %s is not found in Glue", identifier.namespace());
}

return loadTable(identifier);
}

@Override
public String name() {
return catalogName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class GlueTableOperations extends BaseMetastoreTableOperations {

// same as org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE
// more details: https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html
private static final String GLUE_EXTERNAL_TABLE_TYPE = "EXTERNAL_TABLE";
static final String GLUE_EXTERNAL_TABLE_TYPE = "EXTERNAL_TABLE";

private final GlueClient glue;
private final AwsProperties awsProperties;
Expand Down
26 changes: 26 additions & 0 deletions aws/src/test/java/org/apache/iceberg/aws/glue/TestGlueCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -455,4 +455,30 @@ public void testRemoveProperties() {
.when(glue).updateDatabase(Mockito.any(UpdateDatabaseRequest.class));
glueCatalog.removeProperties(Namespace.of("db1"), Sets.newHashSet("key"));
}

@Test
public void testRegisterTableInvalidIdentifier() {
AssertHelpers.assertThrows("Should not allow registering table with multi-level namespace",
IllegalArgumentException.class,
"Table identifier to register is invalid",
() -> glueCatalog.registerTable(TableIdentifier.of("a", "b", "name"), "s3://path"));

AssertHelpers.assertThrows("Should not allow registering table with unsupported table name",
IllegalArgumentException.class,
"Table identifier to register is invalid",
() -> glueCatalog.registerTable(TableIdentifier.of("a", "$name"), "s3://path"));
}

@Test
public void testRegisterTableWithBadLocation() {
AssertHelpers.assertThrows("Should not allow registering null location",
IllegalArgumentException.class,
"Cannot register an empty metadata file location as a table",
() -> glueCatalog.registerTable(TableIdentifier.of("a", "name"), null));

AssertHelpers.assertThrows("Should not allow registering empty location",
IllegalArgumentException.class,
"Cannot register an empty metadata file location as a table",
() -> glueCatalog.registerTable(TableIdentifier.of("a", "name"), ""));
}
}