Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
import java.util.stream.IntStream;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.aws.AwsClientFactories;
import org.apache.iceberg.aws.AwsClientFactory;
import org.apache.iceberg.aws.AwsIntegTestUtil;
Expand All @@ -42,6 +44,7 @@
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Types;
import org.assertj.core.api.Assertions;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
Expand Down Expand Up @@ -295,6 +298,37 @@ public void testDropNamespace() {
Assert.assertFalse("namespace must not exist", response.hasItem());
}

@Test
public void testRegisterTable() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pair of tests is repeated (in a very similar way) across multiple catalogs. Can those be centralized somewhere? CatalogTests maybe?

Namespace namespace = Namespace.of(genRandomName());
catalog.createNamespace(namespace);
TableIdentifier identifier = TableIdentifier.of(namespace, catalogTableName);
catalog.createTable(identifier, SCHEMA);
Table registeringTable = catalog.loadTable(identifier);
Assertions.assertThat(catalog.dropTable(identifier, false)).isTrue();
TableOperations ops = ((HasTableOperations) registeringTable).operations();
String metadataLocation = ((DynamoDbTableOperations) ops).currentMetadataLocation();
Assertions.assertThat(catalog.registerTable(identifier, metadataLocation)).isNotNull();
Assertions.assertThat(catalog.loadTable(identifier)).isNotNull();
Assertions.assertThat(catalog.dropTable(identifier, true)).isTrue();
Assertions.assertThat(catalog.dropNamespace(namespace)).isTrue();
}

@Test
public void testRegisterExistingTable() {
  // Create a namespace and a table that will remain registered for the whole test.
  Namespace ns = Namespace.of(genRandomName());
  catalog.createNamespace(ns);
  TableIdentifier tableId = TableIdentifier.of(ns, catalogTableName);
  catalog.createTable(tableId, SCHEMA);
  Table existingTable = catalog.loadTable(tableId);
  TableOperations tableOps = ((HasTableOperations) existingTable).operations();
  String currentMetadata = ((DynamoDbTableOperations) tableOps).currentMetadataLocation();
  // Registering over an identifier that is still present must fail.
  Assertions.assertThatThrownBy(() -> catalog.registerTable(tableId, currentMetadata))
      .isInstanceOf(AlreadyExistsException.class);
  // Clean up: purge the table and remove the namespace.
  Assertions.assertThat(catalog.dropTable(tableId, true)).isTrue();
  Assertions.assertThat(catalog.dropNamespace(ns)).isTrue();
}

/** Returns a random, dash-free name suitable for namespaces and tables. */
private static String genRandomName() {
  String uuid = UUID.randomUUID().toString();
  return uuid.replace("-", "");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.stream.Collectors;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.PartitionSpec;
Expand All @@ -42,6 +43,7 @@
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.NestedField;
import org.assertj.core.api.Assertions;
import org.junit.Assert;
import org.junit.Test;
import software.amazon.awssdk.services.glue.model.Column;
Expand Down Expand Up @@ -433,4 +435,33 @@ public void testTablePropsDefinedAtCatalogLevel() {
"table-key5",
table.properties().get("key5"));
}

@Test
public void testRegisterTable() {
  // Create a table, remember its current metadata file, then drop it (without purge).
  String namespace = createNamespace();
  String tableName = getRandomName();
  createTable(namespace, tableName);
  TableIdentifier tableId = TableIdentifier.of(namespace, tableName);
  Table loaded = glueCatalog.loadTable(tableId);
  TableOperations tableOps = ((BaseTable) loaded).operations();
  String metadataFile = tableOps.current().metadataFileLocation();
  Assertions.assertThat(glueCatalog.dropTable(tableId, false)).isTrue();
  // Re-register from the surviving metadata file and verify the table is usable again.
  Assertions.assertThat(glueCatalog.registerTable(tableId, metadataFile)).isNotNull();
  Assertions.assertThat(glueCatalog.loadTable(tableId)).isNotNull();
  // Clean up: purge the table and remove the namespace.
  Assertions.assertThat(glueCatalog.dropTable(tableId, true)).isTrue();
  Assertions.assertThat(glueCatalog.dropNamespace(Namespace.of(namespace))).isTrue();
}

@Test
public void testRegisterTableAlreadyExists() {
  // Create a table that stays registered for the duration of the test.
  String namespace = createNamespace();
  String tableName = getRandomName();
  createTable(namespace, tableName);
  TableIdentifier tableId = TableIdentifier.of(namespace, tableName);
  Table loaded = glueCatalog.loadTable(tableId);
  TableOperations tableOps = ((BaseTable) loaded).operations();
  String metadataFile = tableOps.current().metadataFileLocation();
  // Registering an identifier that already exists must fail.
  Assertions.assertThatThrownBy(() -> glueCatalog.registerTable(tableId, metadataFile))
      .isInstanceOf(AlreadyExistsException.class);
  // Clean up: purge the table and remove the namespace.
  Assertions.assertThat(glueCatalog.dropTable(tableId, true)).isTrue();
  Assertions.assertThat(glueCatalog.dropNamespace(Namespace.of(namespace))).isTrue();
}
}
21 changes: 21 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.CommitFailedException;
import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -65,6 +66,26 @@ public Table loadTable(TableIdentifier identifier) {
return result;
}

/**
 * Registers an existing Iceberg table in this catalog from the location of a table
 * metadata file, without rewriting data or metadata.
 *
 * @param identifier the identifier under which the table is registered; must be
 *     non-null and accepted by {@code isValidIdentifier}
 * @param metadataFileLocation non-empty location of an existing table metadata file
 * @return a {@code Table} instance backed by the newly registered metadata
 * @throws AlreadyExistsException if a table with the same identifier already exists
 */
@Override
public Table registerTable(TableIdentifier identifier, String metadataFileLocation) {
Preconditions.checkArgument(
identifier != null && isValidIdentifier(identifier), "Invalid identifier: %s", identifier);
Preconditions.checkArgument(metadataFileLocation != null && !metadataFileLocation.isEmpty(),
"Cannot register an empty metadata file location as a table");

// Throw an exception if this table already exists in the catalog.
if (tableExists(identifier)) {
throw new AlreadyExistsException("Table already exists: %s", identifier);
}

TableOperations ops = newTableOps(identifier);
// Parse the metadata from the supplied file through the table's own FileIO.
InputFile metadataFile = ops.io().newInputFile(metadataFileLocation);
TableMetadata metadata = TableMetadataParser.read(ops.io(), metadataFile);
// Commit with a null base: the table has no prior state in this catalog, so the
// parsed metadata becomes its first version here (presumably implementations treat
// a null base as a create — NOTE(review): confirm against each TableOperations impl).
ops.commit(null, metadata);

return new BaseTable(ops, identifier.toString());
}

@Override
public TableBuilder buildTable(TableIdentifier identifier, Schema schema) {
return new BaseMetastoreCatalogTableBuilder(identifier, schema);
Expand Down
66 changes: 66 additions & 0 deletions core/src/main/java/org/apache/iceberg/CatalogUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@
package org.apache.iceberg;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.SupportsNamespaces;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.common.DynClasses;
import org.apache.iceberg.common.DynConstructors;
import org.apache.iceberg.common.DynMethods;
Expand All @@ -33,6 +39,7 @@
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.MapMaker;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
Expand Down Expand Up @@ -346,4 +353,63 @@ public static void configureHadoopConf(Object maybeConfigurable, Object conf) {

setConf.invoke(conf);
}

/**
* Used to migrate tables from one catalog(source catalog) to another catalog(target catalog).
* Also, the table would be dropped off from the source catalog once the migration is successful.
*
* @param tableIdentifiers a list of tableIdentifiers for the tables required to be migrated,
* if not specified all the tables would be migrated
* @param sourceCatalogProperties Source Catalog Properties
* @param targetCatalogProperties Target Catalog Properties
* @param sourceHadoopConfig Source Catalog Hadoop Configuration
* @param targetHadoopConfig Target Catalog Hadoop Configuration
* @return list of table identifiers for successfully migrated tables
*/
public static List<TableIdentifier> migrateTables(List<TableIdentifier> tableIdentifiers,
Map<String, String> sourceCatalogProperties, Map<String, String> targetCatalogProperties,
Object sourceHadoopConfig, Object targetHadoopConfig) {
if (tableIdentifiers != null) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's leave out the catalog instantiation and configuration here completely. I suspect that users have at least one of these catalogs already handy - and setting up "the same" catalog twice is superfluous.

tableIdentifiers.forEach(tableIdentifier -> Preconditions.checkArgument(
tableIdentifier != null, "Invalid identifier: %s", tableIdentifier));
}
Catalog sourceCatalog;
try {
sourceCatalog = loadCatalog(sourceCatalogProperties.get("catalogImpl"),
sourceCatalogProperties.get("catalogName"), sourceCatalogProperties, sourceHadoopConfig);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(String.format(
"Cannot initialize Source Catalog implementation %s: %s", sourceCatalogProperties.get("catalogImpl"),
e.getMessage()), e);
}
Catalog targetCatalog;
try {
targetCatalog = loadCatalog(targetCatalogProperties.get("catalogImpl"),
targetCatalogProperties.get("catalogName"), targetCatalogProperties, targetHadoopConfig);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(String.format(
"Cannot initialize Target Catalog implementation %s: %s", targetCatalogProperties.get("catalogImpl"),
e.getMessage()), e);
}
List<TableIdentifier> allIdentifiers = tableIdentifiers;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, this code should probably live in Catalog: A new function like Catalog.registerTableFromCatalog() to "move" a single table to the current catalog. The HadoopCatalog could then do the special-handling in its implementation.

if (tableIdentifiers == null || tableIdentifiers.isEmpty()) {
List<Namespace> namespaces = (sourceCatalog instanceof SupportsNamespaces) ?
((SupportsNamespaces) sourceCatalog).listNamespaces() : ImmutableList.of(Namespace.empty());
allIdentifiers = namespaces.stream().flatMap(ns ->
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this will run for a very long time, like when there a a lot of tables.

sourceCatalog.listTables(ns).stream()).collect(Collectors.toList());
}
List<TableIdentifier> migratedTableIdentifiers = new ArrayList<TableIdentifier>();
allIdentifiers.forEach(tableIdentifier -> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this will run for a very long time, like when there a a lot of tables.
If things fail in the meantime, it's hard to resume after the failed table.
I.e. error handling here is tricky.

Not sure whether it is actually possible to properly handle the case when registerTable worked, but dropTable failed - in such a case you'd have the same table in two catalogs.

final Table icebergTable = sourceCatalog.loadTable(tableIdentifier);
TableOperations ops = ((HasTableOperations) icebergTable).operations();
String metadataLocation = ops.current().metadataFileLocation();
targetCatalog.registerTable(tableIdentifier, metadataLocation);
migratedTableIdentifiers.add(tableIdentifier);
if (!(sourceCatalogProperties.get("catalogImpl").equals("org.apache.iceberg.hadoop.HadoopCatalog"))) {
// Hadoop dropTable deletes the table completely even if the purge is false, would update in follow-up PR
sourceCatalog.dropTable(tableIdentifier, false);
}
});
return migratedTableIdentifiers;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,15 @@
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.exceptions.NoSuchTableException;
Expand All @@ -47,6 +50,7 @@
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;
import org.assertj.core.api.Assertions;
import org.junit.Assert;
import org.junit.Test;

Expand Down Expand Up @@ -589,4 +593,33 @@ public void testTablePropsDefinedAtCatalogLevel() throws IOException {
"table-key5",
table.properties().get("key5"));
}

@Test
public void testRegisterTable() throws IOException {
  // Register the metadata of an existing table under a second identifier.
  TableIdentifier sourceId = TableIdentifier.of("a", "t1");
  TableIdentifier registeredId = TableIdentifier.of("a", "t2");
  HadoopCatalog hadoopCatalog = hadoopCatalog();
  hadoopCatalog.createTable(sourceId, SCHEMA);
  Table sourceTable = hadoopCatalog.loadTable(sourceId);
  TableOperations sourceOps = ((HasTableOperations) sourceTable).operations();
  String metadataFile = ((HadoopTableOperations) sourceOps).current().metadataFileLocation();
  Assertions.assertThat(hadoopCatalog.registerTable(registeredId, metadataFile)).isNotNull();
  Assertions.assertThat(hadoopCatalog.loadTable(registeredId)).isNotNull();
  // Clean up both identifiers.
  Assertions.assertThat(hadoopCatalog.dropTable(sourceId)).isTrue();
  Assertions.assertThat(hadoopCatalog.dropTable(registeredId)).isTrue();
}

@Test
public void testRegisterExistingTable() throws IOException {
  // Registering over an identifier that already exists must fail with AlreadyExistsException.
  TableIdentifier tableId = TableIdentifier.of("a", "t1");
  HadoopCatalog hadoopCatalog = hadoopCatalog();
  hadoopCatalog.createTable(tableId, SCHEMA);
  Table existingTable = hadoopCatalog.loadTable(tableId);
  TableOperations tableOps = ((HasTableOperations) existingTable).operations();
  String metadataFile = ((HadoopTableOperations) tableOps).current().metadataFileLocation();
  Assertions.assertThatThrownBy(() -> hadoopCatalog.registerTable(tableId, metadataFile))
      .isInstanceOf(AlreadyExistsException.class)
      .hasMessage("Table already exists: a.t1");
  // Clean up.
  Assertions.assertThat(hadoopCatalog.dropTable(tableId)).isTrue();
}
}
28 changes: 28 additions & 0 deletions core/src/test/java/org/apache/iceberg/jdbc/TestJdbcCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.catalog.CatalogTests;
import org.apache.iceberg.catalog.Namespace;
Expand All @@ -58,6 +60,7 @@
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;
import org.assertj.core.api.Assertions;
import org.junit.Assert;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -639,4 +642,29 @@ public void testConversions() {
Assert.assertEquals(ns, JdbcUtil.stringToNamespace(nsString));
}

@Test
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these tests better live in CatalogTests?

public void testRegisterTable() {
TableIdentifier identifier = TableIdentifier.of("a", "t1");
catalog.createTable(identifier, SCHEMA);
Table registeringTable = catalog.loadTable(identifier);
catalog.dropTable(identifier, false);
TableOperations ops = ((HasTableOperations) registeringTable).operations();
String metadataLocation = ((JdbcTableOperations) ops).currentMetadataLocation();
Assertions.assertThat(catalog.registerTable(identifier, metadataLocation)).isNotNull();
Assertions.assertThat(catalog.loadTable(identifier)).isNotNull();
Assertions.assertThat(catalog.dropTable(identifier)).isTrue();
}

@Test
public void testRegisterExistingTable() {
  // Registering over an identifier that already exists must fail with AlreadyExistsException.
  TableIdentifier tableId = TableIdentifier.of("a", "t1");
  catalog.createTable(tableId, SCHEMA);
  Table existingTable = catalog.loadTable(tableId);
  TableOperations tableOps = ((HasTableOperations) existingTable).operations();
  String metadataFile = ((JdbcTableOperations) tableOps).currentMetadataLocation();
  Assertions.assertThatThrownBy(() -> catalog.registerTable(tableId, metadataFile))
      .isInstanceOf(AlreadyExistsException.class)
      .hasMessage("Table already exists: a.t1");
  // Clean up.
  Assertions.assertThat(catalog.dropTable(tableId)).isTrue();
}
}
31 changes: 31 additions & 0 deletions dell/src/test/java/org/apache/iceberg/dell/ecs/TestEcsCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,14 @@
import java.util.Map;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.dell.mock.ecs.EcsS3MockRule;
import org.apache.iceberg.exceptions.AlreadyExistsException;
import org.apache.iceberg.exceptions.NamespaceNotEmptyException;
import org.apache.iceberg.exceptions.NoSuchNamespaceException;
import org.apache.iceberg.exceptions.NoSuchTableException;
Expand All @@ -35,6 +39,7 @@
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.types.Types;
import org.assertj.core.api.Assertions;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
Expand Down Expand Up @@ -170,4 +175,30 @@ public void testRenameTable() {
Assert.assertFalse("Old table does not exist", ecsCatalog.tableExists(TableIdentifier.of("a", "t1")));
Assert.assertTrue("New table exists", ecsCatalog.tableExists(TableIdentifier.of("b", "t2")));
}

@Test
public void testRegisterTable() {
  // Create a table, drop it without purge, then re-register it from its metadata file.
  TableIdentifier tableId = TableIdentifier.of("a", "t1");
  ecsCatalog.createTable(tableId, SCHEMA);
  Table droppedTable = ecsCatalog.loadTable(tableId);
  ecsCatalog.dropTable(tableId, false);
  TableOperations tableOps = ((HasTableOperations) droppedTable).operations();
  String metadataFile = ((EcsTableOperations) tableOps).currentMetadataLocation();
  Assertions.assertThat(ecsCatalog.registerTable(tableId, metadataFile)).isNotNull();
  Assertions.assertThat(ecsCatalog.loadTable(tableId)).isNotNull();
  // Clean up with purge.
  Assertions.assertThat(ecsCatalog.dropTable(tableId, true)).isTrue();
}

@Test
public void testRegisterExistingTable() {
  // Registering over an identifier that already exists must fail with AlreadyExistsException.
  TableIdentifier tableId = TableIdentifier.of("a", "t1");
  ecsCatalog.createTable(tableId, SCHEMA);
  Table existingTable = ecsCatalog.loadTable(tableId);
  TableOperations tableOps = ((HasTableOperations) existingTable).operations();
  String metadataFile = ((EcsTableOperations) tableOps).currentMetadataLocation();
  Assertions.assertThatThrownBy(() -> ecsCatalog.registerTable(tableId, metadataFile))
      .isInstanceOf(AlreadyExistsException.class)
      .hasMessage("Table already exists: a.t1");
  // Clean up with purge.
  Assertions.assertThat(ecsCatalog.dropTable(tableId, true)).isTrue();
}
}
Loading