Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion aws/src/main/java/org/apache/iceberg/aws/AwsProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,16 @@ public class AwsProperties implements Serializable {
public static final String GLUE_CATALOG_SKIP_ARCHIVE = "glue.skip-archive";
public static final boolean GLUE_CATALOG_SKIP_ARCHIVE_DEFAULT = false;

/**
* If Glue should skip name validations
* It is recommended to stick to Glue best practice in
* https://docs.aws.amazon.com/athena/latest/ug/glue-best-practices.html to make sure operations are Hive compatible.
* This is only added for users that have existing conventions using non-standard characters. When database name
* and table name validation are skipped, there is no guarantee that downstream systems would all support the names.
*/
public static final String GLUE_CATALOG_SKIP_NAME_VALIDATION = "glue.skip-name-validation";
public static final boolean GLUE_CATALOG_SKIP_NAME_VALIDATION_DEFAULT = false;

/**
* If set, GlueCatalog will use Lake Formation for access control.
* For more credential vending details, see: https://docs.aws.amazon.com/lake-formation/latest/dg/api-overview.html.
Expand Down Expand Up @@ -375,6 +385,7 @@ public class AwsProperties implements Serializable {

private String glueCatalogId;
private boolean glueCatalogSkipArchive;
private boolean glueCatalogSkipNameValidation;
private boolean glueLakeFormationEnabled;

private String dynamoDbTableName;
Expand All @@ -399,6 +410,7 @@ public AwsProperties() {

this.glueCatalogId = null;
this.glueCatalogSkipArchive = GLUE_CATALOG_SKIP_ARCHIVE_DEFAULT;
this.glueCatalogSkipNameValidation = GLUE_CATALOG_SKIP_NAME_VALIDATION_DEFAULT;
this.glueLakeFormationEnabled = GLUE_LAKEFORMATION_ENABLED_DEFAULT;

this.dynamoDbTableName = DYNAMODB_TABLE_NAME_DEFAULT;
Expand All @@ -417,6 +429,8 @@ public AwsProperties(Map<String, String> properties) {
this.glueCatalogId = properties.get(GLUE_CATALOG_ID);
this.glueCatalogSkipArchive = PropertyUtil.propertyAsBoolean(properties,
AwsProperties.GLUE_CATALOG_SKIP_ARCHIVE, AwsProperties.GLUE_CATALOG_SKIP_ARCHIVE_DEFAULT);
this.glueCatalogSkipNameValidation = PropertyUtil.propertyAsBoolean(properties,
AwsProperties.GLUE_CATALOG_SKIP_NAME_VALIDATION, AwsProperties.GLUE_CATALOG_SKIP_NAME_VALIDATION_DEFAULT);
this.glueLakeFormationEnabled = PropertyUtil.propertyAsBoolean(properties,
GLUE_LAKEFORMATION_ENABLED,
GLUE_LAKEFORMATION_ENABLED_DEFAULT);
Expand Down Expand Up @@ -512,11 +526,18 @@ public void setGlueCatalogId(String id) {
public boolean glueCatalogSkipArchive() {
return glueCatalogSkipArchive;
}

public void setGlueCatalogSkipArchive(boolean skipArchive) {
this.glueCatalogSkipArchive = skipArchive;
}

public boolean glueCatalogSkipNameValidation() {
return glueCatalogSkipNameValidation;
}

public void setGlueCatalogSkipNameValidation(boolean glueCatalogSkipNameValidation) {
this.glueCatalogSkipNameValidation = glueCatalogSkipNameValidation;
}

public boolean glueLakeFormationEnabled() {
return glueLakeFormationEnabled;
}
Expand Down
46 changes: 28 additions & 18 deletions aws/src/main/java/org/apache/iceberg/aws/glue/GlueCatalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,9 @@ protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
Map<String, String> tableSpecificCatalogProperties = ImmutableMap.<String, String>builder()
.putAll(catalogProperties)
.put(AwsProperties.LAKE_FORMATION_DB_NAME,
IcebergToGlueConverter.getDatabaseName(tableIdentifier))
IcebergToGlueConverter.getDatabaseName(tableIdentifier, awsProperties.glueCatalogSkipNameValidation()))
.put(AwsProperties.LAKE_FORMATION_TABLE_NAME,
IcebergToGlueConverter.getTableName(tableIdentifier))
IcebergToGlueConverter.getTableName(tableIdentifier, awsProperties.glueCatalogSkipNameValidation()))
.build();
// FileIO initialization depends on tableSpecificCatalogProperties, so a new FileIO is initialized each time
return new GlueTableOperations(glue, lockManager, catalogName, awsProperties,
Expand All @@ -224,7 +224,7 @@ protected TableOperations newTableOps(TableIdentifier tableIdentifier) {
protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
// check if value is set in database
GetDatabaseResponse response = glue.getDatabase(GetDatabaseRequest.builder()
.name(IcebergToGlueConverter.getDatabaseName(tableIdentifier))
.name(IcebergToGlueConverter.getDatabaseName(tableIdentifier, awsProperties.glueCatalogSkipNameValidation()))
.build());
String dbLocationUri = response.database().locationUri();
if (dbLocationUri != null) {
Expand All @@ -234,7 +234,7 @@ protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) {
return String.format(
"%s/%s.db/%s",
warehousePath,
IcebergToGlueConverter.getDatabaseName(tableIdentifier),
IcebergToGlueConverter.getDatabaseName(tableIdentifier, awsProperties.glueCatalogSkipNameValidation()),
tableIdentifier.name());
}

Expand All @@ -247,7 +247,7 @@ public List<TableIdentifier> listTables(Namespace namespace) {
do {
GetTablesResponse response = glue.getTables(GetTablesRequest.builder()
.catalogId(awsProperties.glueCatalogId())
.databaseName(IcebergToGlueConverter.toDatabaseName(namespace))
.databaseName(IcebergToGlueConverter.toDatabaseName(namespace, awsProperties.glueCatalogSkipNameValidation()))
.nextToken(nextToken)
.build());
nextToken = response.nextToken();
Expand Down Expand Up @@ -276,7 +276,8 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) {
TableMetadata lastMetadata = ops.current();
glue.deleteTable(DeleteTableRequest.builder()
.catalogId(awsProperties.glueCatalogId())
.databaseName(IcebergToGlueConverter.getDatabaseName(identifier))
.databaseName(IcebergToGlueConverter.getDatabaseName(
identifier, awsProperties.glueCatalogSkipNameValidation()))
.name(identifier.name())
.build());
LOG.info("Successfully dropped table {} from Glue", identifier);
Expand Down Expand Up @@ -309,10 +310,11 @@ public void renameTable(TableIdentifier from, TableIdentifier to) {
}
// keep metadata
Table fromTable = null;
String fromTableDbName = IcebergToGlueConverter.getDatabaseName(from);
String fromTableName = IcebergToGlueConverter.getTableName(from);
String toTableDbName = IcebergToGlueConverter.getDatabaseName(to);
String toTableName = IcebergToGlueConverter.getTableName(to);
String fromTableDbName = IcebergToGlueConverter.getDatabaseName(
from, awsProperties.glueCatalogSkipNameValidation());
String fromTableName = IcebergToGlueConverter.getTableName(from, awsProperties.glueCatalogSkipNameValidation());
String toTableDbName = IcebergToGlueConverter.getDatabaseName(to, awsProperties.glueCatalogSkipNameValidation());
String toTableName = IcebergToGlueConverter.getTableName(to, awsProperties.glueCatalogSkipNameValidation());
try {
GetTableResponse response = glue.getTable(GetTableRequest.builder()
.catalogId(awsProperties.glueCatalogId())
Expand Down Expand Up @@ -359,7 +361,8 @@ public void createNamespace(Namespace namespace, Map<String, String> metadata) {
try {
glue.createDatabase(CreateDatabaseRequest.builder()
.catalogId(awsProperties.glueCatalogId())
.databaseInput(IcebergToGlueConverter.toDatabaseInput(namespace, metadata))
.databaseInput(IcebergToGlueConverter.toDatabaseInput(
namespace, metadata, awsProperties.glueCatalogSkipNameValidation()))
.build());
LOG.info("Created namespace: {}", namespace);
} catch (software.amazon.awssdk.services.glue.model.AlreadyExistsException e) {
Expand Down Expand Up @@ -400,7 +403,8 @@ public List<Namespace> listNamespaces(Namespace namespace) throws NoSuchNamespac

@Override
public Map<String, String> loadNamespaceMetadata(Namespace namespace) throws NoSuchNamespaceException {
String databaseName = IcebergToGlueConverter.toDatabaseName(namespace);
String databaseName = IcebergToGlueConverter.toDatabaseName(
namespace, awsProperties.glueCatalogSkipNameValidation());
try {
Database database = glue.getDatabase(GetDatabaseRequest.builder()
.catalogId(awsProperties.glueCatalogId())
Expand Down Expand Up @@ -434,7 +438,7 @@ public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyExcept

GetTablesResponse response = glue.getTables(GetTablesRequest.builder()
.catalogId(awsProperties.glueCatalogId())
.databaseName(IcebergToGlueConverter.toDatabaseName(namespace))
.databaseName(IcebergToGlueConverter.toDatabaseName(namespace, awsProperties.glueCatalogSkipNameValidation()))
.build());

if (response.hasTableList() && !response.tableList().isEmpty()) {
Expand All @@ -450,7 +454,7 @@ public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyExcept

glue.deleteDatabase(DeleteDatabaseRequest.builder()
.catalogId(awsProperties.glueCatalogId())
.name(IcebergToGlueConverter.toDatabaseName(namespace))
.name(IcebergToGlueConverter.toDatabaseName(namespace, awsProperties.glueCatalogSkipNameValidation()))
.build());
LOG.info("Dropped namespace: {}", namespace);
// Always successful, otherwise exception is thrown
Expand All @@ -464,8 +468,9 @@ public boolean setProperties(Namespace namespace, Map<String, String> properties
newProperties.putAll(properties);
glue.updateDatabase(UpdateDatabaseRequest.builder()
.catalogId(awsProperties.glueCatalogId())
.name(IcebergToGlueConverter.toDatabaseName(namespace))
.databaseInput(IcebergToGlueConverter.toDatabaseInput(namespace, newProperties))
.name(IcebergToGlueConverter.toDatabaseName(namespace, awsProperties.glueCatalogSkipNameValidation()))
.databaseInput(IcebergToGlueConverter.toDatabaseInput(
namespace, newProperties, awsProperties.glueCatalogSkipNameValidation()))
.build());
LOG.debug("Successfully set properties {} for {}", properties.keySet(), namespace);
// Always successful, otherwise exception is thrown
Expand All @@ -481,8 +486,9 @@ public boolean removeProperties(Namespace namespace, Set<String> properties) thr

glue.updateDatabase(UpdateDatabaseRequest.builder()
.catalogId(awsProperties.glueCatalogId())
.name(IcebergToGlueConverter.toDatabaseName(namespace))
.databaseInput(IcebergToGlueConverter.toDatabaseInput(namespace, metadata))
.name(IcebergToGlueConverter.toDatabaseName(namespace, awsProperties.glueCatalogSkipNameValidation()))
.databaseInput(IcebergToGlueConverter.toDatabaseInput(
namespace, metadata, awsProperties.glueCatalogSkipNameValidation()))
.build());
LOG.debug("Successfully removed properties {} from {}", properties, namespace);
// Always successful, otherwise exception is thrown
Expand All @@ -491,6 +497,10 @@ public boolean removeProperties(Namespace namespace, Set<String> properties) thr

@Override
protected boolean isValidIdentifier(TableIdentifier tableIdentifier) {
if (awsProperties.glueCatalogSkipNameValidation()) {
return true;
}

return IcebergToGlueConverter.isValidNamespace(tableIdentifier.namespace()) &&
IcebergToGlueConverter.isValidTableName(tableIdentifier.name());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ class GlueTableOperations extends BaseMetastoreTableOperations {
FileIO fileIO, TableIdentifier tableIdentifier) {
this.glue = glue;
this.awsProperties = awsProperties;
this.databaseName = IcebergToGlueConverter.getDatabaseName(tableIdentifier);
this.tableName = IcebergToGlueConverter.getTableName(tableIdentifier);
this.databaseName = IcebergToGlueConverter.getDatabaseName(
tableIdentifier, awsProperties.glueCatalogSkipNameValidation());
this.tableName = IcebergToGlueConverter.getTableName(
tableIdentifier, awsProperties.glueCatalogSkipNameValidation());
this.fullTableName = String.format("%s.%s.%s", catalogName, databaseName, tableName);
this.commitLockEntityId = String.format("%s.%s", databaseName, tableName);
this.fileIO = fileIO;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,31 +105,41 @@ static void validateNamespace(Namespace namespace) {

/**
* Validate and convert Iceberg namespace to Glue database name
* @param namespace Iceberg namespace
*
* @param namespace Iceberg namespace
* @param skipNameValidation should skip name validation
* @return database name
*/
static String toDatabaseName(Namespace namespace) {
validateNamespace(namespace);
static String toDatabaseName(Namespace namespace, boolean skipNameValidation) {
if (!skipNameValidation) {
validateNamespace(namespace);
}

return namespace.level(0);
}

/**
* Validate and get Glue database name from Iceberg TableIdentifier
* @param tableIdentifier Iceberg table identifier
*
* @param tableIdentifier Iceberg table identifier
* @param skipNameValidation should skip name validation
* @return database name
*/
static String getDatabaseName(TableIdentifier tableIdentifier) {
return toDatabaseName(tableIdentifier.namespace());
static String getDatabaseName(TableIdentifier tableIdentifier, boolean skipNameValidation) {
return toDatabaseName(tableIdentifier.namespace(), skipNameValidation);
}

/**
* Validate and convert Iceberg name to Glue DatabaseInput
* @param namespace Iceberg namespace
* @param metadata metadata map
*
* @param namespace Iceberg namespace
* @param metadata metadata map
* @param skipNameValidation should skip name validation
* @return Glue DatabaseInput
*/
static DatabaseInput toDatabaseInput(Namespace namespace, Map<String, String> metadata) {
DatabaseInput.Builder builder = DatabaseInput.builder().name(toDatabaseName(namespace));
static DatabaseInput toDatabaseInput(Namespace namespace, Map<String, String> metadata, boolean skipNameValidation) {
DatabaseInput.Builder builder = DatabaseInput.builder().name(toDatabaseName(namespace,
skipNameValidation));
Map<String, String> parameters = Maps.newHashMap();
metadata.forEach((k, v) -> {
if (GLUE_DB_DESCRIPTION_KEY.equals(k)) {
Expand Down Expand Up @@ -167,11 +177,16 @@ static void validateTableName(String tableName) {

/**
* Validate and get Glue table name from Iceberg TableIdentifier
* @param tableIdentifier table identifier
*
* @param tableIdentifier table identifier
* @param skipNameValidation should skip name validation
* @return table name
*/
static String getTableName(TableIdentifier tableIdentifier) {
validateTableName(tableIdentifier.name());
static String getTableName(TableIdentifier tableIdentifier, boolean skipNameValidation) {
if (!skipNameValidation) {
validateTableName(tableIdentifier.name());
}

return tableIdentifier.name();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -479,4 +479,13 @@ public void testTablePropsDefinedAtCatalogLevel() {
Assert.assertTrue(properties.containsKey("table-override.key4"));
Assert.assertEquals("catalog-override-key4", properties.get("table-override.key4"));
}

@Test
public void testValidateIdentifierSkipNameValidation() {
AwsProperties props = new AwsProperties();
props.setGlueCatalogSkipNameValidation(true);
glueCatalog.initialize(CATALOG_NAME, WAREHOUSE_PATH, props, glue,
LockManagers.defaultLockManager(), null);
Assert.assertEquals(glueCatalog.isValidIdentifier(TableIdentifier.parse("db-1.a-1")), true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
Expand All @@ -49,7 +50,7 @@ public class TestIcebergToGlueConverter {

@Test
public void testToDatabaseName() {
Assert.assertEquals("db", IcebergToGlueConverter.toDatabaseName(Namespace.of("db")));
Assert.assertEquals("db", IcebergToGlueConverter.toDatabaseName(Namespace.of("db"), false));
}

@Test
Expand All @@ -64,7 +65,30 @@ public void testToDatabaseNameFailure() {
AssertHelpers.assertThrows("bad namespace name",
ValidationException.class,
"Cannot convert namespace",
() -> IcebergToGlueConverter.toDatabaseName(name)
() -> IcebergToGlueConverter.toDatabaseName(name, false)
);
}
}

@Test
public void testSkipNamespaceValidation() {
List<Namespace> acceptableNames = Lists.newArrayList(
Namespace.of("db-1"),
Namespace.of("db-1-1-1"));
for (Namespace name : acceptableNames) {
Assert.assertEquals(name.toString(), IcebergToGlueConverter.toDatabaseName(name, true)
);
}
}

@Test
public void testSkipTableNameValidation() {
List<TableIdentifier> acceptableIdentifiers = Lists.newArrayList(
TableIdentifier.parse("db.a-1"),
TableIdentifier.parse("db.a-1-1"),
TableIdentifier.parse("db.a#1"));
for (TableIdentifier identifier : acceptableIdentifiers) {
Assert.assertEquals(identifier.name(), IcebergToGlueConverter.getTableName(identifier, true)
);
}
}
Expand All @@ -75,7 +99,7 @@ public void testToDatabaseInput() {
IcebergToGlueConverter.GLUE_DB_DESCRIPTION_KEY, "description",
IcebergToGlueConverter.GLUE_DB_LOCATION_KEY, "s3://location",
"key", "val");
DatabaseInput databaseInput = IcebergToGlueConverter.toDatabaseInput(Namespace.of("ns"), properties);
DatabaseInput databaseInput = IcebergToGlueConverter.toDatabaseInput(Namespace.of("ns"), properties, false);
Assert.assertEquals("Location should be set", "s3://location", databaseInput.locationUri());
Assert.assertEquals("Description should be set", "description", databaseInput.description());
Assert.assertEquals("Parameters should be set", ImmutableMap.of("key", "val"), databaseInput.parameters());
Expand All @@ -89,15 +113,15 @@ public void testToDatabaseInputNoParameter() {
.parameters(ImmutableMap.of())
.build();
Namespace namespace = Namespace.of("db");
Assert.assertEquals(input, IcebergToGlueConverter.toDatabaseInput(namespace, ImmutableMap.of()));
Assert.assertEquals(input, IcebergToGlueConverter.toDatabaseInput(namespace, ImmutableMap.of(), false));
}

@Test
public void testToDatabaseInputEmptyLocation() {
Map<String, String> properties = ImmutableMap.of(
IcebergToGlueConverter.GLUE_DB_DESCRIPTION_KEY, "description",
"key", "val");
DatabaseInput databaseInput = IcebergToGlueConverter.toDatabaseInput(Namespace.of("ns"), properties);
DatabaseInput databaseInput = IcebergToGlueConverter.toDatabaseInput(Namespace.of("ns"), properties, false);
Assert.assertNull("Location should not be set", databaseInput.locationUri());
Assert.assertEquals("Description should be set", "description", databaseInput.description());
Assert.assertEquals("Parameters should be set", ImmutableMap.of("key", "val"), databaseInput.parameters());
Expand All @@ -109,7 +133,7 @@ public void testToDatabaseInputEmptyDescription() {
Map<String, String> properties = ImmutableMap.of(
IcebergToGlueConverter.GLUE_DB_LOCATION_KEY, "s3://location",
"key", "val");
DatabaseInput databaseInput = IcebergToGlueConverter.toDatabaseInput(Namespace.of("ns"), properties);
DatabaseInput databaseInput = IcebergToGlueConverter.toDatabaseInput(Namespace.of("ns"), properties, false);
Assert.assertEquals("Location should be set", "s3://location", databaseInput.locationUri());
Assert.assertNull("Description should not be set", databaseInput.description());
Assert.assertEquals("Parameters should be set", ImmutableMap.of("key", "val"), databaseInput.parameters());
Expand Down
Loading