diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/converters/CatalogToHiveConverter.java b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/CatalogToHiveConverter.java new file mode 100644 index 000000000000..553ea056f8f5 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/CatalogToHiveConverter.java @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.converters; + +import com.amazonaws.services.glue.model.ErrorDetail; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.commons.lang3.ObjectUtils; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.FunctionType; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.ResourceType; +import org.apache.hadoop.hive.metastore.api.ResourceUri; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CatalogToHiveConverter { + + private static final Logger LOG = LoggerFactory.getLogger(CatalogToHiveConverter.class); + + private static final ImmutableMap EXCEPTION_MAP = ImmutableMap.builder() + .put("AlreadyExistsException", new HiveException() { + public TException get(String msg) { + return new AlreadyExistsException(msg); + } + }) + .put("InvalidInputException", new HiveException() { + public TException get(String msg) { + return new InvalidObjectException(msg); + } + }) + .put("InternalServiceException", new HiveException() { + public TException get(String msg) { + return new MetaException(msg); + } + }) + 
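// Commentary added for clarity (not part of the original patch): each entry in this
// builder chain maps a Glue error name to a factory for the matching Hive thrift
// exception, so a Glue "EntityNotFoundException" reaches Hive callers as a
// NoSuchObjectException. Since HiveException exposes a single get(String) method, an
// equivalent entry could likely be written with a constructor reference, e.g.:
//   .put("AlreadyExistsException", AlreadyExistsException::new)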
.put("ResourceNumberLimitExceededException", new HiveException() { + public TException get(String msg) { + return new MetaException(msg); + } + }) + .put("OperationTimeoutException", new HiveException() { + public TException get(String msg) { + return new MetaException(msg); + } + }) + .put("EntityNotFoundException", new HiveException() { + public TException get(String msg) { + return new NoSuchObjectException(msg); + } + }) + .build(); + + private CatalogToHiveConverter() { + } + + interface HiveException { + TException get(String msg); + } + + public static TException wrapInHiveException(Throwable err) { + return getHiveException(err.getClass().getSimpleName(), err.getMessage()); + } + + public static TException errorDetailToHiveException(ErrorDetail errorDetail) { + return getHiveException(errorDetail.getErrorCode(), errorDetail.getErrorMessage()); + } + + private static TException getHiveException(String errorName, String errorMsg) { + if (EXCEPTION_MAP.containsKey(errorName)) { + return EXCEPTION_MAP.get(errorName).get(errorMsg); + } else { + LOG.warn("Hive Exception type not found for {}", errorName); + return new MetaException(errorMsg); + } + } + + public static Database convertDatabase(com.amazonaws.services.glue.model.Database catalogDatabase) { + Database hiveDatabase = new Database(); + hiveDatabase.setName(catalogDatabase.getName()); + hiveDatabase.setDescription(catalogDatabase.getDescription()); + String location = catalogDatabase.getLocationUri(); + hiveDatabase.setLocationUri(location == null ? "" : location); + hiveDatabase.setParameters( + ObjectUtils.firstNonNull(catalogDatabase.getParameters(), Maps.newHashMap())); + return hiveDatabase; + } + + public static FieldSchema convertFieldSchema(com.amazonaws.services.glue.model.Column catalogFieldSchema) { + FieldSchema hiveFieldSchema = new FieldSchema(); + hiveFieldSchema.setType(catalogFieldSchema.getType()); + hiveFieldSchema.setName(catalogFieldSchema.getName()); + hiveFieldSchema.setComment(catalogFieldSchema.getComment()); + + return hiveFieldSchema; + } + + public static List convertFieldSchemaList( + List catalogFieldSchemaList) { + List hiveFieldSchemaList = new ArrayList<>(); + if (catalogFieldSchemaList == null) { + return hiveFieldSchemaList; + } + for (com.amazonaws.services.glue.model.Column catalogFieldSchema : catalogFieldSchemaList) { + hiveFieldSchemaList.add(convertFieldSchema(catalogFieldSchema)); + } + + return hiveFieldSchemaList; + } + + public static Table convertTable(com.amazonaws.services.glue.model.Table catalogTable, String dbname) { + Table hiveTable = new Table(); + hiveTable.setDbName(dbname); + hiveTable.setTableName(catalogTable.getName()); + Date createTime = catalogTable.getCreateTime(); + hiveTable.setCreateTime(createTime == null ? 0 : (int) (createTime.getTime() / 1000)); + hiveTable.setOwner(catalogTable.getOwner()); + Date lastAccessedTime = catalogTable.getLastAccessTime(); + hiveTable.setLastAccessTime(lastAccessedTime == null ? 0 : (int) (lastAccessedTime.getTime() / 1000)); + hiveTable.setRetention(catalogTable.getRetention()); + hiveTable.setSd(convertStorageDescriptor(catalogTable.getStorageDescriptor())); + hiveTable.setPartitionKeys(convertFieldSchemaList(catalogTable.getPartitionKeys())); + // Hive may throw a NPE during dropTable if the parameter map is null. 
+ Map parameterMap = catalogTable.getParameters(); + if (parameterMap == null) { + parameterMap = Maps.newHashMap(); + } + hiveTable.setParameters(parameterMap); + hiveTable.setViewOriginalText(catalogTable.getViewOriginalText()); + hiveTable.setViewExpandedText(catalogTable.getViewExpandedText()); + hiveTable.setTableType(catalogTable.getTableType()); + + return hiveTable; + } + + public static TableMeta convertTableMeta(com.amazonaws.services.glue.model.Table catalogTable, String dbName) { + TableMeta tableMeta = new TableMeta(); + tableMeta.setDbName(dbName); + tableMeta.setTableName(catalogTable.getName()); + tableMeta.setTableType(catalogTable.getTableType()); + if (catalogTable.getParameters().containsKey("comment")) { + tableMeta.setComments(catalogTable.getParameters().get("comment")); + } + return tableMeta; + } + + public static StorageDescriptor convertStorageDescriptor( + com.amazonaws.services.glue.model.StorageDescriptor catalogSd) { + StorageDescriptor hiveSd = new StorageDescriptor(); + hiveSd.setCols(convertFieldSchemaList(catalogSd.getColumns())); + hiveSd.setLocation(catalogSd.getLocation()); + hiveSd.setInputFormat(catalogSd.getInputFormat()); + hiveSd.setOutputFormat(catalogSd.getOutputFormat()); + hiveSd.setCompressed(catalogSd.getCompressed()); + hiveSd.setNumBuckets(catalogSd.getNumberOfBuckets()); + hiveSd.setSerdeInfo(convertSerDeInfo(catalogSd.getSerdeInfo())); + hiveSd.setBucketCols(ObjectUtils.firstNonNull(catalogSd.getBucketColumns(), Lists.newArrayList())); + hiveSd.setSortCols(convertOrderList(catalogSd.getSortColumns())); + hiveSd.setParameters(ObjectUtils.firstNonNull(catalogSd.getParameters(), Maps.newHashMap())); + hiveSd.setSkewedInfo(convertSkewedInfo(catalogSd.getSkewedInfo())); + hiveSd.setStoredAsSubDirectories(catalogSd.getStoredAsSubDirectories()); + + return hiveSd; + } + + public static Order convertOrder(com.amazonaws.services.glue.model.Order catalogOrder) { + Order hiveOrder = new Order(); + hiveOrder.setCol(catalogOrder.getColumn()); + hiveOrder.setOrder(catalogOrder.getSortOrder()); + + return hiveOrder; + } + + public static List convertOrderList(List catalogOrderList) { + List hiveOrderList = new ArrayList<>(); + if (catalogOrderList == null) { + return hiveOrderList; + } + for (com.amazonaws.services.glue.model.Order catalogOrder : catalogOrderList) { + hiveOrderList.add(convertOrder(catalogOrder)); + } + + return hiveOrderList; + } + + public static SerDeInfo convertSerDeInfo(com.amazonaws.services.glue.model.SerDeInfo catalogSerDeInfo) { + SerDeInfo hiveSerDeInfo = new SerDeInfo(); + hiveSerDeInfo.setName(catalogSerDeInfo.getName()); + hiveSerDeInfo.setParameters(ObjectUtils.firstNonNull( + catalogSerDeInfo.getParameters(), Maps.newHashMap())); + hiveSerDeInfo.setSerializationLib(catalogSerDeInfo.getSerializationLibrary()); + + return hiveSerDeInfo; + } + + public static SkewedInfo convertSkewedInfo(com.amazonaws.services.glue.model.SkewedInfo catalogSkewedInfo) { + if (catalogSkewedInfo == null) { + return null; + } + + SkewedInfo hiveSkewedInfo = new SkewedInfo(); + hiveSkewedInfo.setSkewedColNames(ObjectUtils.firstNonNull( + catalogSkewedInfo.getSkewedColumnNames(), Lists.newArrayList())); + hiveSkewedInfo.setSkewedColValues(convertSkewedValue(catalogSkewedInfo.getSkewedColumnValues())); + hiveSkewedInfo.setSkewedColValueLocationMaps( + convertSkewedMap(catalogSkewedInfo.getSkewedColumnValueLocationMaps())); + return hiveSkewedInfo; + } + + public static Index convertTableObjectToIndex(com.amazonaws.services.glue.model.Table 
catalogTable) { + Index hiveIndex = new Index(); + Map parameters = catalogTable.getParameters(); + hiveIndex.setIndexName(catalogTable.getName()); + hiveIndex.setCreateTime((int) (catalogTable.getCreateTime().getTime() / 1000)); + hiveIndex.setLastAccessTime((int) (catalogTable.getLastAccessTime().getTime() / 1000)); + hiveIndex.setSd(convertStorageDescriptor(catalogTable.getStorageDescriptor())); + hiveIndex.setParameters(catalogTable.getParameters()); + + hiveIndex.setDeferredRebuild(parameters.get(ConverterUtils.INDEX_DEFERRED_REBUILD).equals("TRUE")); + hiveIndex.setIndexHandlerClass(parameters.get(ConverterUtils.INDEX_HANDLER_CLASS)); + hiveIndex.setDbName(parameters.get(ConverterUtils.INDEX_DB_NAME)); + hiveIndex.setOrigTableName(parameters.get(ConverterUtils.INDEX_ORIGIN_TABLE_NAME)); + hiveIndex.setIndexTableName(parameters.get(ConverterUtils.INDEX_TABLE_NAME)); + + return hiveIndex; + } + + public static Partition convertPartition(com.amazonaws.services.glue.model.Partition src) { + Partition tgt = new Partition(); + Date createTime = src.getCreationTime(); + if (createTime != null) { + tgt.setCreateTime((int) (createTime.getTime() / 1000)); + tgt.setCreateTimeIsSet(true); + } else { + tgt.setCreateTimeIsSet(false); + } + String dbName = src.getDatabaseName(); + if (dbName != null) { + tgt.setDbName(dbName); + tgt.setDbNameIsSet(true); + } else { + tgt.setDbNameIsSet(false); + } + Date lastAccessTime = src.getLastAccessTime(); + if (lastAccessTime != null) { + tgt.setLastAccessTime((int) (lastAccessTime.getTime() / 1000)); + tgt.setLastAccessTimeIsSet(true); + } else { + tgt.setLastAccessTimeIsSet(false); + } + Map params = src.getParameters(); + + // A null parameter map causes Hive to throw a NPE + // so ensure we do not return a Partition object with a null parameter map. 
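// Added note (commentary only): the explicit set*IsSet(false) calls above keep the
// corresponding Thrift optional fields unset instead of serializing zero/empty
// defaults, e.g. a partition without a creation time reports
// partition.isSetCreateTime() == false to Hive callers. The guard below applies the
// same defensive treatment to the parameter map.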
+ if (params == null) { + params = Maps.newHashMap(); + } + + tgt.setParameters(params); + tgt.setParametersIsSet(true); + + String tableName = src.getTableName(); + if (tableName != null) { + tgt.setTableName(tableName); + tgt.setTableNameIsSet(true); + } else { + tgt.setTableNameIsSet(false); + } + + List values = src.getValues(); + if (values != null) { + tgt.setValues(values); + tgt.setValuesIsSet(true); + } else { + tgt.setValuesIsSet(false); + } + + com.amazonaws.services.glue.model.StorageDescriptor sd = src.getStorageDescriptor(); + if (sd != null) { + StorageDescriptor hiveSd = convertStorageDescriptor(sd); + tgt.setSd(hiveSd); + tgt.setSdIsSet(true); + } else { + tgt.setSdIsSet(false); + } + return tgt; + } + + public static List convertPartitions(List src) { + if (src == null) { + return null; + } + + List target = Lists.newArrayList(); + for (com.amazonaws.services.glue.model.Partition partition : src) { + target.add(convertPartition(partition)); + } + return target; + } + + public static List convertStringToList(final String str) { + if (str == null) { + return null; + } + List listString = new ArrayList<>(); + for (int i = 0; i < str.length();) { + StringBuilder length = new StringBuilder(); + for (int j = i; j < str.length(); j++) { + if (str.charAt(j) != '$') { + length.append(str.charAt(j)); + } else { + int lengthOfString = Integer.valueOf(length.toString()); + listString.add(str.substring(j + 1, j + 1 + lengthOfString)); + i = j + 1 + lengthOfString; + break; + } + } + } + return listString; + } + + public static Map, String> convertSkewedMap(final Map catalogSkewedMap) { + Map, String> skewedMap = new HashMap<>(); + if (catalogSkewedMap == null) { + return skewedMap; + } + + for (String coralKey : catalogSkewedMap.keySet()) { + skewedMap.put(convertStringToList(coralKey), catalogSkewedMap.get(coralKey)); + } + return skewedMap; + } + + public static List> convertSkewedValue(final List catalogSkewedValue) { + List> skewedValues = new ArrayList<>(); + if (catalogSkewedValue == null) { + return skewedValues; + } + + for (String skewValue : catalogSkewedValue) { + skewedValues.add(convertStringToList(skewValue)); + } + return skewedValues; + } + + public static PrincipalType convertPrincipalType( + com.amazonaws.services.glue.model.PrincipalType catalogPrincipalType) { + if (catalogPrincipalType == null) { + return null; + } + + if (catalogPrincipalType == com.amazonaws.services.glue.model.PrincipalType.GROUP) { + return PrincipalType.GROUP; + } else if (catalogPrincipalType == com.amazonaws.services.glue.model.PrincipalType.USER) { + return PrincipalType.USER; + } else if (catalogPrincipalType == com.amazonaws.services.glue.model.PrincipalType.ROLE) { + return PrincipalType.ROLE; + } + throw new RuntimeException("Unknown principal type:" + catalogPrincipalType.name()); + } + + public static Function convertFunction(final String dbName, + final com.amazonaws.services.glue.model.UserDefinedFunction catalogFunction) { + if (catalogFunction == null) { + return null; + } + Function hiveFunction = new Function(); + hiveFunction.setClassName(catalogFunction.getClassName()); + hiveFunction.setCreateTime((int) (catalogFunction.getCreateTime().getTime() / 1000)); + hiveFunction.setDbName(dbName); + hiveFunction.setFunctionName(catalogFunction.getFunctionName()); + hiveFunction.setFunctionType(FunctionType.JAVA); + hiveFunction.setOwnerName(catalogFunction.getOwnerName()); + hiveFunction.setOwnerType(convertPrincipalType( + 
com.amazonaws.services.glue.model.PrincipalType.fromValue(catalogFunction.getOwnerType()))); + hiveFunction.setResourceUris(convertResourceUriList(catalogFunction.getResourceUris())); + return hiveFunction; + } + + public static List convertResourceUriList( + final List catalogResourceUriList) { + if (catalogResourceUriList == null) { + return null; + } + List hiveResourceUriList = new ArrayList<>(); + for (com.amazonaws.services.glue.model.ResourceUri catalogResourceUri : catalogResourceUriList) { + ResourceUri hiveResourceUri = new ResourceUri(); + hiveResourceUri.setUri(catalogResourceUri.getUri()); + if (catalogResourceUri.getResourceType() != null) { + hiveResourceUri.setResourceType(ResourceType.valueOf(catalogResourceUri.getResourceType())); + } + hiveResourceUriList.add(hiveResourceUri); + } + + return hiveResourceUriList; + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/converters/ConverterUtils.java b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/ConverterUtils.java new file mode 100644 index 000000000000..e245d3df6800 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/ConverterUtils.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.converters; + +import com.amazonaws.services.glue.model.Table; +import com.google.gson.Gson; + +public class ConverterUtils { + + private ConverterUtils() { + } + + public static final String INDEX_DEFERRED_REBUILD = "DeferredRebuild"; + public static final String INDEX_TABLE_NAME = "IndexTableName"; + public static final String INDEX_HANDLER_CLASS = "IndexHandlerClass"; + public static final String INDEX_DB_NAME = "DbName"; + public static final String INDEX_ORIGIN_TABLE_NAME = "OriginTableName"; + private static final Gson gson = new Gson(); + + public static String catalogTableToString(final Table table) { + return gson.toJson(table); + } + + public static Table stringToCatalogTable(final String input) { + return gson.fromJson(input, Table.class); + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/converters/GlueInputConverter.java b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/GlueInputConverter.java new file mode 100644 index 000000000000..f2988179a9ee --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/GlueInputConverter.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.converters; + +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UserDefinedFunctionInput; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; + +/** + * This class provides methods to convert Hive/Catalog objects to Input objects used + * for Glue API parameters + */ +public final class GlueInputConverter { + + private GlueInputConverter() { + } + + public static DatabaseInput convertToDatabaseInput(Database hiveDatabase) { + return convertToDatabaseInput(HiveToCatalogConverter.convertDatabase(hiveDatabase)); + } + + public static DatabaseInput convertToDatabaseInput(com.amazonaws.services.glue.model.Database database) { + DatabaseInput input = new DatabaseInput(); + + input.setName(database.getName()); + input.setDescription(database.getDescription()); + input.setLocationUri(database.getLocationUri()); + input.setParameters(database.getParameters()); + + return input; + } + + public static TableInput convertToTableInput(Table hiveTable) { + return convertToTableInput(HiveToCatalogConverter.convertTable(hiveTable)); + } + + public static TableInput convertToTableInput(com.amazonaws.services.glue.model.Table table) { + TableInput tableInput = new TableInput(); + + tableInput.setRetention(table.getRetention()); + tableInput.setPartitionKeys(table.getPartitionKeys()); + tableInput.setTableType(table.getTableType()); + tableInput.setName(table.getName()); + tableInput.setOwner(table.getOwner()); + tableInput.setLastAccessTime(table.getLastAccessTime()); + tableInput.setStorageDescriptor(table.getStorageDescriptor()); + tableInput.setParameters(table.getParameters()); + tableInput.setViewExpandedText(table.getViewExpandedText()); + tableInput.setViewOriginalText(table.getViewOriginalText()); + + return tableInput; + } + + public static PartitionInput convertToPartitionInput(Partition src) { + return convertToPartitionInput(HiveToCatalogConverter.convertPartition(src)); + } + + public static PartitionInput convertToPartitionInput(com.amazonaws.services.glue.model.Partition src) { + PartitionInput partitionInput = new PartitionInput(); + + partitionInput.setLastAccessTime(src.getLastAccessTime()); + partitionInput.setParameters(src.getParameters()); + partitionInput.setStorageDescriptor(src.getStorageDescriptor()); + partitionInput.setValues(src.getValues()); + + return partitionInput; + } + + public static List convertToPartitionInputs( + Collection parts) { + List inputList = new ArrayList<>(); + + for (com.amazonaws.services.glue.model.Partition part : parts) { + 
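// each catalog Partition is reduced to the PartitionInput shape accepted by Glue's partition write APIs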
inputList.add(convertToPartitionInput(part)); + } + return inputList; + } + + public static UserDefinedFunctionInput convertToUserDefinedFunctionInput(Function hiveFunction) { + UserDefinedFunctionInput functionInput = new UserDefinedFunctionInput(); + + functionInput.setClassName(hiveFunction.getClassName()); + functionInput.setFunctionName(hiveFunction.getFunctionName()); + functionInput.setOwnerName(hiveFunction.getOwnerName()); + if (hiveFunction.getOwnerType() != null) { + functionInput.setOwnerType(hiveFunction.getOwnerType().name()); + } + functionInput.setResourceUris(HiveToCatalogConverter.covertResourceUriList(hiveFunction.getResourceUris())); + return functionInput; + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/converters/HiveToCatalogConverter.java b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/HiveToCatalogConverter.java new file mode 100644 index 000000000000..b4c73da7509f --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/converters/HiveToCatalogConverter.java @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.converters; + +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.ResourceUri; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; + +public class HiveToCatalogConverter { + + private HiveToCatalogConverter() { + } + + public static com.amazonaws.services.glue.model.Database convertDatabase(Database hiveDatabase) { + com.amazonaws.services.glue.model.Database catalogDatabase = new com.amazonaws.services.glue.model.Database(); + catalogDatabase.setName(hiveDatabase.getName()); + catalogDatabase.setDescription(hiveDatabase.getDescription()); + catalogDatabase.setLocationUri(hiveDatabase.getLocationUri()); + catalogDatabase.setParameters(hiveDatabase.getParameters()); + return catalogDatabase; + } + + public static com.amazonaws.services.glue.model.Table convertTable( + Table hiveTable) { + com.amazonaws.services.glue.model.Table catalogTable = new com.amazonaws.services.glue.model.Table(); + catalogTable.setRetention(hiveTable.getRetention()); + catalogTable.setPartitionKeys(convertFieldSchemaList(hiveTable.getPartitionKeys())); + catalogTable.setTableType(hiveTable.getTableType()); + catalogTable.setName(hiveTable.getTableName()); + catalogTable.setOwner(hiveTable.getOwner()); + catalogTable.setCreateTime(new Date((long) hiveTable.getCreateTime() * 1000)); + catalogTable.setLastAccessTime(new Date((long) hiveTable.getLastAccessTime() * 1000)); + catalogTable.setStorageDescriptor(convertStorageDescriptor(hiveTable.getSd())); + catalogTable.setParameters(hiveTable.getParameters()); + catalogTable.setViewExpandedText(hiveTable.getViewExpandedText()); + catalogTable.setViewOriginalText(hiveTable.getViewOriginalText()); + + return catalogTable; + } + + public static com.amazonaws.services.glue.model.StorageDescriptor convertStorageDescriptor( + StorageDescriptor hiveSd) { + com.amazonaws.services.glue.model.StorageDescriptor catalogSd = + new com.amazonaws.services.glue.model.StorageDescriptor(); + catalogSd.setNumberOfBuckets(hiveSd.getNumBuckets()); + catalogSd.setCompressed(hiveSd.isCompressed()); + catalogSd.setParameters(hiveSd.getParameters()); + catalogSd.setBucketColumns(hiveSd.getBucketCols()); + catalogSd.setColumns(convertFieldSchemaList(hiveSd.getCols())); + catalogSd.setInputFormat(hiveSd.getInputFormat()); + catalogSd.setLocation(hiveSd.getLocation()); + catalogSd.setOutputFormat(hiveSd.getOutputFormat()); + catalogSd.setSerdeInfo(convertSerDeInfo(hiveSd.getSerdeInfo())); + catalogSd.setSkewedInfo(convertSkewedInfo(hiveSd.getSkewedInfo())); + catalogSd.setSortColumns(convertOrderList(hiveSd.getSortCols())); + catalogSd.setStoredAsSubDirectories(hiveSd.isStoredAsSubDirectories()); + + return catalogSd; + } + + public static com.amazonaws.services.glue.model.Column convertFieldSchema( + FieldSchema hiveFieldSchema) { + com.amazonaws.services.glue.model.Column catalogFieldSchema = + new 
com.amazonaws.services.glue.model.Column(); + catalogFieldSchema.setComment(hiveFieldSchema.getComment()); + catalogFieldSchema.setName(hiveFieldSchema.getName()); + catalogFieldSchema.setType(hiveFieldSchema.getType()); + + return catalogFieldSchema; + } + + public static List convertFieldSchemaList( + List hiveFieldSchemaList) { + List catalogFieldSchemaList = + new ArrayList(); + for (FieldSchema hiveFs : hiveFieldSchemaList) { + catalogFieldSchemaList.add(convertFieldSchema(hiveFs)); + } + + return catalogFieldSchemaList; + } + + public static com.amazonaws.services.glue.model.SerDeInfo convertSerDeInfo( + SerDeInfo hiveSerDeInfo) { + com.amazonaws.services.glue.model.SerDeInfo catalogSerDeInfo = new com.amazonaws.services.glue.model.SerDeInfo(); + catalogSerDeInfo.setName(hiveSerDeInfo.getName()); + catalogSerDeInfo.setParameters(hiveSerDeInfo.getParameters()); + catalogSerDeInfo.setSerializationLibrary(hiveSerDeInfo.getSerializationLib()); + + return catalogSerDeInfo; + } + + public static com.amazonaws.services.glue.model.SkewedInfo convertSkewedInfo(SkewedInfo hiveSkewedInfo) { + if (hiveSkewedInfo == null) { + return null; + } + + com.amazonaws.services.glue.model.SkewedInfo catalogSkewedInfo = new com.amazonaws.services.glue.model.SkewedInfo() + .withSkewedColumnNames(hiveSkewedInfo.getSkewedColNames()) + .withSkewedColumnValues(convertSkewedValue(hiveSkewedInfo.getSkewedColValues())) + .withSkewedColumnValueLocationMaps(convertSkewedMap(hiveSkewedInfo.getSkewedColValueLocationMaps())); + return catalogSkewedInfo; + } + + public static com.amazonaws.services.glue.model.Order convertOrder(Order hiveOrder) { + com.amazonaws.services.glue.model.Order order = new com.amazonaws.services.glue.model.Order(); + order.setColumn(hiveOrder.getCol()); + order.setSortOrder(hiveOrder.getOrder()); + + return order; + } + + public static List convertOrderList(List hiveOrderList) { + if (hiveOrderList == null) { + return null; + } + List catalogOrderList = new ArrayList<>(); + for (Order hiveOrder : hiveOrderList) { + catalogOrderList.add(convertOrder(hiveOrder)); + } + + return catalogOrderList; + } + + public static com.amazonaws.services.glue.model.Table convertIndexToTableObject(Index hiveIndex) { + // convert index object to a table object + com.amazonaws.services.glue.model.Table catalogIndexTableObject = new com.amazonaws.services.glue.model.Table(); + catalogIndexTableObject.setName(hiveIndex.getIndexName()); + catalogIndexTableObject.setCreateTime(new Date((long) (hiveIndex.getCreateTime()) * 1000)); + catalogIndexTableObject.setLastAccessTime(new Date((long) (hiveIndex.getLastAccessTime()) * 1000)); + catalogIndexTableObject.setStorageDescriptor(convertStorageDescriptor(hiveIndex.getSd())); + catalogIndexTableObject.setParameters(hiveIndex.getParameters()); + + // store rest of fields in index to paramter map + catalogIndexTableObject.getParameters().put( + ConverterUtils.INDEX_DEFERRED_REBUILD, hiveIndex.isDeferredRebuild() ? 
"TRUE" : "FALSE"); + catalogIndexTableObject.getParameters().put(ConverterUtils.INDEX_TABLE_NAME, hiveIndex.getIndexTableName()); + catalogIndexTableObject.getParameters().put(ConverterUtils.INDEX_HANDLER_CLASS, hiveIndex.getIndexHandlerClass()); + catalogIndexTableObject.getParameters().put(ConverterUtils.INDEX_DB_NAME, hiveIndex.getDbName()); + catalogIndexTableObject.getParameters().put(ConverterUtils.INDEX_ORIGIN_TABLE_NAME, hiveIndex.getOrigTableName()); + + return catalogIndexTableObject; + } + + public static com.amazonaws.services.glue.model.Partition convertPartition(Partition src) { + com.amazonaws.services.glue.model.Partition tgt = new com.amazonaws.services.glue.model.Partition(); + + tgt.setDatabaseName(src.getDbName()); + tgt.setTableName(src.getTableName()); + tgt.setCreationTime(new Date((long) src.getCreateTime() * 1000)); + tgt.setLastAccessTime(new Date((long) src.getLastAccessTime() * 1000)); + tgt.setParameters(src.getParameters()); + tgt.setStorageDescriptor(convertStorageDescriptor(src.getSd())); + tgt.setValues(src.getValues()); + + return tgt; + } + + public static String convertListToString(final List list) { + if (list == null) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < list.size(); i++) { + String currentString = list.get(i); + sb.append(currentString.length() + "$" + currentString); + } + + return sb.toString(); + } + + public static Map convertSkewedMap(final Map, String> coreSkewedMap) { + if (coreSkewedMap == null) { + return null; + } + Map catalogSkewedMap = new HashMap<>(); + for (List coreKey : coreSkewedMap.keySet()) { + catalogSkewedMap.put(convertListToString(coreKey), coreSkewedMap.get(coreKey)); + } + return catalogSkewedMap; + } + + public static List convertSkewedValue(final List> coreSkewedValue) { + if (coreSkewedValue == null) { + return null; + } + List catalogSkewedValue = new ArrayList<>(); + for (int i = 0; i < coreSkewedValue.size(); i++) { + catalogSkewedValue.add(convertListToString(coreSkewedValue.get(i))); + } + + return catalogSkewedValue; + } + + public static com.amazonaws.services.glue.model.UserDefinedFunction convertFunction(final Function hiveFunction) { + if (hiveFunction == null) { + return null; + } + com.amazonaws.services.glue.model.UserDefinedFunction catalogFunction = + new com.amazonaws.services.glue.model.UserDefinedFunction(); + catalogFunction.setClassName(hiveFunction.getClassName()); + catalogFunction.setFunctionName(hiveFunction.getFunctionName()); + catalogFunction.setCreateTime(new Date((long) (hiveFunction.getCreateTime()) * 1000)); + catalogFunction.setOwnerName(hiveFunction.getOwnerName()); + if (hiveFunction.getOwnerType() != null) { + catalogFunction.setOwnerType(hiveFunction.getOwnerType().name()); + } + catalogFunction.setResourceUris(covertResourceUriList(hiveFunction.getResourceUris())); + return catalogFunction; + } + + public static List covertResourceUriList( + final List hiveResourceUriList) { + if (hiveResourceUriList == null) { + return null; + } + List catalogResourceUriList = new ArrayList<>(); + for (ResourceUri hiveResourceUri : hiveResourceUriList) { + com.amazonaws.services.glue.model.ResourceUri catalogResourceUri = + new com.amazonaws.services.glue.model.ResourceUri(); + catalogResourceUri.setUri(hiveResourceUri.getUri()); + if (hiveResourceUri.getResourceType() != null) { + catalogResourceUri.setResourceType(hiveResourceUri.getResourceType().name()); + } + catalogResourceUriList.add(catalogResourceUri); + } + return catalogResourceUriList; + } + 
+} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/lock/DynamoLockComponent.java b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/DynamoLockComponent.java new file mode 100644 index 000000000000..c293c7ea3a2f --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/DynamoLockComponent.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.lock; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonInclude; +import java.io.IOException; +import org.apache.hadoop.hive.metastore.api.LockComponent; +import org.apache.hadoop.hive.metastore.api.LockLevel; +import org.apache.hadoop.hive.metastore.api.LockType; +import org.codehaus.jackson.map.ObjectMapper; + +@JsonIgnoreProperties(ignoreUnknown = true) +@JsonInclude(JsonInclude.Include.NON_NULL) +public class DynamoLockComponent { + + // TODO: reuse the JsonUtil by moving it to iceberg-common? + private static final ObjectMapper MAPPER = new ObjectMapper(); + + private LockType lockType; + private LockLevel lockLevel; + private String dbName; + private String tableName; + private String partitionName; + + public static DynamoLockComponent fromHive(LockComponent lockComponent) { + DynamoLockComponent component = new DynamoLockComponent(); + component.setDbName(lockComponent.getDbname()); + component.setTableName(lockComponent.getTablename()); + component.setPartitionName(lockComponent.getPartitionname()); + component.setLockLevel(lockComponent.getLevel()); + component.setLockType(lockComponent.getType()); + return component; + } + + public static DynamoLockComponent fromJson(String json) { + try { + return MAPPER.readValue(json, DynamoLockComponent.class); + } catch (IOException e) { + // simply throw as runtime exception + throw new RuntimeException("fail to serialize DynamoLockComponent", e); + } + + } + + @Override + public String toString() { + try { + return MAPPER.writeValueAsString(this); + } catch (IOException e) { + // simply throw as runtime exception + throw new RuntimeException("fail to serialize DynamoLockComponent", e); + } + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getDbName() { + return dbName; + } + + public void setDbName(String dbName) { + this.dbName = dbName; + } + + public String getPartitionName() { + return partitionName; + } + + public void setPartitionName(String partitionName) { + this.partitionName = partitionName; + } + + public LockLevel getLockLevel() { + return lockLevel; + } + + public void setLockLevel(LockLevel lockLevel) { + this.lockLevel = lockLevel; + } + + public LockType getLockType() { + return 
lockType; + } + + public void setLockType(LockType lockType) { + this.lockType = lockType; + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/lock/DynamoLockManager.java b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/DynamoLockManager.java new file mode 100644 index 000000000000..1cc491cd4ef2 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/DynamoLockManager.java @@ -0,0 +1,462 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.lock; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.LockLevel; +import org.apache.hadoop.hive.metastore.api.LockRequest; +import org.apache.hadoop.hive.metastore.api.LockResponse; +import org.apache.hadoop.hive.metastore.api.LockState; +import org.apache.iceberg.aws.glue.util.AWSGlueConfig; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.dynamodb.DynamoDbClient; +import software.amazon.awssdk.services.dynamodb.model.AttributeDefinition; +import software.amazon.awssdk.services.dynamodb.model.AttributeValue; +import software.amazon.awssdk.services.dynamodb.model.BillingMode; +import software.amazon.awssdk.services.dynamodb.model.ConditionalCheckFailedException; +import software.amazon.awssdk.services.dynamodb.model.CreateTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DeleteItemRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableResponse; +import software.amazon.awssdk.services.dynamodb.model.GetItemRequest; +import software.amazon.awssdk.services.dynamodb.model.GetItemResponse; +import software.amazon.awssdk.services.dynamodb.model.KeySchemaElement; +import software.amazon.awssdk.services.dynamodb.model.KeyType; +import software.amazon.awssdk.services.dynamodb.model.PutItemRequest; +import software.amazon.awssdk.services.dynamodb.model.ResourceNotFoundException; +import software.amazon.awssdk.services.dynamodb.model.ScalarAttributeType; +import software.amazon.awssdk.services.dynamodb.model.TableStatus; + +/** + * A DynamoDB implementation of Hive's lock interface. + * We use 2 tables, a lockRequest table and a lockComponent table to keep track of all the locks. 
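 * As an example of the key layout described below (names are illustrative): an
 * exclusive lock on table db1.tbl1 with no partition is tracked as one lockComponent
 * item keyed by tableId "db1|tbl1" and partitionId "__GLOBAL_LOCK__|EXCLUSIVE",
 * pointing back to its lockId in the lockRequest table.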
+ * The lockRequest table has the following schema: + * - hash key: lockId (String) - a random long number + * - values: a list of LockComponents, lastHeartbeatMillis + * and other input info such as request user, host, agentInfo, txnId for debug purpose + * The lockComponent table has the following schema: + * - hash key: tableId (String) - databaseName|tableName + * - range key: partitionId (String) - partitionName|lockType + * - values: lockId, lastHeartbeatMillis + * If the expected tables do not exist, we will create the tables and set billing mode to PAY_PER_REQUEST by default. + * For production systems, it is recommended to change to provisioned throughput to ensure performance. + * + * By the nature of DynamoDB, we can not efficiently achieve transaction isolation between different callers. + * In addition, the current implementation only supports exclusive locks. + * However, we consider such implementation sufficient because Iceberg currently only requests exclusive table locks. + * We will add support for shared read and write locks if necessary. + * For people seeking a true lock impl, we can also implement an AuroraServerlessLockManager. + * But in the end we expect Glue to someday provide such support natively. + */ +public class DynamoLockManager implements LockManager { + + private static final Logger LOG = LoggerFactory.getLogger(DynamoLockManager.class); + + private static final String LOCK_ID_COL = "lockId"; + private static final String COMPONENTS_COL = "components"; + private static final String USER_COL = "user"; + private static final String HOST_COL = "host"; + private static final String AGENT_INFO_COL = "agentInfo"; + private static final String TXN_ID_COL = "txnId"; + private static final String TABLE_ID_COL = "tableId"; + private static final String PARTITION_ID_COL = "partitionId"; + private static final String LAST_HEARTBEAT_MILLIS_COL = "lastHeartbeatMillis"; + + private static final String GLOBAL_LOCK_MARKER = "__GLOBAL_LOCK__"; + + // schema and definitions are made public in case people want to create the tables separately + public static final List LOCK_REQUEST_TABLE_SCHEMA = Lists.newArrayList( + KeySchemaElement.builder() + .attributeName(LOCK_ID_COL) + .keyType(KeyType.HASH) + .build() + ); + + public static final List LOCK_REQUEST_TABLE_COL_DEFINITIONS = Lists.newArrayList( + AttributeDefinition.builder() + .attributeName(LOCK_ID_COL) + .attributeType(ScalarAttributeType.S) + .build() + ); + + public static final List LOCK_COMPONENT_TABLE_SCHEMA = Lists.newArrayList( + KeySchemaElement.builder() + .attributeName(TABLE_ID_COL) + .keyType(KeyType.HASH) + .build(), + KeySchemaElement.builder() + .attributeName(PARTITION_ID_COL) + .keyType(KeyType.RANGE) + .build() + ); + + public static final List LOCK_COMPONENT_TABLE_COL_DEFINITIONS = Lists.newArrayList( + AttributeDefinition.builder() + .attributeName(TABLE_ID_COL) + .attributeType(ScalarAttributeType.S) + .build(), + AttributeDefinition.builder() + .attributeName(PARTITION_ID_COL) + .attributeType(ScalarAttributeType.S) + .build() + ); + + private final String requestTableName; + private final String componentTableName; + private final int componentLockReleaseRetryMax; + private final long waitIntervalMillis; + private final long timeoutMillis; + private final DynamoDbClient dynamo; + + public DynamoLockManager(Configuration conf) { + this(conf, DynamoDbClient.create()); + } + + DynamoLockManager(Configuration conf, DynamoDbClient dynamo) { + this.dynamo = dynamo; + this.requestTableName = conf.get( 
+ AWSGlueConfig.AWS_GLUE_LOCK_REQUEST_DYNAMO_TABLE_NAME, + AWSGlueConfig.AWS_GLUE_LOCK_REQUEST_DYNAMO_TABLE_NAME_DEFAULT); + this.componentTableName = conf.get( + AWSGlueConfig.AWS_GLUE_LOCK_COMPONENT_DYNAMO_TABLE_NAME, + AWSGlueConfig.AWS_GLUE_LOCK_COMPONENT_DYNAMO_TABLE_NAME_DEFAULT); + this.componentLockReleaseRetryMax = conf.getInt( + AWSGlueConfig.AWS_GLUE_LOCK_RELEASE_RETRY_MAX, + AWSGlueConfig.AWS_GLUE_LOCK_RELEASE_RETRY_MAX_DEFAULT); + this.waitIntervalMillis = conf.getLong( + AWSGlueConfig.AWS_GLUE_LOCK_WAIT_INTERVAL_MILLIS, + AWSGlueConfig.AWS_GLUE_LOCK_WAIT_INTERVAL_MILLIS_DEFAULT); + this.timeoutMillis = conf.getLong( + AWSGlueConfig.AWS_GLUE_LOCK_TIMEOUT_MILLIS, + AWSGlueConfig.AWS_GLUE_LOCK_TIMEOUT_MILLIS_DEFAULT); + if (conf.getBoolean( + AWSGlueConfig.AWS_GLUE_LOCK_DYNAMO_INITIALIZE_TABLES, + AWSGlueConfig.AWS_GLUE_LOCK_DYNAMO_INITIALIZE_TABLES_DEFAULT)) { + ensureTableExists(requestTableName, LOCK_REQUEST_TABLE_SCHEMA, LOCK_REQUEST_TABLE_COL_DEFINITIONS); + ensureTableExists(componentTableName, LOCK_COMPONENT_TABLE_SCHEMA, LOCK_COMPONENT_TABLE_COL_DEFINITIONS); + } + } + + /** + * try lock given the lock request. + * We first add an entry in lockRequest table with a random lockId + * and then try to acquire lock for each lockComponent + * @param lockRequest lock request + * @return lock response of lock id and status + */ + @Override + public LockResponse lock(LockRequest lockRequest) { + Preconditions.checkArgument(CollectionUtils.isNotEmpty(lockRequest.getComponent()), + "there is no component to lock"); + String lockId = generateRandomLongId(); + List dynamoLockComponents = lockRequest.getComponent().stream() + .map(DynamoLockComponent::fromHive) + .collect(Collectors.toList()); + insertLockRequest(lockRequest, dynamoLockComponents, lockId); + return tryLock(dynamoLockComponents, lockId); + } + + /** + * check lock status + * @param lockId lock id + * @return lock response with id and status + */ + @Override + public LockResponse checkLock(long lockId) { + String lockIdStr = Long.toString(lockId); + List components = getLockComponents(lockIdStr); + if (components.isEmpty()) { + LockResponse response = new LockResponse(); + response.setLockid(lockId); + response.setState(LockState.NOT_ACQUIRED); + return response; + } else { + return tryLock(getLockComponents(lockIdStr), lockIdStr); + } + } + + /** + * unlock + * @param lockId lock id + */ + @Override + public void unlock(long lockId) { + String lockIdStr = Long.toString(lockId); + List components = getLockComponents(lockIdStr); + if (!components.isEmpty()) { + forceReleaseAllComponentLocks(components, lockIdStr); + deleteLockRequest(lockIdStr); + } + } + + private LockResponse tryLock(List dynamoLockComponents, String lockId) { + LockResponse response = new LockResponse(); + response.setLockid(Long.parseLong(lockId)); + List succeededComponents = tryBatchAcquireComponentLocks(dynamoLockComponents, lockId); + if (succeededComponents.size() < dynamoLockComponents.size()) { + forceReleaseAllComponentLocks(succeededComponents, lockId); + response.setState(LockState.WAITING); + } else { + response.setState(LockState.ACQUIRED); + } + return response; + } + + private void insertLockRequest(LockRequest lockRequest, List lockComponents, String lockId) { + Map requestItem = new HashMap<>(); + requestItem.put(LOCK_ID_COL, AttributeValue.builder().s(lockId).build()); + requestItem.put(COMPONENTS_COL, AttributeValue.builder().l(lockComponents.stream() + .map(c -> AttributeValue.builder().s(c.toString()).build()) + 
.collect(Collectors.toList()) + ).build()); + requestItem.put(USER_COL, AttributeValue.builder().s(lockRequest.getUser()).build()); + requestItem.put(HOST_COL, AttributeValue.builder().s(lockRequest.getHostname()).build()); + requestItem.put(AGENT_INFO_COL, AttributeValue.builder().s(lockRequest.getAgentInfo()).build()); + requestItem.put(TXN_ID_COL, AttributeValue.builder().n(Long.toString(lockRequest.getTxnid())).build()); + requestItem.put(LAST_HEARTBEAT_MILLIS_COL, + AttributeValue.builder().n(Long.toString(System.currentTimeMillis())).build()); + dynamo.putItem(PutItemRequest.builder() + .tableName(requestTableName) + .item(requestItem) + .build()); + } + + private List getLockComponents(String lockId) { + Map key = new HashMap<>(); + key.put(LOCK_ID_COL, AttributeValue.builder().s(lockId).build()); + GetItemResponse response = dynamo.getItem(GetItemRequest.builder() + .tableName(requestTableName) + .key(key) + .build()); + List result = new ArrayList<>(); + if (response.hasItem()) { + Map item = response.item(); + AttributeValue value = item.get(COMPONENTS_COL); + for (AttributeValue v : value.l()) { + result.add(DynamoLockComponent.fromJson(v.s())); + } + long lastHeartbeat = Long.parseLong(item.get(LAST_HEARTBEAT_MILLIS_COL).n()); + if (lastHeartbeat + timeoutMillis > System.currentTimeMillis()) { + // update access heartbeat + Map newItem = new HashMap<>(item); + newItem.put(LAST_HEARTBEAT_MILLIS_COL, + AttributeValue.builder().n(Long.toString(System.currentTimeMillis())).build()); + dynamo.putItem(PutItemRequest.builder() + .tableName(requestTableName) + .item(newItem) + .build()); + } else { + // delete old lock + Map deleteKey = new HashMap<>(); + deleteKey.put(LOCK_ID_COL, item.get(LOCK_ID_COL)); + dynamo.deleteItem(DeleteItemRequest.builder() + .tableName(requestTableName) + .key(deleteKey) + .build()); + forceReleaseAllComponentLocks(result, lockId); + result.clear(); + } + } + return result; + } + + private void deleteLockRequest(String lockId) { + Map key = new HashMap<>(); + key.put(LOCK_ID_COL, AttributeValue.builder().s(lockId).build()); + dynamo.deleteItem(DeleteItemRequest.builder() + .tableName(requestTableName) + .key(key) + .build()); + } + + private boolean tryAcquireComponentLock(DynamoLockComponent component, String lockId) { + Map componentItem = new HashMap<>(); + componentItem.put(TABLE_ID_COL, AttributeValue.builder().s(getTableId(component)).build()); + componentItem.put(PARTITION_ID_COL, AttributeValue.builder().s(getPartitionId(component)).build()); + componentItem.put(LOCK_ID_COL, AttributeValue.builder().s(lockId).build()); + componentItem.put(LAST_HEARTBEAT_MILLIS_COL, AttributeValue.builder().n( + Long.toString(System.currentTimeMillis()) + ).build()); + + try { + Map expressionValues = new HashMap<>(); + expressionValues.put(":lid", AttributeValue.builder().s(lockId).build()); + expressionValues.put(":ts", AttributeValue.builder().n(Long.toString( + System.currentTimeMillis() - timeoutMillis + )).build()); + + dynamo.putItem(PutItemRequest.builder() + .tableName(componentTableName) + .item(componentItem) + // if there is no lock, or the lock is already acquired and heartbeat is not passed + // this also refreshes the heartbeat + .conditionExpression("attribute_not_exists(" + + LOCK_ID_COL + ") OR (" + + LOCK_ID_COL + " = :lid AND " + + LAST_HEARTBEAT_MILLIS_COL + " > :ts)") + .expressionAttributeValues(expressionValues) + .build()); + return true; + } catch (Exception e) { + // most likely it's ConditionalCheckFailedException, but acquisition can 
fail for any exception + LOG.debug("Acquiring lock {} for {} failed", component, lockId, e); + return false; + } + } + + /** + * batch acquire component locks in the given order. + * Acquisition process won't continue if one acquisition fails. + * @param components components to lock + * @param lockId lock id + * @return succeeded locks + */ + private List tryBatchAcquireComponentLocks( + List components, String lockId) { + List succeedLocks = new ArrayList<>(); + for (DynamoLockComponent component : components) { + if (tryAcquireComponentLock(component, lockId)) { + succeedLocks.add(component); + } else { + break; + } + } + return succeedLocks; + } + + private boolean tryReleaseComponentLock(DynamoLockComponent component, String lockId) { + Map key = new HashMap<>(); + key.put(TABLE_ID_COL, AttributeValue.builder().s(getTableId(component)).build()); + key.put(PARTITION_ID_COL, AttributeValue.builder().s(getPartitionId(component)).build()); + Map expressionValues = new HashMap<>(); + expressionValues.put(":lid", AttributeValue.builder().s(lockId).build()); + expressionValues.put(":ts", AttributeValue.builder().n(Long.toString( + System.currentTimeMillis() - timeoutMillis + )).build()); + try { + dynamo.deleteItem(DeleteItemRequest.builder() + .tableName(componentTableName) + .key(key) + // if lock id is correct, or if there is any expired lock + .conditionExpression(LOCK_ID_COL + " = :lid OR " + + LAST_HEARTBEAT_MILLIS_COL + " < :ts") + .expressionAttributeValues(expressionValues) + .build()); + return true; + } catch (ConditionalCheckFailedException e) { + // some other process has the lock, or the lock does not exist, no need to retry + LOG.debug("Acquiring lock {} for {} failed due to conditional check", component, lockId, e); + return true; + } catch (Exception e) { + // all the other exceptions mean unlock failed and should retry + LOG.debug("Release lock {} for {} failed unexpectedly", component, lockId, e); + return false; + } + } + + /** + * Force to release all the component locks in the given list. + * We will try to unlock until the configured max retry is exceeded. + * @param components components + * @param lockId lock id + */ + private void forceReleaseAllComponentLocks(List components, String lockId) { + for (DynamoLockComponent component : components) { + int retry = componentLockReleaseRetryMax; + boolean succeeded = false; + while (retry-- > 0) { + try { + succeeded = tryReleaseComponentLock(component, lockId); + if (succeeded) { + break; + } + Thread.sleep(waitIntervalMillis); + } catch (InterruptedException e) { + LOG.debug("interrupted when forcing release of component {} for lock {}", component, lockId, e); + } + } + if (!succeeded) { + throw new InconsistentLockStateException(component, lockId, componentLockReleaseRetryMax); + } + } + } + + private String generateRandomLongId() { + return Long.toString(ThreadLocalRandom.current().nextLong()); + } + + private String getTableId(DynamoLockComponent component) { + String tableName = component.getLockLevel().equals(LockLevel.DB) ? GLOBAL_LOCK_MARKER : component.getTableName(); + return String.format("%s|%s", component.getDbName(), tableName); + } + + private String getPartitionId(DynamoLockComponent component) { + String partitionName = component.getPartitionName() == null ? 
GLOBAL_LOCK_MARKER : component.getPartitionName(); + return String.format("%s|%s", partitionName, component.getLockType()); + } + + private void ensureTableExists( + String tableName, + List schema, + List definitions) { + try { + dynamo.describeTable(DescribeTableRequest.builder() + .tableName(tableName) + .build()); + } catch (ResourceNotFoundException e) { + LOG.info("Glue lock DynamoDB table <{}> not found, try to create", tableName); + dynamo.createTable(CreateTableRequest.builder() + .tableName(tableName) + .keySchema(schema) + .attributeDefinitions(definitions) + .billingMode(BillingMode.PAY_PER_REQUEST) + .build()); + + boolean isTableActive = false; + while (!isTableActive) { + LOG.info("waiting for table <{}> to be active", tableName); + try { + Thread.sleep(waitIntervalMillis); + } catch (InterruptedException ie) { + LOG.warn("Glue lock DynamoDB table creation sleep interrupted", e); + } + DescribeTableResponse describeTableResponse = dynamo.describeTable(DescribeTableRequest.builder() + .tableName(tableName) + .build()); + isTableActive = describeTableResponse.table().tableStatus().equals(TableStatus.ACTIVE); + } + } + } + + @Override + public void close() { + dynamo.close(); + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/lock/InconsistentLockStateException.java b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/InconsistentLockStateException.java new file mode 100644 index 000000000000..dc6c0a22279a --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/InconsistentLockStateException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.lock; + +/** + * Indicating the lock tables for Glue is in an inconsistent state and might need manual fix. + */ +public class InconsistentLockStateException extends IllegalStateException { + + public InconsistentLockStateException(DynamoLockComponent component, String lockId, int retry) { + super(String.format("Fail to release lock %s component %s after %d retries, " + + "you might have inconsistencies in lock tables that have to be manually fixed.", + lockId, component, retry)); + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/lock/LockManager.java b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/LockManager.java new file mode 100644 index 000000000000..40af8cba66f8 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/lock/LockManager.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.lock; + +import java.io.Closeable; +import org.apache.hadoop.hive.metastore.api.LockRequest; +import org.apache.hadoop.hive.metastore.api.LockResponse; + +/** + * A minimum lock interface for Glue to implement Hive metastore transaction and lock feature. + * There are some other methods used by Hive, such as showLocks, heartbeat, etc. that is not here yet. + * We will always just provide minimum support to avoid adding unnecessary complexity. + */ +public interface LockManager extends Closeable { + + LockResponse lock(LockRequest lockRequest); + + LockResponse checkLock(long lockId); + + void unlock(long lockId); + + default void close() { + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSCredentialsProviderFactory.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSCredentialsProviderFactory.java new file mode 100644 index 000000000000..05699093d2d2 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSCredentialsProviderFactory.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.auth.AWSCredentialsProvider; +import org.apache.hadoop.hive.conf.HiveConf; + +public interface AWSCredentialsProviderFactory { + + AWSCredentialsProvider buildAWSCredentialsProvider(HiveConf hiveConf); +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueClientFactory.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueClientFactory.java new file mode 100644 index 000000000000..7da345ea3213 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueClientFactory.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.regions.Region; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.glue.AWSGlue; +import com.amazonaws.services.glue.AWSGlueClientBuilder; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.iceberg.aws.glue.util.AWSGlueConfig; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class AWSGlueClientFactory implements GlueClientFactory { + + private static final Logger LOG = LoggerFactory.getLogger(AWSGlueClientFactory.class); + + private final HiveConf conf; + + public AWSGlueClientFactory(HiveConf conf) { + Preconditions.checkNotNull(conf, "HiveConf cannot be null"); + this.conf = conf; + } + + @Override + public AWSGlue newClient() throws MetaException { + try { + AWSGlueClientBuilder glueClientBuilder = AWSGlueClientBuilder.standard() + .withCredentials(getAWSCredentialsProvider(conf)); + + String regionStr = getProperty(AWSGlueConfig.AWS_REGION, conf); + String glueEndpoint = getProperty(AWSGlueConfig.AWS_GLUE_ENDPOINT, conf); + + // ClientBuilder only allows one of EndpointConfiguration or Region to be set + if (StringUtils.isNotBlank(glueEndpoint)) { + LOG.info("Setting glue service endpoint to {}", glueEndpoint); + glueClientBuilder.setEndpointConfiguration( + new AwsClientBuilder.EndpointConfiguration(glueEndpoint, null)); + } else if (StringUtils.isNotBlank(regionStr)) { + LOG.info("Setting region to : {}", regionStr); + glueClientBuilder.setRegion(regionStr); + } else { + Region currentRegion = Regions.getCurrentRegion(); + if (currentRegion != null) { + LOG.info("Using region from ec2 metadata : {}", currentRegion.getName()); + glueClientBuilder.setRegion(currentRegion.getName()); + } else { + LOG.info("No region info found, using SDK default region: us-east-1"); + } + } + + glueClientBuilder.setClientConfiguration(buildClientConfiguration(conf)); + return glueClientBuilder.build(); + } catch (Exception e) { + LOG.error("Unable to build AWSGlueClient", e); + throw new MetaException("Unable to build AWSGlueClient: " + e); + } + } + + private AWSCredentialsProvider getAWSCredentialsProvider(HiveConf hiveConf) { + Class providerFactoryClass = hiveConf + .getClass(AWSGlueConfig.AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS, + DefaultAWSCredentialsProviderFactory.class).asSubclass( + AWSCredentialsProviderFactory.class); + AWSCredentialsProviderFactory provider = ReflectionUtils.newInstance( + providerFactoryClass, hiveConf); + return provider.buildAWSCredentialsProvider(hiveConf); + } + + private ClientConfiguration buildClientConfiguration(HiveConf hiveConf) { + ClientConfiguration 
clientConfiguration = new ClientConfiguration() + .withMaxErrorRetry(hiveConf.getInt( + AWSGlueConfig.AWS_GLUE_MAX_RETRY, + AWSGlueConfig.DEFAULT_MAX_RETRY)) + .withMaxConnections(hiveConf.getInt( + AWSGlueConfig.AWS_GLUE_MAX_CONNECTIONS, + AWSGlueConfig.DEFAULT_MAX_CONNECTIONS)) + .withConnectionTimeout(hiveConf.getInt( + AWSGlueConfig.AWS_GLUE_CONNECTION_TIMEOUT, + AWSGlueConfig.DEFAULT_CONNECTION_TIMEOUT)) + .withSocketTimeout(hiveConf.getInt( + AWSGlueConfig.AWS_GLUE_SOCKET_TIMEOUT, + AWSGlueConfig.DEFAULT_SOCKET_TIMEOUT)); + return clientConfiguration; + } + + private static String getProperty(String propertyName, HiveConf conf) { + return Strings.isNullOrEmpty(System.getProperty(propertyName)) ? + conf.get(propertyName) : System.getProperty(propertyName); + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastore.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastore.java new file mode 100644 index 000000000000..8f67e1def556 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastore.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.PartitionValueList; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import com.amazonaws.services.glue.model.UserDefinedFunctionInput; +import java.util.List; +import org.apache.thrift.TException; + +/** + * This is the accessor interface for using AWS Glue as a metastore. + * The generic AWSGlue interface{@link com.amazonaws.services.glue.AWSGlue} + * has a number of methods that are irrelevant for clients using Glue only + * as a metastore. + * Think of this interface as a wrapper over AWSGlue. 
This additional layer + * of abstraction achieves the following - + * a) Hides the non-metastore related operations present in AWSGlue + * b) Hides away the batching and pagination related limitations of AWSGlue + */ +public interface AWSGlueMetastore { + + void createDatabase(DatabaseInput databaseInput); + + Database getDatabase(String dbName); + + List getAllDatabases(); + + void updateDatabase(String databaseName, DatabaseInput databaseInput); + + void deleteDatabase(String dbName); + + void createTable(String dbName, TableInput tableInput); + + Table getTable(String dbName, String tableName); + + List getTables(String dbname, String tablePattern); + + void updateTable(String dbName, TableInput tableInput); + + void deleteTable(String dbName, String tableName); + + Partition getPartition(String dbName, String tableName, List partitionValues); + + List getPartitionsByNames(String dbName, String tableName, + List partitionsToGet); + + List getPartitions(String dbName, String tableName, String expression, + long max) throws TException; + + void updatePartition(String dbName, String tableName, List partitionValues, + PartitionInput partitionInput); + + void deletePartition(String dbName, String tableName, List partitionValues); + + List createPartitions(String dbName, String tableName, + List partitionInputs); + + void createUserDefinedFunction(String dbName, UserDefinedFunctionInput functionInput); + + UserDefinedFunction getUserDefinedFunction(String dbName, String functionName); + + List getUserDefinedFunctions(String dbName, String pattern); + + void deleteUserDefinedFunction(String dbName, String functionName); + + void updateUserDefinedFunction(String dbName, String functionName, UserDefinedFunctionInput functionInput); +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreBaseDecorator.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreBaseDecorator.java new file mode 100644 index 000000000000..6159a3e6dadd --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreBaseDecorator.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.PartitionValueList; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import com.amazonaws.services.glue.model.UserDefinedFunctionInput; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.thrift.TException; + +public class AWSGlueMetastoreBaseDecorator implements AWSGlueMetastore { + + private final AWSGlueMetastore awsGlueMetastore; + + public AWSGlueMetastoreBaseDecorator(AWSGlueMetastore awsGlueMetastore) { + Preconditions.checkNotNull(awsGlueMetastore, "awsGlueMetastore can not be null"); + this.awsGlueMetastore = awsGlueMetastore; + } + + @Override + public void createDatabase(DatabaseInput databaseInput) { + awsGlueMetastore.createDatabase(databaseInput); + } + + @Override + public Database getDatabase(String dbName) { + return awsGlueMetastore.getDatabase(dbName); + } + + @Override + public List getAllDatabases() { + return awsGlueMetastore.getAllDatabases(); + } + + @Override + public void updateDatabase(String databaseName, DatabaseInput databaseInput) { + awsGlueMetastore.updateDatabase(databaseName, databaseInput); + } + + @Override + public void deleteDatabase(String dbName) { + awsGlueMetastore.deleteDatabase(dbName); + } + + @Override + public void createTable(String dbName, TableInput tableInput) { + awsGlueMetastore.createTable(dbName, tableInput); + } + + @Override + public Table getTable(String dbName, String tableName) { + return awsGlueMetastore.getTable(dbName, tableName); + } + + @Override + public List
getTables(String dbname, String tablePattern) { + return awsGlueMetastore.getTables(dbname, tablePattern); + } + + @Override + public void updateTable(String dbName, TableInput tableInput) { + awsGlueMetastore.updateTable(dbName, tableInput); + } + + @Override + public void deleteTable(String dbName, String tableName) { + awsGlueMetastore.deleteTable(dbName, tableName); + } + + @Override + public Partition getPartition(String dbName, String tableName, List partitionValues) { + return awsGlueMetastore.getPartition(dbName, tableName, partitionValues); + } + + @Override + public List getPartitionsByNames( + String dbName, String tableName, List partitionsToGet) { + return awsGlueMetastore.getPartitionsByNames(dbName, tableName, partitionsToGet); + } + + @Override + public List getPartitions( + String dbName, String tableName, String expression, long max) throws TException { + return awsGlueMetastore.getPartitions(dbName, tableName, expression, max); + } + + @Override + public void updatePartition( + String dbName, String tableName, List partitionValues, PartitionInput partitionInput) { + awsGlueMetastore.updatePartition(dbName, tableName, partitionValues, partitionInput); + } + + @Override + public void deletePartition(String dbName, String tableName, List partitionValues) { + awsGlueMetastore.deletePartition(dbName, tableName, partitionValues); + } + + @Override + public List createPartitions(String dbName, String tableName, List partitionInputs) { + return awsGlueMetastore.createPartitions(dbName, tableName, partitionInputs); + } + + @Override + public void createUserDefinedFunction(String dbName, UserDefinedFunctionInput functionInput) { + awsGlueMetastore.createUserDefinedFunction(dbName, functionInput); + } + + @Override + public UserDefinedFunction getUserDefinedFunction(String dbName, String functionName) { + return awsGlueMetastore.getUserDefinedFunction(dbName, functionName); + } + + @Override + public List getUserDefinedFunctions(String dbName, String pattern) { + return awsGlueMetastore.getUserDefinedFunctions(dbName, pattern); + } + + @Override + public void deleteUserDefinedFunction(String dbName, String functionName) { + awsGlueMetastore.deleteUserDefinedFunction(dbName, functionName); + } + + @Override + public void updateUserDefinedFunction(String dbName, String functionName, UserDefinedFunctionInput functionInput) { + awsGlueMetastore.updateUserDefinedFunction(dbName, functionName, functionInput); + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreCacheDecorator.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreCacheDecorator.java new file mode 100644 index 000000000000..78122ced906f --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreCacheDecorator.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.iceberg.aws.glue.util.AWSGlueConfig; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.cache.Cache; +import org.apache.iceberg.relocated.com.google.common.cache.CacheBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class AWSGlueMetastoreCacheDecorator extends AWSGlueMetastoreBaseDecorator { + + private static final Logger LOG = LoggerFactory.getLogger(AWSGlueMetastoreCacheDecorator.class); + + private final HiveConf conf; + + private final boolean databaseCacheEnabled; + + private final boolean tableCacheEnabled; + + private Cache databaseCache; + + private Cache tableCache; + + public AWSGlueMetastoreCacheDecorator(HiveConf conf, AWSGlueMetastore awsGlueMetastore) { + super(awsGlueMetastore); + + Preconditions.checkNotNull(conf, "conf can not be null"); + this.conf = conf; + + databaseCacheEnabled = conf.getBoolean(AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE, false); + if (databaseCacheEnabled) { + int dbCacheSize = conf.getInt(AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE, 0); + int dbCacheTtlMins = conf.getInt(AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS, 0); + + // validate config values for size and ttl + validateConfigValueIsGreaterThanZero(AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE, dbCacheSize); + validateConfigValueIsGreaterThanZero(AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS, dbCacheTtlMins); + + // initialize database cache + databaseCache = CacheBuilder.newBuilder().maximumSize(dbCacheSize) + .expireAfterWrite(dbCacheTtlMins, TimeUnit.MINUTES).build(); + } else { + databaseCache = null; + } + + tableCacheEnabled = conf.getBoolean(AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE, false); + if (tableCacheEnabled) { + int tableCacheSize = conf.getInt(AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE, 0); + int tableCacheTtlMins = conf.getInt(AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS, 0); + + // validate config values for size and ttl + validateConfigValueIsGreaterThanZero(AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE, tableCacheSize); + validateConfigValueIsGreaterThanZero(AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS, tableCacheTtlMins); + + // initialize table cache + tableCache = CacheBuilder.newBuilder().maximumSize(tableCacheSize) + .expireAfterWrite(tableCacheTtlMins, TimeUnit.MINUTES).build(); + } else { + tableCache = null; + } + + LOG.info("Constructed"); + } + + private void validateConfigValueIsGreaterThanZero(String configName, int value) { + Preconditions.checkArgument(value > 0, String.format("Invalid value for Hive Config %s. 
" + + "Provide a value greater than zero", configName)); + + } + + @Override + public Database getDatabase(String dbName) { + Database result; + if (databaseCacheEnabled) { + Database valueFromCache = databaseCache.getIfPresent(dbName); + if (valueFromCache != null) { + LOG.info("Cache hit for operation [getDatabase] on key [{}]", dbName); + result = valueFromCache; + } else { + LOG.info("Cache miss for operation [getDatabase] on key [{}]", dbName); + result = super.getDatabase(dbName); + databaseCache.put(dbName, result); + } + } else { + result = super.getDatabase(dbName); + } + return result; + } + + @Override + public void updateDatabase(String dbName, DatabaseInput databaseInput) { + super.updateDatabase(dbName, databaseInput); + if (databaseCacheEnabled) { + purgeDatabaseFromCache(dbName); + } + } + + @Override + public void deleteDatabase(String dbName) { + super.deleteDatabase(dbName); + if (databaseCacheEnabled) { + purgeDatabaseFromCache(dbName); + } + } + + private void purgeDatabaseFromCache(String dbName) { + databaseCache.invalidate(dbName); + } + + @Override + public Table getTable(String dbName, String tableName) { + Table result; + if (tableCacheEnabled) { + TableIdentifier key = new TableIdentifier(dbName, tableName); + Table valueFromCache = tableCache.getIfPresent(key); + if (valueFromCache != null) { + LOG.info("Cache hit for operation [getTable] on key [{}]", key); + result = valueFromCache; + } else { + LOG.info("Cache miss for operation [getTable] on key [{}]", key); + result = super.getTable(dbName, tableName); + tableCache.put(key, result); + } + } else { + result = super.getTable(dbName, tableName); + } + return result; + } + + @Override + public void updateTable(String dbName, TableInput tableInput) { + super.updateTable(dbName, tableInput); + if (tableCacheEnabled) { + purgeTableFromCache(dbName, tableInput.getName()); + } + } + + @Override + public void deleteTable(String dbName, String tableName) { + super.deleteTable(dbName, tableName); + if (tableCacheEnabled) { + purgeTableFromCache(dbName, tableName); + } + } + + private void purgeTableFromCache(String dbName, String tableName) { + TableIdentifier key = new TableIdentifier(dbName, tableName); + tableCache.invalidate(key); + } + + + static class TableIdentifier { + private final String dbName; + private final String tableName; + + TableIdentifier(String dbName, String tableName) { + this.dbName = dbName; + this.tableName = tableName; + } + + public String getDbName() { + return dbName; + } + + public String getTableName() { + return tableName; + } + + @Override + public String toString() { + return "TableIdentifier{" + + "dbName='" + dbName + '\'' + + ", tableName='" + tableName + '\'' + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TableIdentifier that = (TableIdentifier) o; + return Objects.equals(dbName, that.dbName) && + Objects.equals(tableName, that.tableName); + } + + @Override + public int hashCode() { + return Objects.hash(dbName, tableName); + } + } + + Cache getDatabaseCache() { + return databaseCache; + } + + void setDatabaseCache(Cache databaseCache) { + this.databaseCache = databaseCache; + } + + Cache getTableCache() { + return tableCache; + } + + void setTableCache(Cache tableCache) { + this.tableCache = tableCache; + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreFactory.java 
b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreFactory.java new file mode 100644 index 000000000000..e0a41554fd94 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/AWSGlueMetastoreFactory.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.services.glue.AWSGlue; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.iceberg.aws.glue.util.AWSGlueConfig; + +public class AWSGlueMetastoreFactory { + + public AWSGlueMetastore newMetastore(HiveConf conf) throws MetaException { + AWSGlue glueClient = new AWSGlueClientFactory(conf).newClient(); + AWSGlueMetastore defaultMetastore = new DefaultAWSGlueMetastore(conf, glueClient); + if (isCacheEnabled(conf)) { + return new AWSGlueMetastoreCacheDecorator(conf, defaultMetastore); + } + return defaultMetastore; + } + + private boolean isCacheEnabled(HiveConf conf) { + boolean databaseCacheEnabled = conf.getBoolean(AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE, false); + boolean tableCacheEnabled = conf.getBoolean(AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE, false); + return databaseCacheEnabled || tableCacheEnabled; + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultAWSCredentialsProviderFactory.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultAWSCredentialsProviderFactory.java new file mode 100644 index 000000000000..001a1e8208ee --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultAWSCredentialsProviderFactory.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; +import org.apache.hadoop.hive.conf.HiveConf; + +public class DefaultAWSCredentialsProviderFactory implements + AWSCredentialsProviderFactory { + + @Override + public AWSCredentialsProvider buildAWSCredentialsProvider(HiveConf hiveConf) { + return new DefaultAWSCredentialsProviderChain(); + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultAWSGlueMetastore.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultAWSGlueMetastore.java new file mode 100644 index 000000000000..4444190bba73 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultAWSGlueMetastore.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.glue.AWSGlue; +import com.amazonaws.services.glue.model.BatchCreatePartitionRequest; +import com.amazonaws.services.glue.model.BatchGetPartitionRequest; +import com.amazonaws.services.glue.model.BatchGetPartitionResult; +import com.amazonaws.services.glue.model.CreateDatabaseRequest; +import com.amazonaws.services.glue.model.CreateTableRequest; +import com.amazonaws.services.glue.model.CreateUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.DeleteDatabaseRequest; +import com.amazonaws.services.glue.model.DeletePartitionRequest; +import com.amazonaws.services.glue.model.DeleteTableRequest; +import com.amazonaws.services.glue.model.DeleteUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.GetDatabaseRequest; +import com.amazonaws.services.glue.model.GetDatabaseResult; +import com.amazonaws.services.glue.model.GetDatabasesRequest; +import com.amazonaws.services.glue.model.GetDatabasesResult; +import com.amazonaws.services.glue.model.GetPartitionRequest; +import com.amazonaws.services.glue.model.GetPartitionsRequest; +import com.amazonaws.services.glue.model.GetPartitionsResult; +import com.amazonaws.services.glue.model.GetTableRequest; +import com.amazonaws.services.glue.model.GetTableResult; +import com.amazonaws.services.glue.model.GetTablesRequest; +import com.amazonaws.services.glue.model.GetTablesResult; +import com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.GetUserDefinedFunctionsRequest; +import com.amazonaws.services.glue.model.GetUserDefinedFunctionsResult; +import com.amazonaws.services.glue.model.Partition; +import 
com.amazonaws.services.glue.model.PartitionError; +import com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.PartitionValueList; +import com.amazonaws.services.glue.model.Segment; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UpdateDatabaseRequest; +import com.amazonaws.services.glue.model.UpdatePartitionRequest; +import com.amazonaws.services.glue.model.UpdateTableRequest; +import com.amazonaws.services.glue.model.UpdateUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import com.amazonaws.services.glue.model.UserDefinedFunctionInput; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.iceberg.aws.glue.util.MetastoreClientUtils; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Throwables; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.thrift.TException; + +public class DefaultAWSGlueMetastore implements AWSGlueMetastore { + + public static final int BATCH_GET_PARTITIONS_MAX_REQUEST_SIZE = 1000; + /** + * Based on the maxResults parameter at https://docs.aws.amazon.com/glue/latest/webapi/API_GetPartitions.html + */ + public static final int GET_PARTITIONS_MAX_SIZE = 1000; + /** + * Maximum number of Glue Segments. A segment defines a non-overlapping region of a table's partitions, + * allowing multiple requests to be executed in parallel. + */ + public static final int DEFAULT_NUM_PARTITION_SEGMENTS = 5; + /** + * Currently the upper limit allowed by Glue is 10. 
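+ * The configured value of aws.glue.partition.num.segments is validated against this maximum in the constructor.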
+ * https://docs.aws.amazon.com/glue/latest/webapi/API_Segment.html + */ + public static final int MAX_NUM_PARTITION_SEGMENTS = 10; + public static final String NUM_PARTITION_SEGMENTS_CONF = "aws.glue.partition.num.segments"; + public static final String CUSTOM_EXECUTOR_FACTORY_CONF = "hive.metastore.executorservice.factory.class"; + + private final HiveConf conf; + private final AWSGlue glueClient; + private final String catalogId; + private final ExecutorService executorService; + private final int numPartitionSegments; + + protected ExecutorService getExecutorService(HiveConf hiveConf) { + Class executorFactoryClass = hiveConf + .getClass(CUSTOM_EXECUTOR_FACTORY_CONF, + DefaultExecutorServiceFactory.class).asSubclass( + ExecutorServiceFactory.class); + ExecutorServiceFactory factory = ReflectionUtils.newInstance( + executorFactoryClass, hiveConf); + return factory.getExecutorService(hiveConf); + } + + public DefaultAWSGlueMetastore(HiveConf conf, AWSGlue glueClient) { + Preconditions.checkNotNull(conf, "Hive Config cannot be null"); + Preconditions.checkNotNull(glueClient, "glueClient cannot be null"); + this.numPartitionSegments = conf.getInt(NUM_PARTITION_SEGMENTS_CONF, DEFAULT_NUM_PARTITION_SEGMENTS); + Preconditions.checkArgument(numPartitionSegments <= MAX_NUM_PARTITION_SEGMENTS, + String.format("Hive Config [%s] can't exceed %d", NUM_PARTITION_SEGMENTS_CONF, MAX_NUM_PARTITION_SEGMENTS)); + this.conf = conf; + this.glueClient = glueClient; + this.catalogId = MetastoreClientUtils.getCatalogId(conf); + this.executorService = getExecutorService(conf); + } + + // ======================= Database ======================= + + @Override + public void createDatabase(DatabaseInput databaseInput) { + CreateDatabaseRequest createDatabaseRequest = new CreateDatabaseRequest().withDatabaseInput(databaseInput) + .withCatalogId(catalogId); + glueClient.createDatabase(createDatabaseRequest); + } + + @Override + public Database getDatabase(String dbName) { + GetDatabaseRequest getDatabaseRequest = new GetDatabaseRequest().withCatalogId(catalogId).withName(dbName); + GetDatabaseResult result = glueClient.getDatabase(getDatabaseRequest); + return result.getDatabase(); + } + + @Override + public List getAllDatabases() { + List ret = Lists.newArrayList(); + String nextToken = null; + do { + GetDatabasesRequest getDatabasesRequest = new GetDatabasesRequest().withNextToken(nextToken).withCatalogId( + catalogId); + GetDatabasesResult result = glueClient.getDatabases(getDatabasesRequest); + nextToken = result.getNextToken(); + ret.addAll(result.getDatabaseList()); + } while (nextToken != null); + return ret; + } + + @Override + public void updateDatabase(String databaseName, DatabaseInput databaseInput) { + UpdateDatabaseRequest updateDatabaseRequest = new UpdateDatabaseRequest().withName(databaseName) + .withDatabaseInput(databaseInput).withCatalogId(catalogId); + glueClient.updateDatabase(updateDatabaseRequest); + } + + @Override + public void deleteDatabase(String dbName) { + DeleteDatabaseRequest deleteDatabaseRequest = new DeleteDatabaseRequest().withName(dbName).withCatalogId( + catalogId); + glueClient.deleteDatabase(deleteDatabaseRequest); + } + + // ======================== Table ======================== + + @Override + public void createTable(String dbName, TableInput tableInput) { + CreateTableRequest createTableRequest = new CreateTableRequest().withTableInput(tableInput) + .withDatabaseName(dbName).withCatalogId(catalogId); + glueClient.createTable(createTableRequest); + } + + @Override + public 
Table getTable(String dbName, String tableName) { + GetTableRequest getTableRequest = new GetTableRequest().withDatabaseName(dbName).withName(tableName) + .withCatalogId(catalogId); + GetTableResult result = glueClient.getTable(getTableRequest); + return result.getTable(); + } + + @Override + public List<Table>
getTables(String dbname, String tablePattern) { + List<Table>
ret = new ArrayList<>(); + String nextToken = null; + do { + GetTablesRequest getTablesRequest = new GetTablesRequest().withDatabaseName(dbname) + .withExpression(tablePattern).withNextToken(nextToken).withCatalogId(catalogId); + GetTablesResult result = glueClient.getTables(getTablesRequest); + ret.addAll(result.getTableList()); + nextToken = result.getNextToken(); + } while (nextToken != null); + return ret; + } + + @Override + public void updateTable(String dbName, TableInput tableInput) { + UpdateTableRequest updateTableRequest = new UpdateTableRequest().withDatabaseName(dbName) + .withTableInput(tableInput).withCatalogId(catalogId); + glueClient.updateTable(updateTableRequest); + } + + @Override + public void deleteTable(String dbName, String tableName) { + DeleteTableRequest deleteTableRequest = new DeleteTableRequest().withDatabaseName(dbName).withName(tableName) + .withCatalogId(catalogId); + glueClient.deleteTable(deleteTableRequest); + } + + // =========================== Partition =========================== + + @Override + public Partition getPartition(String dbName, String tableName, List partitionValues) { + GetPartitionRequest request = new GetPartitionRequest() + .withDatabaseName(dbName) + .withTableName(tableName) + .withPartitionValues(partitionValues) + .withCatalogId(catalogId); + return glueClient.getPartition(request).getPartition(); + } + + @Override + public List getPartitionsByNames(String dbName, String tableName, + List partitionsToGet) { + + List> batchedPartitionsToGet = Lists.partition(partitionsToGet, + BATCH_GET_PARTITIONS_MAX_REQUEST_SIZE); + List> batchGetPartitionFutures = Lists.newArrayList(); + + for (List batch : batchedPartitionsToGet) { + final BatchGetPartitionRequest request = new BatchGetPartitionRequest() + .withDatabaseName(dbName) + .withTableName(tableName) + .withPartitionsToGet(batch) + .withCatalogId(catalogId); + batchGetPartitionFutures.add(this.executorService.submit(new Callable() { + @Override + public BatchGetPartitionResult call() throws Exception { + return glueClient.batchGetPartition(request); + } + })); + } + + List result = Lists.newArrayList(); + try { + for (Future future : batchGetPartitionFutures) { + result.addAll(future.get().getPartitions()); + } + } catch (ExecutionException e) { + Throwables.propagateIfInstanceOf(e.getCause(), AmazonServiceException.class); + Throwables.propagate(e.getCause()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return result; + } + + @Override + public List getPartitions(String dbName, String tableName, String expression, + long max) throws TException { + if (max == 0) { + return Collections.emptyList(); + } + if (max < 0 || max > GET_PARTITIONS_MAX_SIZE) { + return getPartitionsParallel(dbName, tableName, expression, max); + } else { + // We don't need to get too many partitions, so just do it serially. + return getCatalogPartitions(dbName, tableName, expression, max, null); + } + } + + private List getPartitionsParallel( + final String databaseName, + final String tableName, + final String expression, + final long max) throws TException { + // Prepare the segments + List segments = Lists.newArrayList(); + for (int i = 0; i < numPartitionSegments; i++) { + segments.add(new Segment() + .withSegmentNumber(i) + .withTotalSegments(numPartitionSegments)); + } + // Submit Glue API calls in parallel using the thread pool. + // We could convert this into a parallelStream after upgrading to JDK 8 compiler base. 
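+ // One callable is submitted per segment below; segments are disjoint slices of the
+ // table's partitions, so the combined results cover the partition set exactly once.
+ // The collection loop stops early once "max" partitions have been gathered (when
+ // max > 0), trimming the last segment's list to the remaining count.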
+ List>> futures = Lists.newArrayList(); + for (final Segment segment : segments) { + futures.add(this.executorService.submit(new Callable>() { + @Override + public List call() throws Exception { + return getCatalogPartitions(databaseName, tableName, expression, max, segment); + } + })); + } + + // Get the results + List partitions = Lists.newArrayList(); + try { + for (Future> future : futures) { + List segmentPartitions = future.get(); + if (partitions.size() + segmentPartitions.size() >= max && max > 0) { + // Extract the required number of partitions from the segment and we're done. + long remaining = max - partitions.size(); + partitions.addAll(segmentPartitions.subList(0, (int) remaining)); + break; + } else { + partitions.addAll(segmentPartitions); + } + } + } catch (ExecutionException e) { + Throwables.propagateIfInstanceOf(e.getCause(), AmazonServiceException.class); + Throwables.propagate(e.getCause()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return partitions; + } + + + private List getCatalogPartitions(String databaseName, String tableName, String expression, + long max, Segment segment) { + List partitions = Lists.newArrayList(); + String nextToken = null; + do { + GetPartitionsRequest request = new GetPartitionsRequest() + .withDatabaseName(databaseName) + .withTableName(tableName) + .withExpression(expression) + .withNextToken(nextToken) + .withCatalogId(catalogId) + .withSegment(segment); + GetPartitionsResult res = glueClient.getPartitions(request); + List list = res.getPartitions(); + if ((partitions.size() + list.size()) >= max && max > 0) { + long remaining = max - partitions.size(); + partitions.addAll(list.subList(0, (int) remaining)); + break; + } + partitions.addAll(list); + nextToken = res.getNextToken(); + } while (nextToken != null); + return partitions; + } + + @Override + public void updatePartition(String dbName, String tableName, List partitionValues, + PartitionInput partitionInput) { + UpdatePartitionRequest updatePartitionRequest = new UpdatePartitionRequest().withDatabaseName(dbName) + .withTableName(tableName).withPartitionValueList(partitionValues) + .withPartitionInput(partitionInput).withCatalogId(catalogId); + glueClient.updatePartition(updatePartitionRequest); + } + + @Override + public void deletePartition(String dbName, String tableName, List partitionValues) { + DeletePartitionRequest request = new DeletePartitionRequest() + .withDatabaseName(dbName) + .withTableName(tableName) + .withPartitionValues(partitionValues) + .withCatalogId(catalogId); + glueClient.deletePartition(request); + } + + @Override + public List createPartitions(String dbName, String tableName, + List partitionInputs) { + BatchCreatePartitionRequest request = + new BatchCreatePartitionRequest().withDatabaseName(dbName) + .withTableName(tableName).withCatalogId(catalogId) + .withPartitionInputList(partitionInputs); + return glueClient.batchCreatePartition(request).getErrors(); + } + + // ====================== User Defined Function ====================== + + @Override + public void createUserDefinedFunction(String dbName, UserDefinedFunctionInput functionInput) { + CreateUserDefinedFunctionRequest createUserDefinedFunctionRequest = new CreateUserDefinedFunctionRequest() + .withDatabaseName(dbName).withFunctionInput(functionInput).withCatalogId(catalogId); + glueClient.createUserDefinedFunction(createUserDefinedFunctionRequest); + } + + @Override + public UserDefinedFunction getUserDefinedFunction(String dbName, String functionName) { + 
GetUserDefinedFunctionRequest getUserDefinedFunctionRequest = new GetUserDefinedFunctionRequest() + .withDatabaseName(dbName).withFunctionName(functionName).withCatalogId(catalogId); + return glueClient.getUserDefinedFunction(getUserDefinedFunctionRequest).getUserDefinedFunction(); + } + + @Override + public List getUserDefinedFunctions(String dbName, String pattern) { + List ret = Lists.newArrayList(); + String nextToken = null; + do { + GetUserDefinedFunctionsRequest getUserDefinedFunctionsRequest = new GetUserDefinedFunctionsRequest() + .withDatabaseName(dbName).withPattern(pattern).withNextToken(nextToken).withCatalogId(catalogId); + GetUserDefinedFunctionsResult result = glueClient.getUserDefinedFunctions(getUserDefinedFunctionsRequest); + nextToken = result.getNextToken(); + ret.addAll(result.getUserDefinedFunctions()); + } while (nextToken != null); + return ret; + } + + @Override + public void deleteUserDefinedFunction(String dbName, String functionName) { + DeleteUserDefinedFunctionRequest deleteUserDefinedFunctionRequest = new DeleteUserDefinedFunctionRequest() + .withDatabaseName(dbName).withFunctionName(functionName).withCatalogId(catalogId); + glueClient.deleteUserDefinedFunction(deleteUserDefinedFunctionRequest); + } + + @Override + public void updateUserDefinedFunction(String dbName, String functionName, UserDefinedFunctionInput functionInput) { + UpdateUserDefinedFunctionRequest updateUserDefinedFunctionRequest = new UpdateUserDefinedFunctionRequest() + .withDatabaseName(dbName).withFunctionName(functionName).withFunctionInput(functionInput) + .withCatalogId(catalogId); + glueClient.updateUserDefinedFunction(updateUserDefinedFunctionRequest); + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultExecutorServiceFactory.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultExecutorServiceFactory.java new file mode 100644 index 000000000000..e90fc19f77b0 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/DefaultExecutorServiceFactory.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; + +public class DefaultExecutorServiceFactory implements ExecutorServiceFactory { + private static final int NUM_EXECUTOR_THREADS = 5; + + private static final ExecutorService GLUE_METASTORE_DELEGATE_THREAD_POOL = Executors.newFixedThreadPool( + NUM_EXECUTOR_THREADS, new ThreadFactoryBuilder() + .setNameFormat(GlueMetastoreClientDelegate.GLUE_METASTORE_DELEGATE_THREADPOOL_NAME_FORMAT) + .setDaemon(true).build() + ); + + @Override + public ExecutorService getExecutorService(HiveConf conf) { + return GLUE_METASTORE_DELEGATE_THREAD_POOL; + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/ExecutorServiceFactory.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/ExecutorServiceFactory.java new file mode 100644 index 000000000000..62522ca66cb6 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/ExecutorServiceFactory.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import java.util.concurrent.ExecutorService; +import org.apache.hadoop.hive.conf.HiveConf; + +/** + * Interface for creating an ExecutorService + */ +public interface ExecutorServiceFactory { + ExecutorService getExecutorService(HiveConf conf); +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/GlueClientFactory.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/GlueClientFactory.java new file mode 100644 index 000000000000..2bb07e6ebe35 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/GlueClientFactory.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.services.glue.AWSGlue; +import org.apache.hadoop.hive.metastore.api.MetaException; + +/*** + * Interface for creating Glue AWS Client + */ +public interface GlueClientFactory { + + AWSGlue newClient() throws MetaException; + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/GlueMetastoreClientDelegate.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/GlueMetastoreClientDelegate.java new file mode 100644 index 000000000000..989e4946dabd --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/GlueMetastoreClientDelegate.java @@ -0,0 +1,1679 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.glue.model.Column; +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.PartitionValueList; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import com.amazonaws.services.glue.model.UserDefinedFunctionInput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.AggrStats; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.CompactionResponse; +import org.apache.hadoop.hive.metastore.api.CompactionType; +import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; +import org.apache.hadoop.hive.metastore.api.DataOperationType; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; 
+import org.apache.hadoop.hive.metastore.api.FireEventRequest; +import org.apache.hadoop.hive.metastore.api.FireEventResponse; +import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; +import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest; +import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse; +import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.LockRequest; +import org.apache.hadoop.hive.metastore.api.LockResponse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.MetadataPpdResult; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.NotificationEventResponse; +import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.ShowCompactResponse; +import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.aws.glue.converters.ConverterUtils; +import org.apache.iceberg.aws.glue.converters.GlueInputConverter; +import org.apache.iceberg.aws.glue.converters.HiveToCatalogConverter; +import org.apache.iceberg.aws.glue.lock.LockManager; +import org.apache.iceberg.aws.glue.shims.AwsGlueHiveShims; +import org.apache.iceberg.aws.glue.shims.ShimsLoader; +import org.apache.iceberg.aws.glue.util.BatchCreatePartitionsHelper; +import org.apache.iceberg.aws.glue.util.ExpressionHelper; +import org.apache.iceberg.aws.glue.util.MetastoreClientUtils; +import org.apache.iceberg.aws.glue.util.PartitionKey; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/*** + * Delegate Class to provide all common functionality + * between Spark-hive version, Hive and Presto clients + */ +@SuppressWarnings({"Slf4jConstantLogMessage", "NonEmptyAtclauseDescription", "EmptyBlockTag"}) +public class GlueMetastoreClientDelegate { + + private static final Logger LOG = LoggerFactory.getLogger(GlueMetastoreClientDelegate.class); + + private static final List implicitRoles = Lists.newArrayList( + new Role(HiveMetaStore.PUBLIC, 0, HiveMetaStore.PUBLIC)); + 
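+ // Hive's built-in "public" role; presumably the only role this delegate surfaces,
+ // since the Glue Data Catalog has no role concept of its own.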
public static final int MILLISECOND_TO_SECOND_FACTOR = 1000; + public static final Long NO_MAX = -1L; + public static final String MATCH_ALL = ".*"; + + public static final String INDEX_PREFIX = "index_prefix"; + + private static final int BATCH_CREATE_PARTITIONS_MAX_REQUEST_SIZE = 100; + + public static final String CUSTOM_EXECUTOR_FACTORY_CONF = "hive.metastore.executorservice.factory.class"; + + static final String GLUE_METASTORE_DELEGATE_THREADPOOL_NAME_FORMAT = "glue-metastore-delegate-%d"; + + private final ExecutorService executorService; + private final AWSGlueMetastore glueMetastore; + private final HiveConf conf; + private final Warehouse wh; + private final AwsGlueHiveShims hiveShims = ShimsLoader.getHiveShims(); + private final String catalogId; + private final LockManager lockManager; + + public static final String CATALOG_ID_CONF = "hive.metastore.glue.catalogid"; + public static final String NUM_PARTITION_SEGMENTS_CONF = "aws.glue.partition.num.segments"; + + protected ExecutorService getExecutorService() { + Class executorFactoryClass = this.conf + .getClass(CUSTOM_EXECUTOR_FACTORY_CONF, + DefaultExecutorServiceFactory.class).asSubclass( + ExecutorServiceFactory.class); + ExecutorServiceFactory factory = ReflectionUtils.newInstance( + executorFactoryClass, conf); + return factory.getExecutorService(conf); + } + + public GlueMetastoreClientDelegate( + HiveConf conf, + AWSGlueMetastore glueMetastore, + Warehouse wh, + LockManager lockManager) throws MetaException { + Preconditions.checkNotNull(conf, "Hive Config cannot be null"); + Preconditions.checkNotNull(glueMetastore, "glueMetastore cannot be null"); + Preconditions.checkNotNull(wh, "Warehouse cannot be null"); + Preconditions.checkNotNull(lockManager, "LockManager cannot be null"); + + this.conf = conf; + this.glueMetastore = glueMetastore; + this.wh = wh; + this.executorService = getExecutorService(); + this.lockManager = lockManager; + + // TODO - May be validate catalogId confirms to AWS AccountId too. 
+ catalogId = MetastoreClientUtils.getCatalogId(conf); + } + + // ======================= Database ======================= + + public void createDatabase(org.apache.hadoop.hive.metastore.api.Database database) throws TException { + Preconditions.checkNotNull(database, "database cannot be null"); + + if (StringUtils.isEmpty(database.getLocationUri())) { + database.setLocationUri(wh.getDefaultDatabasePath(database.getName()).toString()); + } else { + database.setLocationUri(wh.getDnsPath(new Path(database.getLocationUri())).toString()); + } + Path dbPath = new Path(database.getLocationUri()); + boolean madeDir = MetastoreClientUtils.makeDirs(wh, dbPath); + + try { + DatabaseInput catalogDatabase = GlueInputConverter.convertToDatabaseInput(database); + glueMetastore.createDatabase(catalogDatabase); + } catch (AmazonServiceException e) { + if (madeDir) { + wh.deleteDir(dbPath, true); + } + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to create database: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public org.apache.hadoop.hive.metastore.api.Database getDatabase(String name) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(name), "name cannot be null or empty"); + + try { + Database catalogDatabase = glueMetastore.getDatabase(name); + return CatalogToHiveConverter.convertDatabase(catalogDatabase); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get database object: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public List getDatabases(String patternInput) throws TException { + // Special handling for compatibility with Hue that passes "*" instead of ".*" + String pattern = patternInput; + if (pattern == null || pattern.equals("*")) { + pattern = MATCH_ALL; + } + + try { + List ret = new ArrayList<>(); + + List allDatabases = glueMetastore.getAllDatabases(); + + // filter by pattern + for (Database db : allDatabases) { + String name = db.getName(); + if (Pattern.matches(pattern, name)) { + ret.add(name); + } + } + return ret; + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get databases: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public void alterDatabase( + String databaseName, org.apache.hadoop.hive.metastore.api.Database database) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(databaseName), + "databaseName cannot be null or empty"); + Preconditions.checkNotNull(database, "database cannot be null"); + + try { + DatabaseInput catalogDatabase = GlueInputConverter.convertToDatabaseInput(database); + glueMetastore.updateDatabase(databaseName, catalogDatabase); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to alter database: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public void dropDatabase( + String name, boolean deleteData, boolean ignoreUnknownDb, boolean cascade) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(name), "name cannot be null or empty"); + + String dbLocation; + try { + List tables = getTables(name, MATCH_ALL); + boolean isEmptyDatabase = tables.isEmpty(); + + org.apache.hadoop.hive.metastore.api.Database db = getDatabase(name); + dbLocation = 
db.getLocationUri(); + + // TODO: handle cascade + if (isEmptyDatabase || cascade) { + glueMetastore.deleteDatabase(name); + } else { + throw new InvalidOperationException("Database " + name + " is not empty."); + } + } catch (NoSuchObjectException e) { + if (ignoreUnknownDb) { + return; + } else { + throw e; + } + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to drop database: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + + if (deleteData) { + try { + wh.deleteDir(new Path(dbLocation), true); + } catch (Exception e) { + LOG.error("Unable to remove database directory " + dbLocation, e); + } + } + } + + public boolean databaseExists(String dbName) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + + try { + getDatabase(dbName); + } catch (NoSuchObjectException e) { + return false; + } catch (AmazonServiceException e) { + throw new TException(e); + } catch (Exception e) { + throw new MetaException(e.getMessage()); + } + return true; + } + + // ======================== Table ======================== + + public void createTable(org.apache.hadoop.hive.metastore.api.Table tbl) throws TException { + Preconditions.checkNotNull(tbl, "tbl cannot be null"); + boolean dirCreated = validateNewTableAndCreateDirectory(tbl); + try { + // Glue Server side does not set DDL_TIME. Set it here for the time being. + // TODO: Set DDL_TIME parameter in Glue service + tbl.setParameters(MetastoreClientUtils.deepCopyMap(tbl.getParameters())); + tbl.getParameters().put(hive_metastoreConstants.DDL_TIME, + Long.toString(System.currentTimeMillis() / MILLISECOND_TO_SECOND_FACTOR)); + + TableInput tableInput = GlueInputConverter.convertToTableInput(tbl); + glueMetastore.createTable(tbl.getDbName(), tableInput); + } catch (AmazonServiceException e) { + if (dirCreated) { + Path tblPath = new Path(tbl.getSd().getLocation()); + wh.deleteDir(tblPath, true); + } + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to create table: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public boolean tableExists(String databaseName, String tableName) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(databaseName), "databaseName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tableName), "tableName cannot be null or empty"); + + if (!databaseExists(databaseName)) { + throw new UnknownDBException("Database: " + databaseName + " does not exist."); + } + try { + glueMetastore.getTable(databaseName, tableName); + return true; + } catch (EntityNotFoundException e) { + return false; + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to check table exist: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public org.apache.hadoop.hive.metastore.api.Table getTable(String dbName, String tableName) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tableName), "tableName cannot be null or empty"); + + try { + Table table = glueMetastore.getTable(dbName, tableName); + MetastoreClientUtils.validateGlueTable(table); + return CatalogToHiveConverter.convertTable(table, dbName); + } catch (AmazonServiceException e) { 
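+ // Translate AWS service errors into the corresponding Hive metastore exception.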
+ throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get table: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public List<String> getTables(String dbname, String tablePattern) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbname), "dbName cannot be null or empty"); + + List<String> names = Lists.newArrayList(); + try { + List<Table>
tables = glueMetastore.getTables(dbname, tablePattern); + for (Table catalogTable : tables) { + names.add(catalogTable.getName()); + } + return names; + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get tables: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public List getTables(String dbname, String tablePattern, TableType tableType) throws TException { + throw new UnsupportedOperationException("getTables with TableType is not supported"); + } + + public List getTableMeta( + String dbPatterns, + String tablePatterns, + List tableTypes + ) throws TException { + List tables = new ArrayList<>(); + List databases = getDatabases(dbPatterns); + for (String dbName : databases) { + String nextToken = null; + List
dbTables = glueMetastore.getTables(dbName, tablePatterns); + for (Table catalogTable : dbTables) { + if (tableTypes == null || + tableTypes.isEmpty() || + tableTypes.contains(catalogTable.getTableType())) { + tables.add(CatalogToHiveConverter.convertTableMeta(catalogTable, dbName)); + } + } + } + return tables; + } + + /* + * Hive reference: https://github.com/apache/hive/blob/rel/release-2.3.0/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java#L88 + */ + public void alterTable( + String dbName, + String oldTableName, + org.apache.hadoop.hive.metastore.api.Table newTable, + EnvironmentContext environmentContext + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(oldTableName), "oldTableName cannot be null or empty"); + Preconditions.checkNotNull(newTable, "newTable cannot be null"); + + if (isCascade(environmentContext)) { + throw new UnsupportedOperationException("Cascade for alter_table is not supported"); + } + + if (!oldTableName.equals(newTable.getTableName())) { + throw new UnsupportedOperationException("Table rename is not supported"); + } + + MetastoreClientUtils.validateTableObject(newTable, conf); + if (!tableExists(dbName, oldTableName)) { + throw new UnknownTableException("Table: " + oldTableName + " does not exists"); + } + + // If table properties has EXTERNAL set, update table type accordinly + // mimics Hive's ObjectStore#convertToMTable, added in HIVE-1329 + boolean isExternal = Boolean.parseBoolean(newTable.getParameters().get("EXTERNAL")); + if (TableType.MANAGED_TABLE.toString().equals(newTable.getTableType()) && isExternal) { + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else if (TableType.EXTERNAL_TABLE.toString().equals(newTable.getTableType()) && !isExternal) { + newTable.setTableType(TableType.MANAGED_TABLE.toString()); + } + + if (hiveShims.requireCalStats(conf, null, null, newTable, environmentContext) && + newTable.getPartitionKeys().isEmpty()) { + // update table stats for non-partition Table + org.apache.hadoop.hive.metastore.api.Database db = getDatabase(newTable.getDbName()); + hiveShims.updateTableStatsFast(db, newTable, wh, false, true, environmentContext); + } + + try { + TableInput newTableInput = GlueInputConverter.convertToTableInput(newTable); + glueMetastore.updateTable(dbName, newTableInput); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to alter table: " + oldTableName; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + private boolean isCascade(EnvironmentContext environmentContext) { + return environmentContext != null && + environmentContext.isSetProperties() && + StatsSetupConst.TRUE.equals(environmentContext.getProperties().get(StatsSetupConst.CASCADE)); + } + + public void dropTable( + String dbName, + String tableName, + boolean deleteData, + boolean ignoreUnknownTbl, + boolean ifPurge + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tableName), "tableName cannot be null or empty"); + + if (!tableExists(dbName, tableName)) { + if (!ignoreUnknownTbl) { + throw new UnknownTableException("Cannot find table: " + dbName + "." 
+ tableName); + } else { + return; + } + } + + org.apache.hadoop.hive.metastore.api.Table tbl = getTable(dbName, tableName); + String tblLocation = tbl.getSd().getLocation(); + boolean isExternal = MetastoreClientUtils.isExternalTable(tbl); + dropPartitionsForTable(dbName, tableName, deleteData && !isExternal); + dropIndexesForTable(dbName, tableName, deleteData && !isExternal); + + try { + glueMetastore.deleteTable(dbName, tableName); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to drop table: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + + if (StringUtils.isNotEmpty(tblLocation) && deleteData && !isExternal) { + Path tblPath = new Path(tblLocation); + try { + wh.deleteDir(tblPath, true, ifPurge); + } catch (Exception e) { + LOG.error("Unable to remove table directory " + tblPath, e); + } + } + } + + private void dropPartitionsForTable(String dbName, String tableName, boolean deleteData) throws TException { + List partitionsToDelete = + getPartitions(dbName, tableName, null, NO_MAX); + for (org.apache.hadoop.hive.metastore.api.Partition part : partitionsToDelete) { + dropPartition(dbName, tableName, part.getValues(), true, deleteData, false); + } + } + + private void dropIndexesForTable(String dbName, String tableName, boolean deleteData) throws TException { + List indexesToDelete = listIndexes(dbName, tableName); + for (Index index : indexesToDelete) { + dropTable(dbName, index.getIndexTableName(), deleteData, true, false); + } + } + + public List listTableNamesByFilter(String dbName, String filter, short maxTables) throws TException { + throw new UnsupportedOperationException("listTableNamesByFilter is not supported"); + } + + /** + * @return boolean + * true -> directory created + * false -> directory not created + */ + public boolean validateNewTableAndCreateDirectory(org.apache.hadoop.hive.metastore.api.Table tbl) throws TException { + Preconditions.checkNotNull(tbl, "tbl cannot be null"); + if (tableExists(tbl.getDbName(), tbl.getTableName())) { + throw new AlreadyExistsException("Table " + tbl.getTableName() + " already exists."); + } + MetastoreClientUtils.validateTableObject(tbl, conf); + + if (TableType.VIRTUAL_VIEW.toString().equals(tbl.getTableType())) { + // we don't need to create directory for virtual views + return false; + } + + if (StringUtils.isEmpty(tbl.getSd().getLocation())) { + org.apache.hadoop.hive.metastore.api.Database db = getDatabase(tbl.getDbName()); + tbl.getSd().setLocation(hiveShims.getDefaultTablePath(db, tbl.getTableName(), wh).toString()); + } else { + tbl.getSd().setLocation(wh.getDnsPath(new Path(tbl.getSd().getLocation())).toString()); + } + + Path tblPath = new Path(tbl.getSd().getLocation()); + return MetastoreClientUtils.makeDirs(wh, tblPath); + } + + // =========================== Partition =========================== + + public org.apache.hadoop.hive.metastore.api.Partition appendPartition( + String dbName, + String tblName, + List values + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tblName), "tblName cannot be null or empty"); + Preconditions.checkNotNull(values, "partition values cannot be null"); + org.apache.hadoop.hive.metastore.api.Table table = getTable(dbName, tblName); + Preconditions.checkNotNull(table.getSd(), "StorageDescriptor cannot be null for Table " + tblName); + 
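// Build the new partition from the table's storage descriptor and the supplied values, then register it through addPartitions. +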
org.apache.hadoop.hive.metastore.api.Partition partition = buildPartitionFromValues(table, values); + addPartitions(Lists.newArrayList(partition), false, true); + return partition; + } + + /** + * Taken from HiveMetaStore#append_partition_common + */ + private org.apache.hadoop.hive.metastore.api.Partition buildPartitionFromValues( + org.apache.hadoop.hive.metastore.api.Table table, + List values) throws MetaException { + org.apache.hadoop.hive.metastore.api.Partition partition = new org.apache.hadoop.hive.metastore.api.Partition(); + partition.setDbName(table.getDbName()); + partition.setTableName(table.getTableName()); + partition.setValues(values); + partition.setSd(table.getSd().deepCopy()); + + Path partLocation = new Path(table.getSd().getLocation(), Warehouse.makePartName(table.getPartitionKeys(), values)); + partition.getSd().setLocation(partLocation.toString()); + + long timeInSecond = System.currentTimeMillis() / MILLISECOND_TO_SECOND_FACTOR; + partition.setCreateTime((int) timeInSecond); + partition.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(timeInSecond)); + return partition; + } + + public List addPartitions( + List partitions, + boolean ifNotExists, + boolean needResult + ) throws TException { + Preconditions.checkNotNull(partitions, "partitions cannot be null"); + List partitionsCreated = + batchCreatePartitions(partitions, ifNotExists); + if (!needResult) { + return null; + } + return CatalogToHiveConverter.convertPartitions(partitionsCreated); + } + + private List batchCreatePartitions( + final List hivePartitions, + final boolean ifNotExists + ) throws TException { + if (hivePartitions.isEmpty()) { + return Lists.newArrayList(); + } + + final String dbName = hivePartitions.get(0).getDbName(); + final String tableName = hivePartitions.get(0).getTableName(); + org.apache.hadoop.hive.metastore.api.Table tbl = getTable(dbName, tableName); + validateInputForBatchCreatePartitions(tbl, hivePartitions); + + List catalogPartitions = Lists.newArrayList(); + Map addedPath = Maps.newHashMap(); + try { + for (org.apache.hadoop.hive.metastore.api.Partition partition : hivePartitions) { + Path location = getPartitionLocation(tbl, partition); + boolean partDirCreated = false; + if (location != null) { + partition.getSd().setLocation(location.toString()); + partDirCreated = MetastoreClientUtils.makeDirs(wh, location); + } + Partition catalogPartition = HiveToCatalogConverter.convertPartition(partition); + catalogPartitions.add(catalogPartition); + if (partDirCreated) { + addedPath.put(new PartitionKey(catalogPartition), new Path(partition.getSd().getLocation())); + } + } + } catch (MetaException e) { + for (Path path : addedPath.values()) { + deletePath(path); + } + throw e; + } + + List> batchCreatePartitionsFutures = Lists.newArrayList(); + for (int i = 0; i < catalogPartitions.size(); i += BATCH_CREATE_PARTITIONS_MAX_REQUEST_SIZE) { + int end = Math.min(i + BATCH_CREATE_PARTITIONS_MAX_REQUEST_SIZE, catalogPartitions.size()); + final List partitionsOnePage = catalogPartitions.subList(i, end); + + batchCreatePartitionsFutures.add(this.executorService.submit(new Callable() { + @Override + public BatchCreatePartitionsHelper call() throws Exception { + return new BatchCreatePartitionsHelper( + glueMetastore, dbName, tableName, catalogId, partitionsOnePage, ifNotExists) + .createPartitions(); + } + })); + } + + TException tException = null; + List partitionsCreated = Lists.newArrayList(); + for (Future future : batchCreatePartitionsFutures) { + try { + 
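// Collect each page's result: record the partitions created, keep only the first TException, and clean up directories for partitions that failed. +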
BatchCreatePartitionsHelper batchCreatePartitionsHelper = future.get(); + partitionsCreated.addAll(batchCreatePartitionsHelper.getPartitionsCreated()); + tException = tException == null ? batchCreatePartitionsHelper.getFirstTException() : tException; + deletePathForPartitions(batchCreatePartitionsHelper.getPartitionsFailed(), addedPath); + } catch (Exception e) { + LOG.error("Exception thrown by BatchCreatePartitions thread pool. ", e); + } + } + + if (tException != null) { + throw tException; + } + return partitionsCreated; + } + + private void validateInputForBatchCreatePartitions( + org.apache.hadoop.hive.metastore.api.Table tbl, + List hivePartitions) { + Preconditions.checkNotNull(tbl.getPartitionKeys(), + "Partition keys cannot be null"); + for (org.apache.hadoop.hive.metastore.api.Partition partition : hivePartitions) { + Preconditions.checkArgument(tbl.getDbName().equals(partition.getDbName()), + "Partitions must be in the same DB"); + Preconditions.checkArgument(tbl.getTableName().equals(partition.getTableName()), + "Partitions must be in the same table"); + Preconditions.checkNotNull(partition.getValues(), + "Partition values cannot be null"); + Preconditions.checkArgument(tbl.getPartitionKeys().size() == partition.getValues().size(), + "Number of table partition keys must match number of partition values"); + } + } + + private void deletePathForPartitions(List partitions, Map addedPath) { + for (Partition partition : partitions) { + Path path = addedPath.get(new PartitionKey(partition)); + if (path != null) { + deletePath(path); + } + } + } + + private void deletePath(Path path) { + try { + wh.deleteDir(path, true); + } catch (MetaException e) { + LOG.error("Warehouse delete directory failed. ", e); + } + } + + /** + * Taken from HiveMetastore#createLocationForAddedPartition + */ + private Path getPartitionLocation( + org.apache.hadoop.hive.metastore.api.Table tbl, + org.apache.hadoop.hive.metastore.api.Partition part) throws MetaException { + Path partLocation = null; + String partLocationStr = null; + if (part.getSd() != null) { + partLocationStr = part.getSd().getLocation(); + } + + if (StringUtils.isEmpty(partLocationStr)) { + // set default location if not specified and this is + // a physical table partition (not a view) + if (tbl.getSd().getLocation() != null) { + partLocation = new Path(tbl.getSd().getLocation(), + Warehouse.makePartName(tbl.getPartitionKeys(), part.getValues())); + } + } else { + if (tbl.getSd().getLocation() == null) { + throw new MetaException("Cannot specify location for a view partition"); + } + partLocation = wh.getDnsPath(new Path(partLocationStr)); + } + return partLocation; + } + + public List listPartitionNames( + String databaseName, + String tableName, + List values, + short max + ) throws TException { + String expression = null; + org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName); + if (values != null) { + expression = ExpressionHelper.buildExpressionFromPartialSpecification(table, values); + } + + List names = Lists.newArrayList(); + List partitions = + getPartitions(databaseName, tableName, expression, max); + for (org.apache.hadoop.hive.metastore.api.Partition p : partitions) { + names.add(Warehouse.makePartName(table.getPartitionKeys(), p.getValues())); + } + return names; + } + + public List getPartitionsByNames( + String databaseName, + String tableName, + List partitionNames + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(databaseName), "databaseName cannot be null or 
empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tableName), "tableName cannot be null or empty"); + Preconditions.checkNotNull(partitionNames, "partitionNames cannot be null"); + + List partitionsToGet = Lists.newArrayList(); + for (String partitionName : partitionNames) { + partitionsToGet.add(new PartitionValueList().withValues(partitionNameToVals(partitionName))); + } + try { + List partitions = + glueMetastore.getPartitionsByNames(databaseName, tableName, partitionsToGet); + + return CatalogToHiveConverter.convertPartitions(partitions); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get partition by names: " + StringUtils.join(partitionNames, "/"); + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + public org.apache.hadoop.hive.metastore.api.Partition getPartition( + String dbName, String tblName, String partitionName) + throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tblName), "tblName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(partitionName), "partitionName cannot be null or empty"); + List values = partitionNameToVals(partitionName); + return getPartition(dbName, tblName, values); + } + + public org.apache.hadoop.hive.metastore.api.Partition getPartition( + String dbName, String tblName, List values) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tblName), "tblName cannot be null or empty"); + Preconditions.checkNotNull(values, "values cannot be null"); + + Partition partition; + try { + partition = glueMetastore.getPartition(dbName, tblName, values); + if (partition == null) { + LOG.debug("No partitions were return for dbName = {}, tblName = {}, values = {}", dbName, tblName, values); + return null; + } + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get partition with values: " + StringUtils.join(values, "/"); + LOG.error(msg, e); + throw new MetaException(msg + e); + } + return CatalogToHiveConverter.convertPartition(partition); + } + + public List getPartitions( + String databaseName, + String tableName, + String filter, + long max + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(databaseName), "databaseName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tableName), "tableName cannot be null or empty"); + List partitions = getCatalogPartitions(databaseName, tableName, filter, max); + return CatalogToHiveConverter.convertPartitions(partitions); + } + + public List getCatalogPartitions( + final String databaseName, + final String tableName, + final String expression, + final long max + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(databaseName), "databaseName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tableName), "tableName cannot be null or empty"); + try { + return glueMetastore.getPartitions(databaseName, tableName, expression, max); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get partitions with expression: " + expression; + LOG.error(msg, e); + throw 
new MetaException(msg + e); + } + } + + public boolean dropPartition( + String dbName, + String tblName, + List values, + boolean ifExist, + boolean deleteData, + boolean purgeData + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tblName), "tblName cannot be null or empty"); + Preconditions.checkNotNull(values, "values cannot be null"); + + org.apache.hadoop.hive.metastore.api.Partition partition = null; + try { + partition = getPartition(dbName, tblName, values); + } catch (NoSuchObjectException e) { + if (ifExist) { + return true; + } + } + + try { + glueMetastore.deletePartition(dbName, tblName, partition.getValues()); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to drop partition with values: " + StringUtils.join(values, "/"); + LOG.error(msg, e); + throw new MetaException(msg + e); + } + + performDropPartitionPostProcessing(dbName, tblName, partition, deleteData, purgeData); + return true; + } + + private void performDropPartitionPostProcessing( + String dbName, + String tblName, + org.apache.hadoop.hive.metastore.api.Partition partition, + boolean deleteData, + boolean ifPurge + ) throws TException { + if (deleteData && partition.getSd() != null && partition.getSd().getLocation() != null) { + Path partPath = new Path(partition.getSd().getLocation()); + org.apache.hadoop.hive.metastore.api.Table table = getTable(dbName, tblName); + if (MetastoreClientUtils.isExternalTable(table)) { + // Don't delete external table data + return; + } + boolean mustPurge = isMustPurge(table, ifPurge); + wh.deleteDir(partPath, true, mustPurge); + try { + List values = partition.getValues(); + deleteParentRecursive(partPath.getParent(), values.size() - 1, mustPurge); + } catch (IOException e) { + throw new MetaException(e.getMessage()); + } + } + } + + /** + * Taken from HiveMetaStore#isMustPurge + */ + private boolean isMustPurge(org.apache.hadoop.hive.metastore.api.Table table, boolean ifPurge) { + return ifPurge || "true".equalsIgnoreCase(table.getParameters().get("auto.purge")); + } + + /** + * Taken from HiveMetaStore#deleteParentRecursive + */ + private void deleteParentRecursive(Path parent, int depth, boolean mustPurge) throws IOException, MetaException { + if (depth > 0 && parent != null && wh.isWritable(parent) && wh.isEmpty(parent)) { + wh.deleteDir(parent, true, mustPurge); + deleteParentRecursive(parent.getParent(), depth - 1, mustPurge); + } + } + + public void alterPartitions( + String dbName, + String tblName, + List partitions + ) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tblName), "tblName cannot be null or empty"); + Preconditions.checkNotNull(partitions, "partitions cannot be null"); + + for (org.apache.hadoop.hive.metastore.api.Partition part : partitions) { + part.setParameters(MetastoreClientUtils.deepCopyMap(part.getParameters())); + if (part.getParameters().get(hive_metastoreConstants.DDL_TIME) == null || + Integer.parseInt(part.getParameters().get(hive_metastoreConstants.DDL_TIME)) == 0) { + part.putToParameters(hive_metastoreConstants.DDL_TIME, + Long.toString(System.currentTimeMillis() / MILLISECOND_TO_SECOND_FACTOR)); + } + + PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(part); + + try { + 
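// Push the converted PartitionInput to Glue; service failures are rethrown as Hive exceptions below. +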
glueMetastore.updatePartition(dbName, tblName, part.getValues(), partitionInput); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + LOG.error("Unable to alter partition", e); + throw new MetaException("Unable to alter partition: " + e); + } + } + } + + /** + * Taken from HiveMetaStore#partition_name_to_vals + */ + public List partitionNameToVals(String name) throws TException { + Preconditions.checkNotNull(name, "name cannot be null"); + if (name.isEmpty()) { + return Lists.newArrayList(); + } + Map map = Warehouse.makeSpecFromName(name); + List vals = Lists.newArrayList(); + vals.addAll(map.values()); + return vals; + } + + // ============================ Index ============================== + + public List listIndexes(String dbName, String tblName) throws TException { + Preconditions.checkArgument(StringUtils.isNotEmpty(dbName), "dbName cannot be null or empty"); + Preconditions.checkArgument(StringUtils.isNotEmpty(tblName), "tblName cannot be null or empty"); + + org.apache.hadoop.hive.metastore.api.Table originTable = getTable(dbName, tblName); + Map parameters = originTable.getParameters(); + List
indexTableObjects = Lists.newArrayList(); + for (String key : parameters.keySet()) { + if (key.startsWith(INDEX_PREFIX)) { + String serialisedString = parameters.get(key); + indexTableObjects.add(ConverterUtils.stringToCatalogTable(serialisedString)); + } + } + + List hiveIndexList = Lists.newArrayList(); + for (Table catalogIndexTableObject : indexTableObjects) { + hiveIndexList.add(CatalogToHiveConverter.convertTableObjectToIndex(catalogIndexTableObject)); + } + return hiveIndexList; + } + + // ======================= Roles & Privilege ======================= + + public boolean createRole(Role role) throws TException { + throw new UnsupportedOperationException("createRole is not supported"); + } + + public boolean dropRole(String roleName) throws TException { + throw new UnsupportedOperationException("dropRole is not supported"); + } + + public List listRoles( + String principalName, + PrincipalType principalType + ) throws TException { + // All users belong to public role implicitly, add that role + // Bring logic from Hive's ObjectStore + // https://code.amazon.com/packages/Aws157Hive/blobs/48f6e30080df475ffe54c39f70dd134268e30358/ + // --/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L4208 + if (principalType == PrincipalType.USER) { + return implicitRoles; + } else { + throw new UnsupportedOperationException( + "listRoles is only supported for " + PrincipalType.USER + " Principal type"); + } + } + + public List listRoleNames() throws TException { + // return PUBLIC role as implicit role to prevent unnecessary failure, + // even though Glue doesn't support Role API yet + // https://code.amazon.com/packages/Aws157Hive/blobs/48f6e30080df475ffe54c39f70dd134268e30358/ + // --/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L4325 + return Lists.newArrayList(HiveMetaStore.PUBLIC); + } + + public org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleResponse getPrincipalsInRole( + org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleRequest request + ) throws TException { + throw new UnsupportedOperationException("getPrincipalsInRole is not supported"); + } + + public GetRoleGrantsForPrincipalResponse getRoleGrantsForPrincipal( + GetRoleGrantsForPrincipalRequest request + ) throws TException { + throw new UnsupportedOperationException("getRoleGrantsForPrincipal is not supported"); + } + + public boolean grantRole( + String roleName, + String userName, + PrincipalType principalType, + String grantor, PrincipalType grantorType, + boolean grantOption + ) throws TException { + throw new UnsupportedOperationException("grantRole is not supported"); + } + + public boolean revokeRole( + String roleName, + String userName, + PrincipalType principalType, + boolean grantOption + ) throws TException { + throw new UnsupportedOperationException("revokeRole is not supported"); + } + + public boolean revokePrivileges( + org.apache.hadoop.hive.metastore.api.PrivilegeBag privileges, + boolean grantOption + ) throws TException { + throw new UnsupportedOperationException("revokePrivileges is not supported"); + } + + public boolean grantPrivileges(org.apache.hadoop.hive.metastore.api.PrivilegeBag privileges) + throws TException { + throw new UnsupportedOperationException("grantPrivileges is not supported"); + } + + public org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet getPrivilegeSet( + HiveObjectRef objectRef, + String user, + List groups + ) throws TException { + // getPrivilegeSet is NOT yet supported. 
+ // return null not to break due to optional info + // Hive return null when every condition fail + // https://code.amazon.com/packages/Aws157Hive/blobs/c1ced60e67765d27086b3621255cd843947c151e/ + // --/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L5237 + return null; + } + + public List listPrivileges( + String principal, + PrincipalType principalType, + HiveObjectRef objectRef + ) throws TException { + throw new UnsupportedOperationException("listPrivileges is not supported"); + } + + // ========================== Statistics ========================== + + public boolean deletePartitionColumnStatistics( + String dbName, + String tableName, + String partName, + String colName + ) throws TException { + throw new UnsupportedOperationException("deletePartitionColumnStatistics is not supported"); + } + + public boolean deleteTableColumnStatistics( + String dbName, + String tableName, + String colName + ) throws TException { + throw new UnsupportedOperationException("deleteTableColumnStatistics is not supported"); + } + + public Map> getPartitionColumnStatistics( + String dbName, + String tableName, + List partitionNames, List columnNames + ) throws TException { + throw new UnsupportedOperationException("getPartitionColumnStatistics is not supported"); + } + + public List getTableColumnStatistics( + String dbName, + String tableName, + List colNames + ) throws TException { + throw new UnsupportedOperationException("getTableColumnStatistics is not supported"); + } + + public boolean updatePartitionColumnStatistics( + org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics + ) throws TException { + throw new UnsupportedOperationException("updatePartitionColumnStatistics is not supported"); + } + + public boolean updateTableColumnStatistics( + org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics + ) throws TException { + throw new UnsupportedOperationException("updateTableColumnStatistics is not supported"); + } + + public AggrStats getAggrColStatsFor( + String dbName, + String tblName, + List colNames, + List partName + ) throws TException { + throw new UnsupportedOperationException("getAggrColStatsFor is not supported"); + } + + public void cancelDelegationToken(String tokenStrForm) throws TException { + throw new UnsupportedOperationException("cancelDelegationToken is not supported"); + } + + public String getTokenStrForm() throws IOException { + throw new UnsupportedOperationException("getTokenStrForm is not supported"); + } + + public boolean addToken(String tokenIdentifier, String delegationToken) throws TException { + throw new UnsupportedOperationException("addToken is not supported"); + } + + public boolean removeToken(String tokenIdentifier) throws TException { + throw new UnsupportedOperationException("removeToken is not supported"); + } + + public String getToken(String tokenIdentifier) throws TException { + throw new UnsupportedOperationException("getToken is not supported"); + } + + public List getAllTokenIdentifiers() throws TException { + throw new UnsupportedOperationException("getAllTokenIdentifiers is not supported"); + } + + public int addMasterKey(String key) throws TException { + throw new UnsupportedOperationException("addMasterKey is not supported"); + } + + public void updateMasterKey(Integer seqNo, String key) throws TException { + throw new UnsupportedOperationException("updateMasterKey is not supported"); + } + + public boolean removeMasterKey(Integer keySeq) throws TException { + throw new 
UnsupportedOperationException("removeMasterKey is not supported"); + } + + public String[] getMasterKeys() throws TException { + throw new UnsupportedOperationException("getMasterKeys is not supported"); + } + + public LockResponse checkLock(long lockId) throws TException { + return lockManager.checkLock(lockId); + } + + public void commitTxn(long txnId) throws TException { + throw new UnsupportedOperationException("commitTxn is not supported"); + } + + public void abortTxns(List txnIds) throws TException { + throw new UnsupportedOperationException("abortTxns is not supported"); + } + + public void compact( + String dbName, + String tblName, + String partitionName, + CompactionType compactionType + ) throws TException { + throw new UnsupportedOperationException("compact is not supported"); + } + + public void compact( + String dbName, + String tblName, + String partitionName, + CompactionType compactionType, + Map tblProperties + ) throws TException { + throw new UnsupportedOperationException("compact is not supported"); + } + + public CompactionResponse compact2( + String dbName, + String tblName, + String partitionName, + CompactionType compactionType, + Map tblProperties + ) throws TException { + throw new UnsupportedOperationException("compact2 is not supported"); + } + + public ValidTxnList getValidTxns() throws TException { + throw new UnsupportedOperationException("getValidTxns is not supported"); + } + + public ValidTxnList getValidTxns(long currentTxn) throws TException { + throw new UnsupportedOperationException("getValidTxns is not supported"); + } + + public org.apache.hadoop.hive.metastore.api.Partition exchangePartition( + Map partitionSpecs, + String srcDb, + String srcTbl, + String dstDb, + String dstTbl + ) throws TException { + throw new UnsupportedOperationException("exchangePartition not yet supported."); + } + + public List exchangePartitions( + Map partitionSpecs, + String sourceDb, + String sourceTbl, + String destDb, + String destTbl + ) throws TException { + throw new UnsupportedOperationException("exchangePartitions is not yet supported"); + } + + public String getDelegationToken( + String owner, + String renewerKerberosPrincipalName + ) throws TException { + throw new UnsupportedOperationException("getDelegationToken is not supported"); + } + + public void heartbeat(long txnId, long lockId) throws TException { + throw new UnsupportedOperationException("heartbeat is not supported"); + } + + public HeartbeatTxnRangeResponse heartbeatTxnRange(long min, long max) throws TException { + throw new UnsupportedOperationException("heartbeatTxnRange is not supported"); + } + + public boolean isPartitionMarkedForEvent( + String dbName, + String tblName, + Map partKVs, + PartitionEventType eventType + ) throws TException { + throw new UnsupportedOperationException("isPartitionMarkedForEvent is not supported"); + } + + public int getNumPartitionsByFilter( + String dbName, + String tableName, + String filter + ) throws TException { + throw new UnsupportedOperationException("getNumPartitionsByFilter is not supported."); + } + + public PartitionSpecProxy listPartitionSpecs( + String dbName, + String tblName, + int max + ) throws TException { + throw new UnsupportedOperationException("listPartitionSpecs is not supported."); + } + + public PartitionSpecProxy listPartitionSpecsByFilter( + String dbName, + String tblName, + String filter, + int max + ) throws TException { + throw new UnsupportedOperationException("listPartitionSpecsByFilter is not supported"); + } + + public 
LockResponse lock(LockRequest lockRequest) throws TException { + return lockManager.lock(lockRequest); + } + + public void markPartitionForEvent( + String dbName, + String tblName, + Map partKeyValues, + PartitionEventType eventType + ) throws TException { + throw new UnsupportedOperationException("markPartitionForEvent is not supported"); + } + + public long openTxn(String user) throws TException { + throw new UnsupportedOperationException("openTxn is not supported"); + } + + public OpenTxnsResponse openTxns(String user, int numTxns) throws TException { + throw new UnsupportedOperationException("openTxns is not supported"); + } + + public long renewDelegationToken(String tokenStrForm) throws TException { + throw new UnsupportedOperationException("renewDelegationToken is not supported"); + } + + public void rollbackTxn(long txnId) throws TException { + throw new UnsupportedOperationException("rollbackTxn is not supported"); + } + + public void createTableWithConstraints( + org.apache.hadoop.hive.metastore.api.Table table, + List primaryKeys, + List foreignKeys + ) throws AlreadyExistsException, TException { + throw new UnsupportedOperationException("createTableWithConstraints is not supported"); + } + + public void dropConstraint( + String dbName, + String tblName, + String constraintName + ) throws TException { + throw new UnsupportedOperationException("dropConstraint is not supported"); + } + + public void addPrimaryKey(List primaryKeyCols) throws TException { + throw new UnsupportedOperationException("addPrimaryKey is not supported"); + } + + public void addForeignKey(List foreignKeyCols) throws TException { + throw new UnsupportedOperationException("addForeignKey is not supported"); + } + + public ShowCompactResponse showCompactions() throws TException { + throw new UnsupportedOperationException("showCompactions is not supported"); + } + + public void addDynamicPartitions( + long txnId, + String dbName, + String tblName, + List partNames + ) throws TException { + throw new UnsupportedOperationException("addDynamicPartitions is not supported"); + } + + public void addDynamicPartitions( + long txnId, + String dbName, + String tblName, + List partNames, + DataOperationType operationType + ) throws TException { + throw new UnsupportedOperationException("addDynamicPartitions is not supported"); + } + + public void insertTable(org.apache.hadoop.hive.metastore.api.Table table, boolean overwrite) throws MetaException { + throw new UnsupportedOperationException("insertTable is not supported"); + } + + public NotificationEventResponse getNextNotification( + long lastEventId, + int maxEvents, + IMetaStoreClient.NotificationFilter notificationFilter + ) throws TException { + throw new UnsupportedOperationException("getNextNotification is not supported"); + } + + public CurrentNotificationEventId getCurrentNotificationEventId() throws TException { + throw new UnsupportedOperationException("getCurrentNotificationEventId is not supported"); + } + + public FireEventResponse fireListenerEvent(FireEventRequest fireEventRequest) throws TException { + throw new UnsupportedOperationException("fireListenerEvent is not supported"); + } + + public ShowLocksResponse showLocks() throws TException { + throw new UnsupportedOperationException("showLocks is not supported"); + } + + public ShowLocksResponse showLocks(ShowLocksRequest showLocksRequest) throws TException { + throw new UnsupportedOperationException("showLocks is not supported"); + } + + public GetOpenTxnsInfoResponse showTxns() throws TException { + 
throw new UnsupportedOperationException("showTxns is not supported"); + } + + public void unlock(long lockId) throws TException { + lockManager.unlock(lockId); + } + + public Iterable> getFileMetadata(List fileIds) throws TException { + throw new UnsupportedOperationException("getFileMetadata is not supported"); + } + + public Iterable> getFileMetadataBySarg( + List fileIds, + ByteBuffer sarg, + boolean doGetFooters + ) throws TException { + throw new UnsupportedOperationException("getFileMetadataBySarg is not supported"); + } + + public void clearFileMetadata(List fileIds) throws TException { + throw new UnsupportedOperationException("clearFileMetadata is not supported"); + } + + public void putFileMetadata(List fileIds, List metadata) throws TException { + throw new UnsupportedOperationException("putFileMetadata is not supported"); + } + + public boolean setPartitionColumnStatistics( + org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest request + ) throws TException { + throw new UnsupportedOperationException("setPartitionColumnStatistics is not supported"); + } + + public boolean cacheFileMetadata( + String dbName, + String tblName, + String partName, + boolean allParts + ) throws TException { + throw new UnsupportedOperationException("cacheFileMetadata is not supported"); + } + + public int addPartitionsSpecProxy(PartitionSpecProxy pSpec) throws TException { + throw new UnsupportedOperationException("addPartitionsSpecProxy is unsupported"); + } + + public void setUGI(String username) throws TException { + throw new UnsupportedOperationException("setUGI is unsupported"); + } + + /** + * Gets the user defined function in a database stored in metastore and + * converts back to Hive function. + * @param dbName + * @param functionName + * @return function + * @throws MetaException + * @throws TException + */ + public org.apache.hadoop.hive.metastore.api.Function getFunction(String dbName, String functionName) + throws TException { + try { + UserDefinedFunction userDefinedFunction = glueMetastore.getUserDefinedFunction(dbName, functionName); + return CatalogToHiveConverter.convertFunction(dbName, userDefinedFunction); + } catch (AmazonServiceException e) { + LOG.error("encountered AWS exception", e); + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + LOG.error("Unable to get Function", e); + throw new MetaException("Unable to get Function: " + e); + } + } + + /** + * Gets user defined functions that match a pattern in database stored in + * metastore and converts back to Hive function. + * @param dbName + * @param pattern + * @return functions + * @throws MetaException + * @throws TException + */ + public List getFunctions(String dbName, String pattern) throws TException { + try { + List functionNames = Lists.newArrayList(); + List functions = + glueMetastore.getUserDefinedFunctions(dbName, pattern); + for (UserDefinedFunction catalogFunction : functions) { + functionNames.add(catalogFunction.getFunctionName()); + } + return functionNames; + } catch (AmazonServiceException e) { + LOG.error("encountered AWS exception", e); + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + LOG.error("Unable to get Functions", e); + throw new MetaException("Unable to get Functions: " + e); + } + } + + /** + * Creates a new user defined function in the metastore. 
+ * @param function + * @throws InvalidObjectException + * @throws MetaException + * @throws TException + */ + public void createFunction(org.apache.hadoop.hive.metastore.api.Function function) throws InvalidObjectException, + TException { + try { + UserDefinedFunctionInput functionInput = GlueInputConverter.convertToUserDefinedFunctionInput(function); + glueMetastore.createUserDefinedFunction(function.getDbName(), functionInput); + } catch (AmazonServiceException e) { + LOG.error("encountered AWS exception", e); + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + LOG.error("Unable to create Function", e); + throw new MetaException("Unable to create Function: " + e); + } + } + + /** + * Drops a user defined function in the database stored in metastore. + * @param dbName + * @param functionName + * @throws MetaException + * @throws NoSuchObjectException + * @throws InvalidObjectException + * @throws org.apache.hadoop.hive.metastore.api.InvalidInputException + * @throws TException + */ + public void dropFunction(String dbName, String functionName) throws NoSuchObjectException, + InvalidObjectException, org.apache.hadoop.hive.metastore.api.InvalidInputException, TException { + try { + glueMetastore.deleteUserDefinedFunction(dbName, functionName); + } catch (AmazonServiceException e) { + LOG.error("encountered AWS exception", e); + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to drop Function: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + /** + * Updates a user defined function in a database stored in the metastore. + * @param dbName + * @param functionName + * @param newFunction + * @throws InvalidObjectException + * @throws MetaException + * @throws TException + */ + public void alterFunction(String dbName, String functionName, + org.apache.hadoop.hive.metastore.api.Function newFunction) throws InvalidObjectException, MetaException, + TException { + try { + UserDefinedFunctionInput functionInput = GlueInputConverter.convertToUserDefinedFunctionInput(newFunction); + glueMetastore.updateUserDefinedFunction(dbName, functionName, functionInput); + } catch (AmazonServiceException e) { + LOG.error("encountered AWS exception", e); + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to alter Function: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + /** + * Fetches the fields for a table in a database. + * @param db + * @param tableName + * @return fields + * @throws MetaException + * @throws TException + * @throws UnknownTableException + * @throws UnknownDBException + */ + public List getFields(String db, String tableName) throws MetaException, TException, + UnknownTableException, UnknownDBException { + try { + Table table = glueMetastore.getTable(db, tableName); + return CatalogToHiveConverter.convertFieldSchemaList(table.getStorageDescriptor().getColumns()); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + LOG.error("Unable to get field from table", e); + throw new MetaException("Unable to get field from table: " + e); + } + } + + /** + * Fetches the schema for a table in a database. 
+ * @param db + * @param tableName + * @return schema + * @throws MetaException + * @throws TException + * @throws UnknownTableException + * @throws UnknownDBException + */ + public List getSchema(String db, String tableName) throws TException, + UnknownTableException, UnknownDBException { + try { + Table table = glueMetastore.getTable(db, tableName); + List schemas = table.getStorageDescriptor().getColumns(); + if (table.getPartitionKeys() != null && !table.getPartitionKeys().isEmpty()) { + schemas.addAll(table.getPartitionKeys()); + } + return CatalogToHiveConverter.convertFieldSchemaList(schemas); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get field from table: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + /** + * Updates the partition values for a table in database stored in metastore. + * @param databaseName + * @param tableName + * @param partitionValues + * @param newPartition + * @throws InvalidOperationException + * @throws MetaException + * @throws TException + */ + public void renamePartitionInCatalog(String databaseName, String tableName, List partitionValues, + org.apache.hadoop.hive.metastore.api.Partition newPartition) throws InvalidOperationException, + TException { + try { + PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(newPartition); + glueMetastore.updatePartition(databaseName, tableName, partitionValues, partitionInput); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/IcebergGlueMetastoreClient.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/IcebergGlueMetastoreClient.java new file mode 100644 index 000000000000..6ef89b611ea6 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/IcebergGlueMetastoreClient.java @@ -0,0 +1,1837 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.glue.AWSGlue; +import com.amazonaws.services.glue.model.AlreadyExistsException; +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.GetDatabaseRequest; +import com.amazonaws.services.glue.model.GetUserDefinedFunctionsRequest; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.UpdatePartitionRequest; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import java.io.IOException; +import java.net.URI; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.regex.Pattern; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.PartitionDropOptions; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.AggrStats; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.CompactionResponse; +import org.apache.hadoop.hive.metastore.api.CompactionType; +import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException; +import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; +import org.apache.hadoop.hive.metastore.api.DataOperationType; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.FireEventRequest; +import org.apache.hadoop.hive.metastore.api.FireEventResponse; +import org.apache.hadoop.hive.metastore.api.ForeignKeysRequest; +import org.apache.hadoop.hive.metastore.api.GetAllFunctionsResponse; +import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; +import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest; +import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse; +import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.HiveObjectType; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.InvalidPartitionException; +import org.apache.hadoop.hive.metastore.api.LockRequest; +import org.apache.hadoop.hive.metastore.api.LockResponse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import 
org.apache.hadoop.hive.metastore.api.MetadataPpdResult; +import org.apache.hadoop.hive.metastore.api.NoSuchLockException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.NoSuchTxnException; +import org.apache.hadoop.hive.metastore.api.NotificationEventResponse; +import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.metastore.api.PartitionValuesRequest; +import org.apache.hadoop.hive.metastore.api.PartitionValuesResponse; +import org.apache.hadoop.hive.metastore.api.PrimaryKeysRequest; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.ShowCompactResponse; +import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.hadoop.hive.metastore.api.TxnAbortedException; +import org.apache.hadoop.hive.metastore.api.TxnOpenException; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.hadoop.hive.metastore.api.UnknownPartitionException; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.aws.glue.converters.ConverterUtils; +import org.apache.iceberg.aws.glue.converters.GlueInputConverter; +import org.apache.iceberg.aws.glue.converters.HiveToCatalogConverter; +import org.apache.iceberg.aws.glue.lock.DynamoLockManager; +import org.apache.iceberg.aws.glue.lock.LockManager; +import org.apache.iceberg.aws.glue.shims.AwsGlueHiveShims; +import org.apache.iceberg.aws.glue.shims.ShimsLoader; +import org.apache.iceberg.aws.glue.util.BatchDeletePartitionsHelper; +import org.apache.iceberg.aws.glue.util.ExpressionHelper; +import org.apache.iceberg.aws.glue.util.LoggingHelper; +import org.apache.iceberg.aws.glue.util.MetastoreClientUtils; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Strings; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings({"Slf4jConstantLogMessage", "UnusedMethod"}) +public class IcebergGlueMetastoreClient implements IMetaStoreClient { + + // TODO "hook" into Hive logging (hive or hive.metastore) + private static final Logger LOG = LoggerFactory.getLogger(IcebergGlueMetastoreClient.class); + + private final HiveConf conf; + private final AWSGlue glueClient; + private final Warehouse wh; + private final GlueMetastoreClientDelegate glueMetastoreClientDelegate; + private final String catalogId; + + private static final int BATCH_DELETE_PARTITIONS_PAGE_SIZE = 25; + private static final int BATCH_DELETE_PARTITIONS_THREADS_COUNT = 5; + static final String BATCH_DELETE_PARTITIONS_THREAD_POOL_NAME_FORMAT = "batch-delete-partitions-%d"; + private 
static final ExecutorService BATCH_DELETE_PARTITIONS_THREAD_POOL = Executors.newFixedThreadPool( + BATCH_DELETE_PARTITIONS_THREADS_COUNT, + new ThreadFactoryBuilder() + .setNameFormat(BATCH_DELETE_PARTITIONS_THREAD_POOL_NAME_FORMAT) + .setDaemon(true).build() + ); + + private Map currentMetaVars; + private final AwsGlueHiveShims hiveShims = ShimsLoader.getHiveShims(); + + public IcebergGlueMetastoreClient(HiveConf conf) throws MetaException { + this.conf = conf; + glueClient = new AWSGlueClientFactory(this.conf).newClient(); + + // TODO preserve existing functionality for HiveMetaHook + wh = new Warehouse(this.conf); + + AWSGlueMetastore glueMetastore = new AWSGlueMetastoreFactory().newMetastore(conf); + LockManager lockManager = new DynamoLockManager(conf); + glueMetastoreClientDelegate = new GlueMetastoreClientDelegate(this.conf, glueMetastore, wh, lockManager); + + snapshotActiveConf(); + catalogId = MetastoreClientUtils.getCatalogId(conf); + if (!doesDefaultDBExist()) { + createDefaultDatabase(); + } + } + + /** + * Currently used for unit tests + */ + public static class Builder { + + private HiveConf conf; + private Warehouse wh; + private GlueClientFactory clientFactory; + private AWSGlueMetastoreFactory metastoreFactory; + private boolean createDefaults = true; + private String catalogId; + + public Builder withHiveConf(HiveConf confInput) { + this.conf = confInput; + return this; + } + + public Builder withClientFactory(GlueClientFactory clientFactoryInput) { + this.clientFactory = clientFactoryInput; + return this; + } + + public Builder withMetastoreFactory(AWSGlueMetastoreFactory metastoreFactoryInput) { + this.metastoreFactory = metastoreFactoryInput; + return this; + } + + public Builder withWarehouse(Warehouse whInput) { + this.wh = whInput; + return this; + } + + public Builder withCatalogId(String catalogIdInput) { + this.catalogId = catalogIdInput; + return this; + } + + public IcebergGlueMetastoreClient build() throws MetaException { + return new IcebergGlueMetastoreClient(this); + } + + public Builder createDefaults(boolean createDefaultDB) { + this.createDefaults = createDefaultDB; + return this; + } + } + + private IcebergGlueMetastoreClient(Builder builder) throws MetaException { + conf = MoreObjects.firstNonNull(builder.conf, new HiveConf()); + + if (builder.wh != null) { + this.wh = builder.wh; + } else { + this.wh = new Warehouse(conf); + } + + if (builder.catalogId != null) { + this.catalogId = builder.catalogId; + } else { + this.catalogId = null; + } + + GlueClientFactory clientFactory = MoreObjects.firstNonNull(builder.clientFactory, new AWSGlueClientFactory(conf)); + AWSGlueMetastoreFactory metastoreFactory = MoreObjects.firstNonNull(builder.metastoreFactory, + new AWSGlueMetastoreFactory()); + + glueClient = clientFactory.newClient(); + AWSGlueMetastore glueMetastore = metastoreFactory.newMetastore(conf); + LockManager lockManager = new DynamoLockManager(conf); + glueMetastoreClientDelegate = new GlueMetastoreClientDelegate(this.conf, glueMetastore, wh, lockManager); + + /** + * It seems weird to create databases as part of client construction. This + * part should probably be moved to the section in hive code right after the + * metastore client is instantiated. 
For now, simply copying the + * functionality in the thrift server + */ + if (builder.createDefaults && !doesDefaultDBExist()) { + createDefaultDatabase(); + } + } + + private boolean doesDefaultDBExist() throws MetaException { + try { + GetDatabaseRequest getDatabaseRequest = new GetDatabaseRequest() + .withName(MetaStoreUtils.DEFAULT_DATABASE_NAME) + .withCatalogId(catalogId); + glueClient.getDatabase(getDatabaseRequest); + } catch (EntityNotFoundException e) { + return false; + } catch (AmazonServiceException e) { + String msg = "Unable to verify existence of default database: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + return true; + } + + private void createDefaultDatabase() throws MetaException { + Database defaultDB = new Database(); + defaultDB.setName(MetaStoreUtils.DEFAULT_DATABASE_NAME); + defaultDB.setDescription(MetaStoreUtils.DEFAULT_DATABASE_COMMENT); + defaultDB.setLocationUri(wh.getDefaultDatabasePath(MetaStoreUtils.DEFAULT_DATABASE_NAME).toString()); + + org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet principalPrivilegeSet + = new org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet(); + principalPrivilegeSet.setRolePrivileges( + Maps.>newHashMap()); + + defaultDB.setPrivileges(principalPrivilegeSet); + + /** + * TODO: Grant access to role PUBLIC after role support is added + */ + try { + createDatabase(defaultDB); + } catch (org.apache.hadoop.hive.metastore.api.AlreadyExistsException e) { + LOG.warn("database - default already exists. Ignoring..", e); + } catch (Exception e) { + LOG.error("Unable to create default database", e); + } + } + + @Override + public void createDatabase(Database database) throws InvalidObjectException, + org.apache.hadoop.hive.metastore.api.AlreadyExistsException, MetaException, TException { + glueMetastoreClientDelegate.createDatabase(database); + } + + @Override + public Database getDatabase(String name) throws NoSuchObjectException, MetaException, TException { + return glueMetastoreClientDelegate.getDatabase(name); + } + + @Override + public List getDatabases(String pattern) throws MetaException, TException { + return glueMetastoreClientDelegate.getDatabases(pattern); + } + + @Override + public List getAllDatabases() throws MetaException, TException { + return getDatabases(".*"); + } + + @Override + public void alterDatabase(String databaseName, Database database) throws NoSuchObjectException, MetaException, + TException { + glueMetastoreClientDelegate.alterDatabase(databaseName, database); + } + + @Override + public void dropDatabase(String name) throws NoSuchObjectException, InvalidOperationException, MetaException, + TException { + dropDatabase(name, true, false, false); + } + + @Override + public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb) throws NoSuchObjectException, + InvalidOperationException, MetaException, TException { + dropDatabase(name, deleteData, ignoreUnknownDb, false); + } + + @Override + public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb, boolean cascade) + throws NoSuchObjectException, InvalidOperationException, MetaException, TException { + glueMetastoreClientDelegate.dropDatabase(name, deleteData, ignoreUnknownDb, cascade); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Partition add_partition( + org.apache.hadoop.hive.metastore.api.Partition partition) + throws InvalidObjectException, org.apache.hadoop.hive.metastore.api.AlreadyExistsException, MetaException, + TException { + 
glueMetastoreClientDelegate.addPartitions(Lists.newArrayList(partition), false, true); + return partition; + } + + @Override + public int add_partitions(List partitions) + throws InvalidObjectException, org.apache.hadoop.hive.metastore.api.AlreadyExistsException, MetaException, + TException { + return glueMetastoreClientDelegate.addPartitions(partitions, false, true).size(); + } + + @Override + public List add_partitions( + List partitions, + boolean ifNotExists, + boolean needResult + ) throws TException { + return glueMetastoreClientDelegate.addPartitions(partitions, ifNotExists, needResult); + } + + @Override + public int add_partitions_pspec( + PartitionSpecProxy pSpec + ) throws InvalidObjectException, org.apache.hadoop.hive.metastore.api.AlreadyExistsException, + MetaException, TException { + return glueMetastoreClientDelegate.addPartitionsSpecProxy(pSpec); + } + + @Override + public void alterFunction( + String dbName, String functionName, org.apache.hadoop.hive.metastore.api.Function newFunction) + throws InvalidObjectException, MetaException, TException { + glueMetastoreClientDelegate.alterFunction(dbName, functionName, newFunction); + } + + @Override + public void alter_index(String dbName, String tblName, String indexName, Index index) + throws InvalidOperationException, MetaException, TException { + Table catalogIndexTableObject = HiveToCatalogConverter.convertIndexToTableObject(index); + org.apache.hadoop.hive.metastore.api.Table originTable = getTable(dbName, tblName); + String indexTableObjectName = GlueMetastoreClientDelegate.INDEX_PREFIX + indexName; + if (!originTable.getParameters().containsKey(indexTableObjectName)) { + throw new NoSuchObjectException("can not find index: " + indexName); + } + + originTable.getParameters().put(indexTableObjectName, ConverterUtils.catalogTableToString(catalogIndexTableObject)); + alter_table(dbName, tblName, originTable); + } + + @Override + public void alter_partition( + String dbName, + String tblName, + org.apache.hadoop.hive.metastore.api.Partition partition + ) throws InvalidOperationException, MetaException, TException { + glueMetastoreClientDelegate.alterPartitions(dbName, tblName, Lists.newArrayList(partition)); + } + + @Override + public void alter_partition( + String dbName, + String tblName, + org.apache.hadoop.hive.metastore.api.Partition partition, + EnvironmentContext environmentContext + ) throws InvalidOperationException, MetaException, TException { + glueMetastoreClientDelegate.alterPartitions(dbName, tblName, Lists.newArrayList(partition)); + } + + @Override + public void alter_partitions( + String dbName, + String tblName, + List partitions + ) throws InvalidOperationException, MetaException, TException { + glueMetastoreClientDelegate.alterPartitions(dbName, tblName, partitions); + } + + @Override + public void alter_partitions( + String dbName, + String tblName, + List partitions, + EnvironmentContext environmentContext + ) throws InvalidOperationException, MetaException, TException { + glueMetastoreClientDelegate.alterPartitions(dbName, tblName, partitions); + } + + @Override + public PartitionValuesResponse listPartitionValues( + PartitionValuesRequest request) throws MetaException, TException, NoSuchObjectException { + throw new UnsupportedOperationException("listPartitionValues is not supported"); + } + + @Override + public void alter_table( + String dbName, String tblName, org.apache.hadoop.hive.metastore.api.Table table) + throws InvalidOperationException, MetaException, TException { + 
glueMetastoreClientDelegate.alterTable(dbName, tblName, table, null); + } + + @Override + public void alter_table( + String dbName, String tblName, org.apache.hadoop.hive.metastore.api.Table table, boolean cascade) + throws InvalidOperationException, MetaException, TException { + glueMetastoreClientDelegate.alterTable(dbName, tblName, table, null); + } + + @Override + public void alter_table_with_environmentContext( + String dbName, + String tblName, + org.apache.hadoop.hive.metastore.api.Table table, + EnvironmentContext environmentContext + ) throws InvalidOperationException, MetaException, TException { + glueMetastoreClientDelegate.alterTable(dbName, tblName, table, environmentContext); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Partition appendPartition( + String dbName, String tblName, List values) + throws InvalidObjectException, org.apache.hadoop.hive.metastore.api.AlreadyExistsException, + MetaException, TException { + return glueMetastoreClientDelegate.appendPartition(dbName, tblName, values); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Partition appendPartition( + String dbName, String tblName, String partitionName) throws InvalidObjectException, + org.apache.hadoop.hive.metastore.api.AlreadyExistsException, MetaException, TException { + List partVals = partitionNameToVals(partitionName); + return glueMetastoreClientDelegate.appendPartition(dbName, tblName, partVals); + } + + @Override + public boolean create_role(org.apache.hadoop.hive.metastore.api.Role role) throws MetaException, TException { + return glueMetastoreClientDelegate.createRole(role); + } + + @Override + public boolean drop_role(String roleName) throws MetaException, TException { + return glueMetastoreClientDelegate.dropRole(roleName); + } + + @Override + public List list_roles( + String principalName, org.apache.hadoop.hive.metastore.api.PrincipalType principalType + ) throws MetaException, TException { + return glueMetastoreClientDelegate.listRoles(principalName, principalType); + } + + @Override + public List listRoleNames() throws MetaException, TException { + return glueMetastoreClientDelegate.listRoleNames(); + } + + @Override + public org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleResponse get_principals_in_role( + org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleRequest request) throws MetaException, TException { + return glueMetastoreClientDelegate.getPrincipalsInRole(request); + } + + @Override + public GetRoleGrantsForPrincipalResponse get_role_grants_for_principal( + GetRoleGrantsForPrincipalRequest request) throws MetaException, TException { + return glueMetastoreClientDelegate.getRoleGrantsForPrincipal(request); + } + + @Override + public boolean grant_role( + String roleName, + String userName, + org.apache.hadoop.hive.metastore.api.PrincipalType principalType, + String grantor, org.apache.hadoop.hive.metastore.api.PrincipalType grantorType, + boolean grantOption + ) throws MetaException, TException { + return glueMetastoreClientDelegate.grantRole( + roleName, userName, principalType, grantor, grantorType, grantOption); + } + + @Override + public boolean revoke_role( + String roleName, + String userName, + org.apache.hadoop.hive.metastore.api.PrincipalType principalType, + boolean grantOption + ) throws MetaException, TException { + return glueMetastoreClientDelegate.revokeRole(roleName, userName, principalType, grantOption); + } + + @Override + public void cancelDelegationToken(String tokenStrForm) throws MetaException, TException { + 
glueMetastoreClientDelegate.cancelDelegationToken(tokenStrForm); + } + + @Override + public String getTokenStrForm() throws IOException { + return glueMetastoreClientDelegate.getTokenStrForm(); + } + + @Override + public boolean addToken(String tokenIdentifier, String delegationToken) throws TException { + return glueMetastoreClientDelegate.addToken(tokenIdentifier, delegationToken); + } + + @Override + public boolean removeToken(String tokenIdentifier) throws TException { + return glueMetastoreClientDelegate.removeToken(tokenIdentifier); + } + + @Override + public String getToken(String tokenIdentifier) throws TException { + return glueMetastoreClientDelegate.getToken(tokenIdentifier); + } + + @Override + public List getAllTokenIdentifiers() throws TException { + return glueMetastoreClientDelegate.getAllTokenIdentifiers(); + } + + @Override + public int addMasterKey(String key) throws MetaException, TException { + return glueMetastoreClientDelegate.addMasterKey(key); + } + + @Override + public void updateMasterKey(Integer seqNo, String key) throws NoSuchObjectException, MetaException, TException { + glueMetastoreClientDelegate.updateMasterKey(seqNo, key); + } + + @Override + public boolean removeMasterKey(Integer keySeq) throws TException { + return glueMetastoreClientDelegate.removeMasterKey(keySeq); + } + + @Override + public String[] getMasterKeys() throws TException { + return glueMetastoreClientDelegate.getMasterKeys(); + } + + @Override + public LockResponse checkLock(long lockId) + throws NoSuchTxnException, TxnAbortedException, NoSuchLockException, TException { + return glueMetastoreClientDelegate.checkLock(lockId); + } + + @Override + public void close() { + currentMetaVars = null; + } + + @Override + public void commitTxn(long txnId) throws NoSuchTxnException, TxnAbortedException, TException { + glueMetastoreClientDelegate.commitTxn(txnId); + } + + @Override + public void abortTxns(List txnIds) throws TException { + glueMetastoreClientDelegate.abortTxns(txnIds); + } + + @Deprecated + public void compact( + String dbName, + String tblName, + String partitionName, + CompactionType compactionType + ) throws TException { + glueMetastoreClientDelegate.compact(dbName, tblName, partitionName, compactionType); + } + + @Deprecated + public void compact( + String dbName, + String tblName, + String partitionName, + CompactionType compactionType, + Map tblProperties + ) throws TException { + glueMetastoreClientDelegate.compact(dbName, tblName, partitionName, compactionType, tblProperties); + } + + @Override + public CompactionResponse compact2( + String dbName, + String tblName, + String partitionName, + CompactionType compactionType, + Map tblProperties + ) throws TException { + return glueMetastoreClientDelegate.compact2(dbName, tblName, partitionName, compactionType, tblProperties); + } + + @Override + public void createFunction(org.apache.hadoop.hive.metastore.api.Function function) + throws InvalidObjectException, MetaException, TException { + glueMetastoreClientDelegate.createFunction(function); + } + + @Override + public void createIndex(Index index, org.apache.hadoop.hive.metastore.api.Table indexTable) + throws InvalidObjectException, MetaException, NoSuchObjectException, + TException, org.apache.hadoop.hive.metastore.api.AlreadyExistsException { + boolean dirCreated = glueMetastoreClientDelegate.validateNewTableAndCreateDirectory(indexTable); + boolean indexTableCreated = false; + String dbName = index.getDbName(); + String indexTableName = index.getIndexTableName(); + String 
originTableName = index.getOrigTableName(); + Path indexTablePath = new Path(indexTable.getSd().getLocation()); + Table catalogIndexTableObject = HiveToCatalogConverter.convertIndexToTableObject(index); + String indexTableObjectName = GlueMetastoreClientDelegate.INDEX_PREFIX + index.getIndexName(); + + try { + org.apache.hadoop.hive.metastore.api.Table originTable = getTable(dbName, originTableName); + Map parameters = originTable.getParameters(); + if (parameters.containsKey(indexTableObjectName)) { + throw new org.apache.hadoop.hive.metastore.api.AlreadyExistsException( + "Index: " + index.getIndexName() + " already exist"); + } + createTable(indexTable); + indexTableCreated = true; + originTable.getParameters().put(indexTableObjectName, + ConverterUtils.catalogTableToString(catalogIndexTableObject)); + alter_table(dbName, originTableName, originTable); + } catch (Exception e) { + if (dirCreated) { + wh.deleteDir(indexTablePath, true); + } + if (indexTableCreated) { + dropTable(dbName, indexTableName); + } + String msg = "Unable to create index: "; + LOG.error(msg, e); + if (e instanceof TException) { + throw e; + } else { + throw new MetaException(msg + e); + } + } + } + + @Override + public void createTable(org.apache.hadoop.hive.metastore.api.Table tbl) + throws org.apache.hadoop.hive.metastore.api.AlreadyExistsException, InvalidObjectException, MetaException, + NoSuchObjectException, TException { + glueMetastoreClientDelegate.createTable(tbl); + } + + @Override + public boolean deletePartitionColumnStatistics( + String dbName, String tableName, String partName, String colName + ) throws NoSuchObjectException, MetaException, InvalidObjectException, + TException, org.apache.hadoop.hive.metastore.api.InvalidInputException { + return glueMetastoreClientDelegate.deletePartitionColumnStatistics(dbName, tableName, partName, colName); + } + + @Override + public boolean deleteTableColumnStatistics( + String dbName, String tableName, String colName + ) throws NoSuchObjectException, MetaException, InvalidObjectException, + TException, org.apache.hadoop.hive.metastore.api.InvalidInputException { + return glueMetastoreClientDelegate.deleteTableColumnStatistics(dbName, tableName, colName); + } + + @Override + public void dropFunction(String dbName, String functionName) throws MetaException, NoSuchObjectException, + InvalidObjectException, org.apache.hadoop.hive.metastore.api.InvalidInputException, TException { + glueMetastoreClientDelegate.dropFunction(dbName, functionName); + } + + @Override + public boolean dropIndex(String dbName, String tblName, String name, boolean deleteData) throws NoSuchObjectException, + MetaException, TException { + Index indexToDrop = getIndex(dbName, tblName, name); + String indexTableName = indexToDrop.getIndexTableName(); + + // Drop the index metadata + org.apache.hadoop.hive.metastore.api.Table originTable = getTable(dbName, tblName); + Map parameters = originTable.getParameters(); + String indexTableObjectName = GlueMetastoreClientDelegate.INDEX_PREFIX + name; + if (!parameters.containsKey(indexTableObjectName)) { + throw new NoSuchObjectException("can not find Index: " + name); + } + parameters.remove(indexTableObjectName); + + alter_table(dbName, tblName, originTable); + + // Now drop the data associated with the table used to hold the index data + if (indexTableName != null && indexTableName.length() > 0) { + dropTable(dbName, indexTableName, deleteData, true); + } + + return true; + } + + private void deleteParentRecursive(Path parent, int depth, boolean 
mustPurge) throws IOException, MetaException { + if (depth > 0 && parent != null && wh.isWritable(parent) && wh.isEmpty(parent)) { + wh.deleteDir(parent, true, mustPurge); + deleteParentRecursive(parent.getParent(), depth - 1, mustPurge); + } + } + + // This logic is taken from HiveMetaStore#isMustPurge + private boolean isMustPurge(org.apache.hadoop.hive.metastore.api.Table table, boolean ifPurge) { + return ifPurge || "true".equalsIgnoreCase(table.getParameters().get("auto.purge")); + } + + @Override + public boolean dropPartition(String dbName, String tblName, List values, boolean deleteData) + throws NoSuchObjectException, MetaException, TException { + return glueMetastoreClientDelegate.dropPartition(dbName, tblName, values, false, deleteData, false); + } + + @Override + public boolean dropPartition( + String dbName, String tblName, List values, PartitionDropOptions options) + throws TException { + return glueMetastoreClientDelegate.dropPartition( + dbName, tblName, values, options.ifExists, options.deleteData, options.purgeData); + } + + @Override + public boolean dropPartition(String dbName, String tblName, String partitionName, boolean deleteData) + throws NoSuchObjectException, MetaException, TException { + List values = partitionNameToVals(partitionName); + return glueMetastoreClientDelegate.dropPartition(dbName, tblName, values, false, deleteData, false); + } + + @Override + public List dropPartitions( + String dbName, + String tblName, + List> partExprs, + boolean deleteData, + boolean ifExists + ) throws NoSuchObjectException, MetaException, TException { + // use defaults from PartitionDropOptions for purgeData + return dropPartitions_core(dbName, tblName, partExprs, deleteData, false); + } + + @Override + public List dropPartitions( + String dbName, + String tblName, + List> partExprs, + boolean deleteData, + boolean ifExists, + boolean needResults + ) throws NoSuchObjectException, MetaException, TException { + return dropPartitions_core(dbName, tblName, partExprs, deleteData, false); + } + + @Override + public List dropPartitions( + String dbName, + String tblName, + List> partExprs, + PartitionDropOptions options + ) throws TException { + return dropPartitions_core(dbName, tblName, partExprs, options.deleteData, options.purgeData); + } + + private List dropPartitions_core( + String databaseName, + String tableName, + List> partExprs, + boolean deleteData, + boolean purgeData + ) throws TException { + throw new UnsupportedOperationException("dropPartitions_core is not supported"); + } + + /** + * Delete all partitions in the list provided with BatchDeletePartitions request. It doesn't use transaction, + * so the call may result in partial failure. 
+ * @param dbName dbName + * @param tableName tableName + * @param partitionsToDelete partitionsToDelete + * @return the partitions successfully deleted + * @throws TException TException + */ + private List batchDeletePartitions( + final String dbName, final String tableName, final List partitionsToDelete, + final boolean deleteData, final boolean purgeData) throws TException { + + List deleted = Lists.newArrayList(); + if (partitionsToDelete == null) { + return deleted; + } + + validateBatchDeletePartitionsArguments(dbName, tableName, partitionsToDelete); + + List> batchDeletePartitionsFutures = Lists.newArrayList(); + + int numOfPartitionsToDelete = partitionsToDelete.size(); + for (int i = 0; i < numOfPartitionsToDelete; i += BATCH_DELETE_PARTITIONS_PAGE_SIZE) { + int end = Math.min(i + BATCH_DELETE_PARTITIONS_PAGE_SIZE, numOfPartitionsToDelete); + final List partitionsOnePage = partitionsToDelete.subList(i, end); + + batchDeletePartitionsFutures.add(BATCH_DELETE_PARTITIONS_THREAD_POOL.submit( + new Callable() { + @Override + public BatchDeletePartitionsHelper call() throws Exception { + return new BatchDeletePartitionsHelper( + glueClient, dbName, tableName, catalogId, partitionsOnePage).deletePartitions(); + } + })); + } + + TException tException = null; + for (Future future : batchDeletePartitionsFutures) { + try { + BatchDeletePartitionsHelper batchDeletePartitionsHelper = future.get(); + for (Partition partition : batchDeletePartitionsHelper.getPartitionsDeleted()) { + org.apache.hadoop.hive.metastore.api.Partition hivePartition = + CatalogToHiveConverter.convertPartition(partition); + try { + performDropPartitionPostProcessing(dbName, tableName, hivePartition, deleteData, purgeData); + } catch (TException e) { + LOG.error("Drop partition directory failed.", e); + tException = tException == null ? e : tException; + } + deleted.add(hivePartition); + } + tException = tException == null ? batchDeletePartitionsHelper.getFirstTException() : tException; + } catch (Exception e) { + LOG.error("Exception thrown by BatchDeletePartitions thread pool. 
", e); + } + } + + if (tException != null) { + throw tException; + } + return deleted; + } + + private void validateBatchDeletePartitionsArguments(final String dbName, final String tableName, + final List partitionsToDelete) { + + Preconditions.checkArgument(dbName != null, "Database name cannot be null"); + Preconditions.checkArgument(tableName != null, "Table name cannot be null"); + for (Partition partition : partitionsToDelete) { + Preconditions.checkArgument(dbName.equals(partition.getDatabaseName()), "Database name cannot be null"); + Preconditions.checkArgument(tableName.equals(partition.getTableName()), "Table name cannot be null"); + Preconditions.checkArgument(partition.getValues() != null, "Partition values cannot be null"); + } + } + + // Preserve the logic from Hive metastore + private void performDropPartitionPostProcessing( + String dbName, + String tblName, + org.apache.hadoop.hive.metastore.api.Partition partition, + boolean deleteData, + boolean ifPurge) throws MetaException, NoSuchObjectException, TException { + if (deleteData && partition.getSd() != null && partition.getSd().getLocation() != null) { + Path partPath = new Path(partition.getSd().getLocation()); + org.apache.hadoop.hive.metastore.api.Table table = getTable(dbName, tblName); + if (MetastoreClientUtils.isExternalTable(table)) { + // Don't delete external table data + return; + } + boolean mustPurge = isMustPurge(table, ifPurge); + wh.deleteDir(partPath, true, mustPurge); + try { + List values = partition.getValues(); + deleteParentRecursive(partPath.getParent(), values.size() - 1, mustPurge); + } catch (IOException e) { + throw new MetaException(e.getMessage()); + } + } + } + + @Deprecated + public void dropTable(String tableName, boolean deleteData) throws MetaException, UnknownTableException, TException, + NoSuchObjectException { + dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, deleteData, false); + } + + @Override + public void dropTable(String dbname, String tableName) throws MetaException, TException, NoSuchObjectException { + dropTable(dbname, tableName, true, true, false); + } + + @Override + public void dropTable(String dbname, String tableName, boolean deleteData, boolean ignoreUnknownTab) + throws MetaException, TException, NoSuchObjectException { + dropTable(dbname, tableName, deleteData, ignoreUnknownTab, false); + } + + @Override + public void dropTable(String dbname, String tableName, boolean deleteData, boolean ignoreUnknownTab, boolean ifPurge) + throws MetaException, TException, NoSuchObjectException { + glueMetastoreClientDelegate.dropTable(dbname, tableName, deleteData, ignoreUnknownTab, ifPurge); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Partition exchange_partition( + Map partitionSpecs, + String srcDb, + String srcTbl, + String dstDb, + String dstTbl + ) throws MetaException, NoSuchObjectException, InvalidObjectException, TException { + return glueMetastoreClientDelegate.exchangePartition(partitionSpecs, srcDb, srcTbl, dstDb, dstTbl); + } + + @Override + public List exchange_partitions( + Map partitionSpecs, + String sourceDb, + String sourceTbl, + String destDb, + String destTbl + ) throws MetaException, NoSuchObjectException, InvalidObjectException, TException { + return glueMetastoreClientDelegate.exchangePartitions(partitionSpecs, sourceDb, sourceTbl, destDb, destTbl); + } + + @Override + public AggrStats getAggrColStatsFor(String dbName, String tblName, List colNames, List partName) + throws NoSuchObjectException, MetaException, TException { + 
return glueMetastoreClientDelegate.getAggrColStatsFor(dbName, tblName, colNames, partName); + } + + @Override + public List getAllTables(String dbname) throws MetaException, TException, UnknownDBException { + return getTables(dbname, ".*"); + } + + @Override + public String getConfigValue(String name, String defaultValue) throws TException, ConfigValSecurityException { + if (!Pattern.matches("(hive|hdfs|mapred).*", name)) { + throw new ConfigValSecurityException("For security reasons, the config key " + name + " cannot be accessed"); + } + + return conf.get(name, defaultValue); + } + + @Override + public String getDelegationToken( + String owner, String renewerKerberosPrincipalName + ) throws MetaException, TException { + return glueMetastoreClientDelegate.getDelegationToken(owner, renewerKerberosPrincipalName); + } + + @Override + public List getFields(String db, String tableName) throws MetaException, TException, + UnknownTableException, UnknownDBException { + return glueMetastoreClientDelegate.getFields(db, tableName); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Function getFunction( + String dbName, String functionName) throws MetaException, TException { + return glueMetastoreClientDelegate.getFunction(dbName, functionName); + } + + @Override + public List getFunctions(String dbName, String pattern) throws MetaException, TException { + return glueMetastoreClientDelegate.getFunctions(dbName, pattern); + } + + @Override + public GetAllFunctionsResponse getAllFunctions() throws MetaException, TException { + List databaseNames = getDatabases(".*"); + List result = new ArrayList<>(); + try { + for (String databaseName : databaseNames) { + GetUserDefinedFunctionsRequest getUserDefinedFunctionsRequest = new GetUserDefinedFunctionsRequest() + .withDatabaseName(databaseName).withPattern(".*").withCatalogId(catalogId); + + List catalogFunctions = glueClient.getUserDefinedFunctions( + getUserDefinedFunctionsRequest) + .getUserDefinedFunctions(); + for (UserDefinedFunction catalogFunction : catalogFunctions) { + result.add(CatalogToHiveConverter.convertFunction(databaseName, catalogFunction)); + } + } + + GetAllFunctionsResponse response = new GetAllFunctionsResponse(); + response.setFunctions(result); + return response; + } catch (AmazonServiceException e) { + LOG.error("encountered AWS exception", e); + throw CatalogToHiveConverter.wrapInHiveException(e); + } catch (Exception e) { + String msg = "Unable to get Functions: "; + LOG.error(msg, e); + throw new MetaException(msg + e); + } + } + + @Override + public Index getIndex(String dbName, String tblName, String indexName) throws MetaException, UnknownTableException, + NoSuchObjectException, TException { + org.apache.hadoop.hive.metastore.api.Table originTable = getTable(dbName, tblName); + Map map = originTable.getParameters(); + String indexTableName = GlueMetastoreClientDelegate.INDEX_PREFIX + indexName; + if (!map.containsKey(indexTableName)) { + throw new NoSuchObjectException("can not find index: " + indexName); + } + Table indexTableObject = ConverterUtils.stringToCatalogTable(map.get(indexTableName)); + return CatalogToHiveConverter.convertTableObjectToIndex(indexTableObject); + } + + @Override + public String getMetaConf(String key) throws MetaException, TException { + ConfVars metaConfVar = HiveConf.getMetaConf(key); + if (metaConfVar == null) { + throw new MetaException("Invalid configuration key " + key); + } + return conf.get(key, metaConfVar.getDefaultValue()); + } + + @Override + public 
org.apache.hadoop.hive.metastore.api.Partition getPartition(String dbName, String tblName, List values) + throws NoSuchObjectException, MetaException, TException { + return glueMetastoreClientDelegate.getPartition(dbName, tblName, values); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Partition getPartition( + String dbName, String tblName, String partitionName) + throws MetaException, UnknownTableException, NoSuchObjectException, TException { + return glueMetastoreClientDelegate.getPartition(dbName, tblName, partitionName); + } + + @Override + public Map> getPartitionColumnStatistics( + String dbName, + String tableName, + List partitionNames, + List columnNames + ) throws NoSuchObjectException, MetaException, TException { + return glueMetastoreClientDelegate.getPartitionColumnStatistics(dbName, tableName, partitionNames, columnNames); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Partition getPartitionWithAuthInfo( + String databaseName, String tableName, List values, + String userName, List groupNames) + throws MetaException, UnknownTableException, NoSuchObjectException, TException { + + // TODO move this into the service + org.apache.hadoop.hive.metastore.api.Partition partition = getPartition(databaseName, tableName, values); + org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName); + if ("TRUE".equalsIgnoreCase(table.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))) { + String partName = Warehouse.makePartName(table.getPartitionKeys(), values); + HiveObjectRef obj = new HiveObjectRef(); + obj.setObjectType(HiveObjectType.PARTITION); + obj.setDbName(databaseName); + obj.setObjectName(tableName); + obj.setPartValues(values); + org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet privilegeSet = + this.get_privilege_set(obj, userName, groupNames); + partition.setPrivileges(privilegeSet); + } + + return partition; + } + + @Override + public List getPartitionsByNames( + String databaseName, String tableName, List partitionNames) + throws NoSuchObjectException, MetaException, TException { + return glueMetastoreClientDelegate.getPartitionsByNames(databaseName, tableName, partitionNames); + } + + @Override + public List getSchema(String db, String tableName) + throws MetaException, TException, UnknownTableException, UnknownDBException { + return glueMetastoreClientDelegate.getSchema(db, tableName); + } + + @Deprecated + public org.apache.hadoop.hive.metastore.api.Table getTable(String tableName) + throws MetaException, TException, NoSuchObjectException { + // this has been deprecated + return getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + } + + @Override + public org.apache.hadoop.hive.metastore.api.Table getTable(String dbName, String tableName) + throws MetaException, TException, NoSuchObjectException { + return glueMetastoreClientDelegate.getTable(dbName, tableName); + } + + @Override + public List getTableColumnStatistics(String dbName, String tableName, List colNames) + throws NoSuchObjectException, MetaException, TException { + return glueMetastoreClientDelegate.getTableColumnStatistics(dbName, tableName, colNames); + } + + @Override + public List getTableObjectsByName( + String dbName, List tableNames) + throws MetaException, InvalidOperationException, UnknownDBException, TException { + List hiveTables = Lists.newArrayList(); + for (String tableName : tableNames) { + hiveTables.add(getTable(dbName, tableName)); + } + + return hiveTables; + } + + @Override + public List getTables( + String dbname, 
String tablePattern) throws MetaException, TException, UnknownDBException { + return glueMetastoreClientDelegate.getTables(dbname, tablePattern); + } + + @Override + public List getTables(String dbname, String tablePattern, TableType tableType) + throws MetaException, TException, UnknownDBException { + return glueMetastoreClientDelegate.getTables(dbname, tablePattern, tableType); + } + + @Override + public List getTableMeta(String dbPatterns, String tablePatterns, List tableTypes) + throws MetaException, TException, UnknownDBException { + return glueMetastoreClientDelegate.getTableMeta(dbPatterns, tablePatterns, tableTypes); + } + + @Override + public ValidTxnList getValidTxns() throws TException { + return glueMetastoreClientDelegate.getValidTxns(); + } + + @Override + public ValidTxnList getValidTxns(long currentTxn) throws TException { + return glueMetastoreClientDelegate.getValidTxns(currentTxn); + } + + @Override + public org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet get_privilege_set( + HiveObjectRef obj, + String user, List groups + ) throws MetaException, TException { + return glueMetastoreClientDelegate.getPrivilegeSet(obj, user, groups); + } + + @Override + public boolean grant_privileges(org.apache.hadoop.hive.metastore.api.PrivilegeBag privileges) + throws MetaException, TException { + return glueMetastoreClientDelegate.grantPrivileges(privileges); + } + + @Override + public boolean revoke_privileges( + org.apache.hadoop.hive.metastore.api.PrivilegeBag privileges, + boolean grantOption + ) throws MetaException, TException { + return glueMetastoreClientDelegate.revokePrivileges(privileges, grantOption); + } + + @Override + public void heartbeat(long txnId, long lockId) + throws NoSuchLockException, NoSuchTxnException, TxnAbortedException, TException { + glueMetastoreClientDelegate.heartbeat(txnId, lockId); + } + + @Override + public HeartbeatTxnRangeResponse heartbeatTxnRange(long min, long max) throws TException { + return glueMetastoreClientDelegate.heartbeatTxnRange(min, max); + } + + @Override + public boolean isCompatibleWith(HiveConf hiveConf) { + if (currentMetaVars == null) { + return false; // recreate + } + boolean compatible = true; + for (ConfVars oneVar : HiveConf.metaVars) { + // Since metaVars are all of different types, use string for comparison + String oldVar = currentMetaVars.get(oneVar.varname); + String newVar = hiveConf.get(oneVar.varname, ""); + if (oldVar == null || + (oneVar.isCaseSensitive() ? 
!oldVar.equals(newVar) : !oldVar.equalsIgnoreCase(newVar))) { + LOG.info("Mestastore configuration " + oneVar.varname + + " changed from " + oldVar + " to " + newVar); + compatible = false; + } + } + return compatible; + } + + @Override + public void setHiveAddedJars(String addedJars) { + // taken from HiveMetaStoreClient + HiveConf.setVar(conf, ConfVars.HIVEADDEDJARS, addedJars); + } + + @Override + public boolean isLocalMetaStore() { + return false; + } + + private void snapshotActiveConf() { + currentMetaVars = new HashMap(HiveConf.metaVars.length); + for (ConfVars oneVar : HiveConf.metaVars) { + currentMetaVars.put(oneVar.varname, conf.get(oneVar.varname, "")); + } + } + + @Override + public boolean isPartitionMarkedForEvent( + String dbName, String tblName, Map partKVs, PartitionEventType eventType) + throws MetaException, NoSuchObjectException, TException, UnknownTableException, UnknownDBException, + UnknownPartitionException, InvalidPartitionException { + return glueMetastoreClientDelegate.isPartitionMarkedForEvent(dbName, tblName, partKVs, eventType); + } + + @Override + public List listIndexNames(String db_name, String tbl_name, short max) throws MetaException, TException { + // In current hive implementation, it ignores fields "max" + // https://github.com/apache/hive/blob/rel/release-2.3.0/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L3902-L3932 + List indexes = listIndexes(db_name, tbl_name, max); + List indexNames = Lists.newArrayList(); + for (Index index : indexes) { + indexNames.add(index.getIndexName()); + } + + return indexNames; + } + + @Override + public List listIndexes( + String db_name, String tbl_name, short max) throws NoSuchObjectException, MetaException, TException { + // In current hive implementation, it ignores fields "max" + // https://github.com/apache/hive/blob/rel/release-2.3.0/ + // metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L3867-L3899 + return glueMetastoreClientDelegate.listIndexes(db_name, tbl_name); + } + + @Override + public List listPartitionNames(String dbName, String tblName, short max) + throws MetaException, TException { + try { + return listPartitionNames(dbName, tblName, null, max); + } catch (NoSuchObjectException e) { + // For compatibility with Hive 1.0.0 + return Collections.emptyList(); + } + } + + @Override + public List listPartitionNames(String databaseName, String tableName, + List values, short max) + throws MetaException, TException, NoSuchObjectException { + return glueMetastoreClientDelegate.listPartitionNames(databaseName, tableName, values, max); + } + + @Override + public int getNumPartitionsByFilter(String dbName, String tableName, String filter) + throws MetaException, NoSuchObjectException, TException { + return glueMetastoreClientDelegate.getNumPartitionsByFilter(dbName, tableName, filter); + } + + @Override + public PartitionSpecProxy listPartitionSpecs(String dbName, String tblName, int max) throws TException { + return glueMetastoreClientDelegate.listPartitionSpecs(dbName, tblName, max); + } + + @Override + public PartitionSpecProxy listPartitionSpecsByFilter(String dbName, String tblName, String filter, int max) + throws MetaException, NoSuchObjectException, TException { + return glueMetastoreClientDelegate.listPartitionSpecsByFilter(dbName, tblName, filter, max); + } + + @Override + public List listPartitions(String dbName, String tblName, short max) + throws NoSuchObjectException, MetaException, TException { + return listPartitions(dbName, tblName, null, max); + } + + 
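[Editor's note: the isCompatibleWith/snapshotActiveConf pair above uses a simple snapshot-and-compare scheme: the client records the metastore-related configuration values it was created with, and a later HiveConf is considered compatible only if none of those keys changed. The following is a minimal, dependency-free sketch of that pattern; the key names and the plain Map-based "conf" are illustrative assumptions, not the actual HiveConf.metaVars set.]

import java.util.HashMap;
import java.util.Map;

// Minimal sketch of the snapshot-and-compare check used by isCompatibleWith();
// the key set here is hypothetical and stands in for HiveConf.metaVars.
public class ConfCompatibilityCheck {
  private static final String[] META_VARS = {"metastore.uris", "metastore.warehouse.dir"};
  private Map<String, String> snapshot;

  // Record the values the client was created with (mirrors snapshotActiveConf()).
  void snapshot(Map<String, String> conf) {
    snapshot = new HashMap<>();
    for (String key : META_VARS) {
      snapshot.put(key, conf.getOrDefault(key, ""));
    }
  }

  // A new conf is compatible only if every snapshotted key still has the same value.
  boolean isCompatibleWith(Map<String, String> conf) {
    if (snapshot == null) {
      return false; // never snapshotted: caller should recreate the client
    }
    for (String key : META_VARS) {
      String oldVal = snapshot.get(key);
      String newVal = conf.getOrDefault(key, "");
      if (oldVal == null || !oldVal.equals(newVal)) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    ConfCompatibilityCheck check = new ConfCompatibilityCheck();
    Map<String, String> conf = new HashMap<>();
    conf.put("metastore.uris", "thrift://localhost:9083");
    check.snapshot(conf);
    conf.put("metastore.uris", "thrift://other-host:9083");
    System.out.println(check.isCompatibleWith(conf)); // prints false: a tracked key changed
  }
}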
@Override + public List listPartitions( + String databaseName, + String tableName, + List values, + short max + ) throws NoSuchObjectException, MetaException, TException { + String expression = null; + if (values != null) { + org.apache.hadoop.hive.metastore.api.Table table = getTable(databaseName, tableName); + expression = ExpressionHelper.buildExpressionFromPartialSpecification(table, values); + } + return glueMetastoreClientDelegate.getPartitions(databaseName, tableName, expression, (long) max); + } + + @Override + public boolean listPartitionsByExpr( + String databaseName, + String tableName, + byte[] expr, + String defaultPartitionName, + short max, + List result + ) throws TException { + throw new UnsupportedOperationException("listPartitionsByExpr is not supported"); + } + + @Override + public List listPartitionsByFilter( + String databaseName, + String tableName, + String filterInput, + short max + ) throws MetaException, NoSuchObjectException, TException { + String filter = filterInput; + // we need to replace double quotes with single quotes in the filter expression + // since server side does not accept double quote expressions. + if (StringUtils.isNotBlank(filter)) { + filter = ExpressionHelper.replaceDoubleQuoteWithSingleQuotes(filter); + } + return glueMetastoreClientDelegate.getPartitions(databaseName, tableName, filter, (long) max); + } + + @Override + public List listPartitionsWithAuthInfo( + String database, String table, short maxParts, + String user, List groups) + throws MetaException, TException, NoSuchObjectException { + List partitions = listPartitions(database, table, maxParts); + + for (org.apache.hadoop.hive.metastore.api.Partition p : partitions) { + HiveObjectRef obj = new HiveObjectRef(); + obj.setObjectType(HiveObjectType.PARTITION); + obj.setDbName(database); + obj.setObjectName(table); + obj.setPartValues(p.getValues()); + org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet set = this.get_privilege_set(obj, user, groups); + p.setPrivileges(set); + } + + return partitions; + } + + @Override + public List listPartitionsWithAuthInfo( + String database, String table, + List partVals, short maxParts, + String user, List groups) throws MetaException, TException, NoSuchObjectException { + List partitions = + listPartitions(database, table, partVals, maxParts); + + for (org.apache.hadoop.hive.metastore.api.Partition p : partitions) { + HiveObjectRef obj = new HiveObjectRef(); + obj.setObjectType(HiveObjectType.PARTITION); + obj.setDbName(database); + obj.setObjectName(table); + obj.setPartValues(p.getValues()); + org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet set; + try { + set = get_privilege_set(obj, user, groups); + } catch (MetaException e) { + LOG.info("No privileges found for user: {}, groups: [{}]", + user, LoggingHelper.concatCollectionToStringForLogging(groups, ","), e); + set = new org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet(); + } + p.setPrivileges(set); + } + + return partitions; + } + + @Override + public List listTableNamesByFilter(String dbName, String filter, short maxTables) throws MetaException, + TException, InvalidOperationException, UnknownDBException { + return glueMetastoreClientDelegate.listTableNamesByFilter(dbName, filter, maxTables); + } + + @Override + public List list_privileges( + String principal, + org.apache.hadoop.hive.metastore.api.PrincipalType principalType, + HiveObjectRef objectRef + ) throws MetaException, TException { + return glueMetastoreClientDelegate.listPrivileges(principal, 
principalType, objectRef); + } + + @Override + public LockResponse lock(LockRequest lockRequest) throws NoSuchTxnException, TxnAbortedException, TException { + return glueMetastoreClientDelegate.lock(lockRequest); + } + + @Override + public void markPartitionForEvent( + String dbName, + String tblName, + Map partKVs, + PartitionEventType eventType + ) throws MetaException, NoSuchObjectException, TException, UnknownTableException, UnknownDBException, + UnknownPartitionException, InvalidPartitionException { + glueMetastoreClientDelegate.markPartitionForEvent(dbName, tblName, partKVs, eventType); + } + + @Override + public long openTxn(String user) throws TException { + return glueMetastoreClientDelegate.openTxn(user); + } + + @Override + public OpenTxnsResponse openTxns(String user, int numTxns) throws TException { + return glueMetastoreClientDelegate.openTxns(user, numTxns); + } + + @Override + public Map partitionNameToSpec(String name) throws MetaException, TException { + // Lifted from HiveMetaStore + if (name.length() == 0) { + return new HashMap(); + } + return Warehouse.makeSpecFromName(name); + } + + @Override + public List partitionNameToVals(String name) throws MetaException, TException { + return glueMetastoreClientDelegate.partitionNameToVals(name); + } + + @Override + public void reconnect() throws MetaException { + // TODO reset active Hive confs for metastore glueClient + LOG.debug("reconnect() was called."); + } + + @Override + public void renamePartition(String dbName, String tblName, List partitionValues, + org.apache.hadoop.hive.metastore.api.Partition newPartition) + throws InvalidOperationException, MetaException, TException { + + // Set DDL time to now if not specified + setDDLTime(newPartition); + org.apache.hadoop.hive.metastore.api.Table tbl; + org.apache.hadoop.hive.metastore.api.Partition oldPart; + + try { + tbl = getTable(dbName, tblName); + oldPart = getPartition(dbName, tblName, partitionValues); + } catch (NoSuchObjectException e) { + throw new InvalidOperationException(e.getMessage()); + } + + if (newPartition.getSd() == null || oldPart.getSd() == null) { + throw new InvalidOperationException("Storage descriptor cannot be null"); + } + + // if an external partition is renamed, the location should not change + if (!Strings.isNullOrEmpty(tbl.getTableType()) && tbl.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) { + newPartition.getSd().setLocation(oldPart.getSd().getLocation()); + renamePartitionInCatalog(dbName, tblName, partitionValues, newPartition); + } else { + + Path destPath = getDestinationPathForRename(dbName, tbl, newPartition); + Path srcPath = new Path(oldPart.getSd().getLocation()); + FileSystem srcFs = wh.getFs(srcPath); + FileSystem destFs = wh.getFs(destPath); + + verifyDestinationLocation(srcFs, destFs, srcPath, destPath, tbl, newPartition); + newPartition.getSd().setLocation(destPath.toString()); + + renamePartitionInCatalog(dbName, tblName, partitionValues, newPartition); + boolean success = true; + try { + if (srcFs.exists(srcPath)) { + // if destPath's parent path doesn't exist, we should mkdir it + Path destParentPath = destPath.getParent(); + if (!wh.mkdirs(destParentPath, true)) { + throw new IOException("Unable to create path " + destParentPath); + } + wh.renameDir(srcPath, destPath, true); + } + } catch (IOException e) { + success = false; + throw new InvalidOperationException("Unable to access old location " + + srcPath + " for partition " + tbl.getDbName() + "." 
+ + tbl.getTableName() + " " + partitionValues); + } finally { + if (!success) { + // revert metastore operation + renamePartitionInCatalog(dbName, tblName, newPartition.getValues(), oldPart); + } + } + } + } + + private void verifyDestinationLocation( + FileSystem srcFs, + FileSystem destFs, + Path srcPath, + Path destPath, + org.apache.hadoop.hive.metastore.api.Table tbl, + org.apache.hadoop.hive.metastore.api.Partition newPartition) + throws InvalidOperationException { + String oldPartLoc = srcPath.toString(); + String newPartLoc = destPath.toString(); + + // check that src and dest are on the same file system + if (!FileUtils.equalsFileSystem(srcFs, destFs)) { + throw new InvalidOperationException("table new location " + destPath + + " is on a different file system than the old location " + + srcPath + ". This operation is not supported"); + } + try { + srcFs.exists(srcPath); // check that src exists and also checks + if (newPartLoc.compareTo(oldPartLoc) != 0 && destFs.exists(destPath)) { + throw new InvalidOperationException("New location for this partition " + + tbl.getDbName() + "." + tbl.getTableName() + "." + newPartition.getValues() + + " already exists : " + destPath); + } + } catch (IOException e) { + throw new InvalidOperationException("Unable to access new location " + + destPath + " for partition " + tbl.getDbName() + "." + + tbl.getTableName() + " " + newPartition.getValues()); + } + } + + private Path getDestinationPathForRename( + String dbName, + org.apache.hadoop.hive.metastore.api.Table tbl, + org.apache.hadoop.hive.metastore.api.Partition newPartition) + throws InvalidOperationException, MetaException, TException { + try { + Path destPath = new Path(hiveShims.getDefaultTablePath(getDatabase(dbName), tbl.getTableName(), wh), + Warehouse.makePartName(tbl.getPartitionKeys(), newPartition.getValues())); + return constructRenamedPath(destPath, new Path(newPartition.getSd().getLocation())); + } catch (NoSuchObjectException e) { + throw new InvalidOperationException( + "Unable to change partition or table. Database " + dbName + " does not exist" + + " Check metastore logs for detailed stack." 
+ e.getMessage()); + } + } + + private void setDDLTime(org.apache.hadoop.hive.metastore.api.Partition partition) { + if (partition.getParameters() == null || + partition.getParameters().get(hive_metastoreConstants.DDL_TIME) == null || + Integer.parseInt(partition.getParameters().get(hive_metastoreConstants.DDL_TIME)) == 0) { + partition.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(System + .currentTimeMillis() / 1000)); + } + } + + private void renamePartitionInCatalog( + String databaseName, String tableName, + List partitionValues, org.apache.hadoop.hive.metastore.api.Partition newPartition) + throws InvalidOperationException, MetaException, TException { + try { + glueClient.updatePartition( + new UpdatePartitionRequest() + .withDatabaseName(databaseName) + .withTableName(tableName) + .withPartitionValueList(partitionValues) + .withPartitionInput(GlueInputConverter.convertToPartitionInput(newPartition))); + } catch (AmazonServiceException e) { + throw CatalogToHiveConverter.wrapInHiveException(e); + } + } + + @Override + public long renewDelegationToken(String tokenStrForm) throws MetaException, TException { + return glueMetastoreClientDelegate.renewDelegationToken(tokenStrForm); + } + + @Override + public void rollbackTxn(long txnId) throws NoSuchTxnException, TException { + glueMetastoreClientDelegate.rollbackTxn(txnId); + } + + @Override + public void setMetaConf(String key, String value) throws MetaException, TException { + ConfVars confVar = HiveConf.getMetaConf(key); + if (confVar == null) { + throw new MetaException("Invalid configuration key " + key); + } + String validate = confVar.validate(value); + if (validate != null) { + throw new MetaException("Invalid configuration value " + value + " for key " + key + " by " + validate); + } + conf.set(key, value); + } + + @Override + public boolean setPartitionColumnStatistics(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest request) + throws NoSuchObjectException, InvalidObjectException, + MetaException, TException, org.apache.hadoop.hive.metastore.api.InvalidInputException { + return glueMetastoreClientDelegate.setPartitionColumnStatistics(request); + } + + @Override + public void flushCache() { + // no op + } + + @Override + public Iterable> getFileMetadata(List fileIds) throws TException { + return glueMetastoreClientDelegate.getFileMetadata(fileIds); + } + + @Override + public Iterable> getFileMetadataBySarg( + List fileIds, + ByteBuffer sarg, + boolean doGetFooters + ) throws TException { + return glueMetastoreClientDelegate.getFileMetadataBySarg(fileIds, sarg, doGetFooters); + } + + @Override + public void clearFileMetadata(List fileIds) throws TException { + glueMetastoreClientDelegate.clearFileMetadata(fileIds); + } + + @Override + public void putFileMetadata(List fileIds, List metadata) throws TException { + glueMetastoreClientDelegate.putFileMetadata(fileIds, metadata); + } + + @Override + public boolean isSameConfObj(HiveConf hiveConf) { + // taken from HiveMetaStoreClient + return this.conf == hiveConf; + } + + @Override + public boolean cacheFileMetadata(String dbName, String tblName, String partName, boolean allParts) throws TException { + return glueMetastoreClientDelegate.cacheFileMetadata(dbName, tblName, partName, allParts); + } + + @Override + public List getPrimaryKeys(PrimaryKeysRequest primaryKeysRequest) + throws MetaException, NoSuchObjectException, TException { + // PrimaryKeys are currently unsupported + // return null to allow DESCRIBE (FORMATTED | EXTENDED) + return null; + } 
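[Editor's note: renamePartition above follows a compensate-on-failure pattern: the catalog entry is updated first, then the partition directory is moved, and if the move fails the catalog update is rolled back by writing the old partition back. The sketch below shows only that pattern; the Catalog and Storage interfaces are hypothetical stand-ins for the Glue client and Warehouse, not the actual APIs.]

import java.io.IOException;

// Sketch of the compensate-on-failure pattern used by renamePartition():
// commit the metadata change first, then move the data, and undo the
// metadata change if the data move fails.
public class RenameWithCompensation {

  interface Catalog {
    void updateLocation(String partition, String location);
  }

  interface Storage {
    void move(String from, String to) throws IOException;
  }

  static void rename(Catalog catalog, Storage storage,
                     String partition, String oldLoc, String newLoc) throws IOException {
    catalog.updateLocation(partition, newLoc);     // metadata first
    boolean success = false;
    try {
      storage.move(oldLoc, newLoc);                // then the data
      success = true;
    } finally {
      if (!success) {
        catalog.updateLocation(partition, oldLoc); // revert metadata on failure
      }
    }
  }
}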
+ + @Override + public List getForeignKeys(ForeignKeysRequest foreignKeysRequest) + throws MetaException, NoSuchObjectException, TException { + // PrimaryKeys are currently unsupported + // return null to allow DESCRIBE (FORMATTED | EXTENDED) + return null; + } + + @Override + public void createTableWithConstraints( + org.apache.hadoop.hive.metastore.api.Table table, + List primaryKeys, + List foreignKeys + ) throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, TException { + glueMetastoreClientDelegate.createTableWithConstraints(table, primaryKeys, foreignKeys); + } + + @Override + public void dropConstraint( + String dbName, + String tblName, + String constraintName + ) throws MetaException, NoSuchObjectException, TException { + glueMetastoreClientDelegate.dropConstraint(dbName, tblName, constraintName); + } + + @Override + public void addPrimaryKey(List primaryKeyCols) + throws MetaException, NoSuchObjectException, TException { + glueMetastoreClientDelegate.addPrimaryKey(primaryKeyCols); + } + + @Override + public void addForeignKey(List foreignKeyCols) + throws MetaException, NoSuchObjectException, TException { + glueMetastoreClientDelegate.addForeignKey(foreignKeyCols); + } + + @Override + public ShowCompactResponse showCompactions() throws TException { + return glueMetastoreClientDelegate.showCompactions(); + } + + @Override + public void addDynamicPartitions( + long txnId, + String dbName, + String tblName, + List partNames + ) throws TException { + glueMetastoreClientDelegate.addDynamicPartitions(txnId, dbName, tblName, partNames); + } + + @Override + public void addDynamicPartitions( + long txnId, + String dbName, + String tblName, + List partNames, + DataOperationType operationType + ) throws TException { + glueMetastoreClientDelegate.addDynamicPartitions(txnId, dbName, tblName, partNames, operationType); + } + + @Override + public void insertTable(org.apache.hadoop.hive.metastore.api.Table table, boolean overwrite) throws MetaException { + glueMetastoreClientDelegate.insertTable(table, overwrite); + } + + @Override + public NotificationEventResponse getNextNotification( + long lastEventId, int maxEvents, NotificationFilter notificationFilter) throws TException { + return glueMetastoreClientDelegate.getNextNotification(lastEventId, maxEvents, notificationFilter); + } + + @Override + public CurrentNotificationEventId getCurrentNotificationEventId() throws TException { + return glueMetastoreClientDelegate.getCurrentNotificationEventId(); + } + + @Override + public FireEventResponse fireListenerEvent(FireEventRequest fireEventRequest) throws TException { + return glueMetastoreClientDelegate.fireListenerEvent(fireEventRequest); + } + + @Override + public ShowLocksResponse showLocks() throws TException { + return glueMetastoreClientDelegate.showLocks(); + } + + @Override + public ShowLocksResponse showLocks(ShowLocksRequest showLocksRequest) throws TException { + return glueMetastoreClientDelegate.showLocks(showLocksRequest); + } + + @Override + public GetOpenTxnsInfoResponse showTxns() throws TException { + return glueMetastoreClientDelegate.showTxns(); + } + + @Deprecated + public boolean tableExists(String tableName) throws MetaException, TException, UnknownDBException { + // this method has been deprecated; + return tableExists(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + } + + @Override + public boolean tableExists(String databaseName, String tableName) throws MetaException, TException, + UnknownDBException { + return 
glueMetastoreClientDelegate.tableExists(databaseName, tableName); + } + + @Override + public void unlock(long lockId) throws NoSuchLockException, TxnOpenException, TException { + glueMetastoreClientDelegate.unlock(lockId); + } + + @Override + public boolean updatePartitionColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + org.apache.hadoop.hive.metastore.api.InvalidInputException { + return glueMetastoreClientDelegate.updatePartitionColumnStatistics(columnStatistics); + } + + @Override + public boolean updateTableColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + org.apache.hadoop.hive.metastore.api.InvalidInputException { + return glueMetastoreClientDelegate.updateTableColumnStatistics(columnStatistics); + } + + @Override + public void validatePartitionNameCharacters(List part_vals) throws TException, MetaException { + try { + String partitionValidationRegex = conf.getVar(ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN); + Pattern partitionValidationPattern = Strings.isNullOrEmpty(partitionValidationRegex) ? null + : Pattern.compile(partitionValidationRegex); + MetaStoreUtils.validatePartitionNameCharacters(part_vals, partitionValidationPattern); + } catch (Exception e) { + if (e instanceof MetaException) { + throw (MetaException) e; + } else { + throw new MetaException(e.getMessage()); + } + } + } + + private Path constructRenamedPath(Path defaultNewPath, Path currentPath) { + URI currentUri = currentPath.toUri(); + + return new Path(currentUri.getScheme(), currentUri.getAuthority(), + defaultNewPath.toUri().getPath()); + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/SessionCredentialsProviderFactory.java b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/SessionCredentialsProviderFactory.java new file mode 100644 index 000000000000..6f4f608af12e --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/metastore/SessionCredentialsProviderFactory.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSSessionCredentials; +import com.amazonaws.auth.BasicSessionCredentials; +import com.amazonaws.internal.StaticCredentialsProvider; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public class SessionCredentialsProviderFactory implements AWSCredentialsProviderFactory { + + public static final String AWS_ACCESS_KEY_CONF_VAR = "hive.aws_session_access_id"; + public static final String AWS_SECRET_KEY_CONF_VAR = "hive.aws_session_secret_key"; + public static final String AWS_SESSION_TOKEN_CONF_VAR = "hive.aws_session_token"; + + @Override + public AWSCredentialsProvider buildAWSCredentialsProvider(HiveConf hiveConf) { + + Preconditions.checkArgument(hiveConf != null, "hiveConf cannot be null."); + + String accessKey = hiveConf.get(AWS_ACCESS_KEY_CONF_VAR); + String secretKey = hiveConf.get(AWS_SECRET_KEY_CONF_VAR); + String sessionToken = hiveConf.get(AWS_SESSION_TOKEN_CONF_VAR); + + Preconditions.checkArgument(accessKey != null, AWS_ACCESS_KEY_CONF_VAR + " must be set."); + Preconditions.checkArgument(secretKey != null, AWS_SECRET_KEY_CONF_VAR + " must be set."); + Preconditions.checkArgument(sessionToken != null, AWS_SESSION_TOKEN_CONF_VAR + " must be set."); + + AWSSessionCredentials credentials = new BasicSessionCredentials(accessKey, secretKey, sessionToken); + + return new StaticCredentialsProvider(credentials); + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/shims/AwsGlueHive2Shims.java b/aws/src/main/java/org/apache/iceberg/aws/glue/shims/AwsGlueHive2Shims.java new file mode 100644 index 000000000000..7c46a1c410ea --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/shims/AwsGlueHive2Shims.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.shims; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; + +final class AwsGlueHive2Shims implements AwsGlueHiveShims { + + private static final String HIVE_2_VERSION = "2."; + + static boolean supportsVersion(String version) { + return version.startsWith(HIVE_2_VERSION); + } + + @Override + public Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) throws MetaException { + return warehouse.getDefaultTablePath(db, tableName); + } + + @Override + public boolean validateTableName(String name, Configuration conf) { + return MetaStoreUtils.validateName(name, conf); + } + + @Override + public boolean requireCalStats( + Configuration conf, + Partition oldPart, + Partition newPart, + Table tbl, + EnvironmentContext environmentContext) { + return MetaStoreUtils.requireCalStats(conf, oldPart, newPart, tbl, environmentContext); + } + + @Override + public boolean updateTableStatsFast( + Database db, + Table tbl, + Warehouse wh, + boolean madeDir, + boolean forceRecompute, + EnvironmentContext environmentContext + ) throws MetaException { + return MetaStoreUtils.updateTableStatsFast(db, tbl, wh, madeDir, forceRecompute, environmentContext); + } + +} + diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/shims/AwsGlueHiveShims.java b/aws/src/main/java/org/apache/iceberg/aws/glue/shims/AwsGlueHiveShims.java new file mode 100644 index 000000000000..f1de7c979cb7 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/shims/AwsGlueHiveShims.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.shims; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; + +public interface AwsGlueHiveShims { + + Path getDefaultTablePath(Database db, String tableName, Warehouse warehouse) + throws MetaException; + + boolean validateTableName(String name, Configuration conf); + + boolean requireCalStats( + Configuration conf, Partition oldPart, Partition newPart, Table tbl, EnvironmentContext environmentContext); + + boolean updateTableStatsFast( + Database db, Table tbl, Warehouse wh, boolean madeDir, + boolean forceRecompute, EnvironmentContext environmentContext) + throws MetaException; +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/shims/ShimsLoader.java b/aws/src/main/java/org/apache/iceberg/aws/glue/shims/ShimsLoader.java new file mode 100644 index 000000000000..6fd52ac48ccd --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/shims/ShimsLoader.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.shims; + +import org.apache.hive.common.util.HiveVersionInfo; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; + +public final class ShimsLoader { + + private static AwsGlueHiveShims hiveShims; + + private ShimsLoader() { + } + + public static synchronized AwsGlueHiveShims getHiveShims() { + if (hiveShims == null) { + hiveShims = loadHiveShims(); + } + return hiveShims; + } + + private static AwsGlueHiveShims loadHiveShims() { + String hiveVersion = HiveVersionInfo.getShortVersion(); + if (AwsGlueHive2Shims.supportsVersion(hiveVersion)) { + try { + return AwsGlueHive2Shims.class.newInstance(); + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException("unable to get instance of Hive 2.x shim class"); + } + } else { + throw new RuntimeException("Shim class for Hive version " + hiveVersion + " does not exist"); + } + } + + @VisibleForTesting + static synchronized void clearShimClass() { + hiveShims = null; + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/AWSGlueConfig.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/AWSGlueConfig.java new file mode 100644 index 000000000000..241f1d512c29 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/AWSGlueConfig.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.ClientConfiguration; + +public final class AWSGlueConfig { + + private AWSGlueConfig() { + } + + public static final String AWS_GLUE_ENDPOINT = "aws.glue.endpoint"; + public static final String AWS_REGION = "aws.region"; + public static final String AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS + = "aws.catalog.credentials.provider.factory.class"; + + public static final String AWS_GLUE_MAX_RETRY = "aws.glue.max-error-retries"; + public static final int DEFAULT_MAX_RETRY = 5; + + public static final String AWS_GLUE_MAX_CONNECTIONS = "aws.glue.max-connections"; + public static final int DEFAULT_MAX_CONNECTIONS = ClientConfiguration.DEFAULT_MAX_CONNECTIONS; + + public static final String AWS_GLUE_CONNECTION_TIMEOUT = "aws.glue.connection-timeout"; + public static final int DEFAULT_CONNECTION_TIMEOUT = ClientConfiguration.DEFAULT_CONNECTION_TIMEOUT; + + public static final String AWS_GLUE_SOCKET_TIMEOUT = "aws.glue.socket-timeout"; + public static final int DEFAULT_SOCKET_TIMEOUT = ClientConfiguration.DEFAULT_SOCKET_TIMEOUT; + + public static final String AWS_GLUE_DB_CACHE_ENABLE = "aws.glue.cache.db.enable"; + public static final String AWS_GLUE_DB_CACHE_SIZE = "aws.glue.cache.db.size"; + public static final String AWS_GLUE_DB_CACHE_TTL_MINS = "aws.glue.cache.db.ttl-mins"; + + public static final String AWS_GLUE_TABLE_CACHE_ENABLE = "aws.glue.cache.table.enable"; + public static final String AWS_GLUE_TABLE_CACHE_SIZE = "aws.glue.cache.table.size"; + public static final String AWS_GLUE_TABLE_CACHE_TTL_MINS = "aws.glue.cache.table.ttl-mins"; + + public static final String AWS_GLUE_LOCK_RELEASE_RETRY_MAX = "aws.glue.lock.release-retry-max"; + public static final int AWS_GLUE_LOCK_RELEASE_RETRY_MAX_DEFAULT = 10; + + public static final String AWS_GLUE_LOCK_WAIT_INTERVAL_MILLIS = "aws.glue.lock.wait-interval-millis"; + public static final long AWS_GLUE_LOCK_WAIT_INTERVAL_MILLIS_DEFAULT = 2000; + + public static final String AWS_GLUE_LOCK_TIMEOUT_MILLIS = "aws.glue.lock.timeout-millis"; + public static final long AWS_GLUE_LOCK_TIMEOUT_MILLIS_DEFAULT = 300000; // same as hive.txn.timeout + + public static final String AWS_GLUE_LOCK_REQUEST_DYNAMO_TABLE_NAME = "aws.glue.lock.dynamo.request-table-name"; + public static final String AWS_GLUE_LOCK_REQUEST_DYNAMO_TABLE_NAME_DEFAULT = "IcebergGlueMetastoreLockRequests"; + + public static final String AWS_GLUE_LOCK_COMPONENT_DYNAMO_TABLE_NAME = "aws.glue.lock.dynamo.component-table-name"; + public static final String AWS_GLUE_LOCK_COMPONENT_DYNAMO_TABLE_NAME_DEFAULT = "IcebergGlueMetastoreLockComponents"; + + public static final String AWS_GLUE_LOCK_DYNAMO_INITIALIZE_TABLES = "aws.glue.lock.dynamo.initialize-tables"; + public static final boolean AWS_GLUE_LOCK_DYNAMO_INITIALIZE_TABLES_DEFAULT = true; +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/BatchCreatePartitionsHelper.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/BatchCreatePartitionsHelper.java new file mode 100644 index 000000000000..1fb89e4a021c --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/BatchCreatePartitionsHelper.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.aws.glue.converters.GlueInputConverter; +import org.apache.iceberg.aws.glue.metastore.AWSGlueMetastore; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings("Slf4jConstantLogMessage") +public final class BatchCreatePartitionsHelper { + + private static final Logger LOG = LoggerFactory.getLogger(BatchCreatePartitionsHelper.class); + + private final AWSGlueMetastore glueClient; + private final String databaseName; + private final String tableName; + private final List partitions; + private final boolean ifNotExists; + private Map partitionMap; + private List partitionsFailed; + private TException firstTException; + private String catalogId; + + public BatchCreatePartitionsHelper( + AWSGlueMetastore glueClient, + String databaseName, + String tableName, + String catalogId, + List partitions, + boolean ifNotExists) { + this.glueClient = glueClient; + this.databaseName = databaseName; + this.tableName = tableName; + this.catalogId = catalogId; + this.partitions = partitions; + this.ifNotExists = ifNotExists; + } + + public BatchCreatePartitionsHelper createPartitions() { + partitionMap = PartitionUtils.buildPartitionMap(partitions); + partitionsFailed = Lists.newArrayList(); + + try { + List result = + glueClient.createPartitions(databaseName, tableName, + GlueInputConverter.convertToPartitionInputs(partitionMap.values())); + processResult(result); + } catch (Exception e) { + LOG.error("Exception thrown while creating partitions in DataCatalog: ", e); + firstTException = CatalogToHiveConverter.wrapInHiveException(e); + if (PartitionUtils.isInvalidUserInputException(e)) { + setAllFailed(); + } else { + checkIfPartitionsCreated(); + } + } + return this; + } + + private void setAllFailed() { + partitionsFailed = partitions; + partitionMap.clear(); + } + + private void processResult(List partitionErrors) { + if (partitionErrors == null || partitionErrors.isEmpty()) { + return; + } + + LOG.error("BatchCreatePartitions failed to create {} out of {} partitions.", + partitionErrors.size(), partitionMap.size()); + + for (PartitionError partitionError : partitionErrors) { + Partition partitionFailed = partitionMap.remove(new PartitionKey(partitionError.getPartitionValues())); + + TException exception = 
CatalogToHiveConverter.errorDetailToHiveException(partitionError.getErrorDetail()); + if (ifNotExists && exception instanceof AlreadyExistsException) { + // AlreadyExistsException is allowed, so we shouldn't add the partition to partitionsFailed list + continue; + } + LOG.error("encountered partition error", exception); + if (firstTException == null) { + firstTException = exception; + } + partitionsFailed.add(partitionFailed); + } + } + + private void checkIfPartitionsCreated() { + for (Partition partition : partitions) { + if (!partitionExists(partition)) { + partitionsFailed.add(partition); + partitionMap.remove(new PartitionKey(partition)); + } + } + } + + private boolean partitionExists(Partition partition) { + try { + Partition partitionReturned = glueClient.getPartition(databaseName, tableName, partition.getValues()); + return partitionReturned != null; // probably always true here + } catch (EntityNotFoundException e) { + // here we assume namespace and table exist. It is assured by calling "isInvalidUserInputException" method above + return false; + } catch (Exception e) { + String message = String.format("Get partition request %s failed.", + StringUtils.join(partition.getValues(), "/")); + LOG.error(message, e); + // partition status unknown, we assume that the partition was not created + return false; + } + } + + public TException getFirstTException() { + return firstTException; + } + + public Collection getPartitionsCreated() { + return partitionMap.values(); + } + + public List getPartitionsFailed() { + return partitionsFailed; + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/BatchDeletePartitionsHelper.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/BatchDeletePartitionsHelper.java new file mode 100644 index 000000000000..2436abecaa2e --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/BatchDeletePartitionsHelper.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.AWSGlue; +import com.amazonaws.services.glue.model.BatchDeletePartitionRequest; +import com.amazonaws.services.glue.model.BatchDeletePartitionResult; +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.ErrorDetail; +import com.amazonaws.services.glue.model.GetPartitionRequest; +import com.amazonaws.services.glue.model.GetPartitionResult; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class BatchDeletePartitionsHelper { + private static final Logger LOG = LoggerFactory.getLogger(BatchDeletePartitionsHelper.class); + + private final AWSGlue client; + private final String namespaceName; + private final String tableName; + private final String catalogId; + private final List partitions; + private Map partitionMap; + private TException firstTException; + + public BatchDeletePartitionsHelper(AWSGlue client, String namespaceName, String tableName, + String catalogId, List partitions) { + this.client = client; + this.namespaceName = namespaceName; + this.tableName = tableName; + this.catalogId = catalogId; + this.partitions = partitions; + } + + public BatchDeletePartitionsHelper deletePartitions() { + partitionMap = PartitionUtils.buildPartitionMap(partitions); + + BatchDeletePartitionRequest request = new BatchDeletePartitionRequest().withDatabaseName(namespaceName) + .withTableName(tableName).withCatalogId(catalogId) + .withPartitionsToDelete(PartitionUtils.getPartitionValuesList(partitionMap)); + + try { + BatchDeletePartitionResult result = client.batchDeletePartition(request); + processResult(result); + } catch (Exception e) { + LOG.error("Exception thrown while deleting partitions in DataCatalog: ", e); + firstTException = CatalogToHiveConverter.wrapInHiveException(e); + if (PartitionUtils.isInvalidUserInputException(e)) { + setAllFailed(); + } else { + checkIfPartitionsDeleted(); + } + } + return this; + } + + private void setAllFailed() { + partitionMap.clear(); + } + + private void processResult(final BatchDeletePartitionResult batchDeletePartitionsResult) { + List partitionErrors = batchDeletePartitionsResult.getErrors(); + if (partitionErrors == null || partitionErrors.isEmpty()) { + return; + } + + LOG.error("BatchDeletePartitions failed to delete {} out of {} partitions.", + partitionErrors.size(), partitionMap.size()); + + for (PartitionError partitionError : partitionErrors) { + partitionMap.remove(new PartitionKey(partitionError.getPartitionValues())); + ErrorDetail errorDetail = partitionError.getErrorDetail(); + LOG.error("partition error {}", partitionError); + if (firstTException == null) { + firstTException = CatalogToHiveConverter.errorDetailToHiveException(errorDetail); + } + } + } + + private void checkIfPartitionsDeleted() { + for (Partition partition : partitions) { + if (!partitionDeleted(partition)) { + partitionMap.remove(new PartitionKey(partition)); + } + } + } + + private boolean partitionDeleted(Partition partition) { + GetPartitionRequest request = new GetPartitionRequest() + .withDatabaseName(partition.getDatabaseName()) + .withTableName(partition.getTableName()) + 
.withPartitionValues(partition.getValues()) + .withCatalogId(catalogId); + + try { + GetPartitionResult result = client.getPartition(request); + Partition partitionReturned = result.getPartition(); + return partitionReturned == null; // probably always false + } catch (EntityNotFoundException e) { + // here we assume namespace and table exist. It is assured by calling "isInvalidUserInputException" method above + return true; + } catch (Exception e) { + LOG.error("Get partition request {} failed", request, e); + // Partition status unknown, we assume that the partition was not deleted + return false; + } + } + + public TException getFirstTException() { + return firstTException; + } + + public Collection getPartitionsDeleted() { + return partitionMap.values(); + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/ExpressionHelper.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/ExpressionHelper.java new file mode 100644 index 000000000000..8753df1896cf --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/ExpressionHelper.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.iceberg.relocated.com.google.common.base.Joiner; +import org.apache.iceberg.relocated.com.google.common.base.Strings; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility methods for constructing the string representation of query expressions used by Catalog service + */ +public final class ExpressionHelper { + + private static final String HIVE_STRING_TYPE_NAME = "string"; + private static final String HIVE_IN_OPERATOR = "IN"; + private static final String HIVE_NOT_IN_OPERATOR = "NOT IN"; + private static final String HIVE_NOT_OPERATOR = "not"; + + // TODO "hook" into Hive logging (hive or hive.metastore) + private static final Logger LOG = LoggerFactory.getLogger(ExpressionHelper.class); + + private static final List QUOTED_TYPES = ImmutableList.of( + "string", "char", "varchar", "date", "datetime", "timestamp"); + private static final Joiner JOINER = Joiner.on(" AND "); + + private ExpressionHelper() { + } + + private static String rewriteExpressionForNotIn(String expressionInput, Set columnsInNotInExpression) { + String expression = expressionInput; + for (String columnName : columnsInNotInExpression) { + if (columnName != null) { + String hiveExpression = getHiveCompatibleNotInExpression(columnName); + hiveExpression = escapeParentheses(hiveExpression); + String catalogExpression = getCatalogCompatibleNotInExpression(columnName); + catalogExpression = escapeParentheses(catalogExpression); + expression = expression.replaceAll(hiveExpression, catalogExpression); + } + } + return expression; + } + + // return "not () IN (" + private static String getHiveCompatibleNotInExpression(String columnName) { + return String.format("%s (%s) %s (", HIVE_NOT_OPERATOR, columnName, HIVE_IN_OPERATOR); + } + + // return "() NOT IN (" + private static String getCatalogCompatibleNotInExpression(String columnName) { + return String.format("(%s) %s (", columnName, HIVE_NOT_IN_OPERATOR); + } + + /* + * Escape the parentheses so that they are considered literally and not as part of regular expression. In the updated + * expression , we need "\\(" as the output. 
So, the first four '\' generate '\\' and the last two '\' generate a '(' + */ + private static String escapeParentheses(String expressionInput) { + String expression = expressionInput; + expression = expression.replaceAll("\\(", "\\\\\\("); + expression = expression.replaceAll("\\)", "\\\\\\)"); + return expression; + } + + public static String buildExpressionFromPartialSpecification(org.apache.hadoop.hive.metastore.api.Table table, + List partitionValues) throws MetaException { + + List partitionKeys = table.getPartitionKeys(); + + if (partitionValues == null || partitionValues.isEmpty()) { + return null; + } + + if (partitionKeys == null || partitionValues.size() > partitionKeys.size()) { + throw new MetaException("Incorrect number of partition values: " + partitionValues); + } + + partitionKeys = partitionKeys.subList(0, partitionValues.size()); + List predicates = new LinkedList<>(); + for (int i = 0; i < partitionValues.size(); i++) { + if (!Strings.isNullOrEmpty(partitionValues.get(i))) { + predicates.add(buildPredicate(partitionKeys.get(i), partitionValues.get(i))); + } + } + + return JOINER.join(predicates); + } + + private static String buildPredicate(org.apache.hadoop.hive.metastore.api.FieldSchema schema, String value) { + if (isQuotedType(schema.getType())) { + return String.format("(%s='%s')", schema.getName(), escapeSingleQuotes(value)); + } else { + return String.format("(%s=%s)", schema.getName(), value); + } + } + + private static String escapeSingleQuotes(String str) { + return str.replaceAll("'", "\\\\'"); + } + + private static boolean isQuotedType(String type) { + return QUOTED_TYPES.contains(type); + } + + public static String replaceDoubleQuoteWithSingleQuotes(String str) { + return str.replaceAll("\"", "\'"); + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/HiveTableValidator.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/HiveTableValidator.java new file mode 100644 index 000000000000..e911abbd5459 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/HiveTableValidator.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.InvalidInputException; +import com.amazonaws.services.glue.model.Table; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; + +public enum HiveTableValidator { + + REQUIRED_PROPERTIES_VALIDATOR { + public void validate(Table table) { + String missingProperty = null; + + if (notApplicableTableType(table)) { + return; + } + + if (table.getTableType() == null) { + missingProperty = "TableType"; + } else if (table.getStorageDescriptor() == null) { + missingProperty = "StorageDescriptor"; + } else if (table.getStorageDescriptor().getInputFormat() == null) { + missingProperty = "StorageDescriptor#InputFormat"; + } else if (table.getStorageDescriptor().getOutputFormat() == null) { + missingProperty = "StorageDescriptor#OutputFormat"; + } else if (table.getStorageDescriptor().getSerdeInfo() == null) { + missingProperty = "StorageDescriptor#SerdeInfo"; + } else if (table.getStorageDescriptor().getSerdeInfo().getSerializationLibrary() == null) { + missingProperty = "StorageDescriptor#SerdeInfo#SerializationLibrary"; + } + + if (missingProperty != null) { + throw new InvalidInputException( + String.format("%s cannot be null for table: %s", missingProperty, table.getName())); + } + } + }; + + public abstract void validate(Table table); + + private static boolean notApplicableTableType(Table table) { + if (isNotManagedOrExternalTable(table) || + isStorageHandlerType(table)) { + return true; + } + return false; + } + + private static boolean isNotManagedOrExternalTable(Table table) { + if (table.getTableType() != null && + TableType.valueOf(table.getTableType()) != TableType.MANAGED_TABLE && + TableType.valueOf(table.getTableType()) != TableType.EXTERNAL_TABLE) { + return true; + } + return false; + } + + private static boolean isStorageHandlerType(Table table) { + if (table.getParameters() != null && + table.getParameters().containsKey(hive_metastoreConstants.META_TABLE_STORAGE) && + StringUtils.isNotEmpty(table.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE))) { + return true; + } + return false; + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/LoggingHelper.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/LoggingHelper.java new file mode 100644 index 000000000000..153a90fc661c --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/LoggingHelper.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import java.util.Collection; + +public class LoggingHelper { + + private static final int MAX_LOG_STRING_LEN = 2000; + + private LoggingHelper() { + } + + public static String concatCollectionToStringForLogging( + Collection collection, String delimiterInput) { + String delimiter = delimiterInput; + if (collection == null) { + return ""; + } + if (delimiter == null) { + delimiter = ","; + } + StringBuilder bldr = new StringBuilder(); + int totalLen = 0; + int delimiterSize = delimiter.length(); + for (String str : collection) { + if (totalLen > MAX_LOG_STRING_LEN) { + break; + } + if (str.length() + totalLen > MAX_LOG_STRING_LEN) { + bldr.append(str.subSequence(0, MAX_LOG_STRING_LEN - totalLen)); + break; + } else { + bldr.append(str); + bldr.append(delimiter); + totalLen += str.length() + delimiterSize; + } + } + return bldr.toString(); + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/MetastoreClientUtils.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/MetastoreClientUtils.java new file mode 100644 index 000000000000..4f48c0901712 --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/MetastoreClientUtils.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.util; + +import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.iceberg.aws.glue.metastore.GlueMetastoreClientDelegate; +import org.apache.iceberg.aws.glue.shims.AwsGlueHiveShims; +import org.apache.iceberg.aws.glue.shims.ShimsLoader; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; + +public final class MetastoreClientUtils { + + private static final AwsGlueHiveShims hiveShims = ShimsLoader.getHiveShims(); + + private MetastoreClientUtils() { + // static util class should not be instantiated + } + + /** + * @return boolean + * true -> if directory was able to be created. + * false -> if directory already exists. + * @throws MetaException if directory could not be created. 
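 *
 * Illustrative usage (the Warehouse and Path arguments are placeholders supplied by the caller):
 *   boolean madeDir = MetastoreClientUtils.makeDirs(warehouse, new Path(table.getSd().getLocation()));
 *   // madeDir is true only when this call actually created the directory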
+ */ + public static boolean makeDirs(Warehouse wh, Path path) throws MetaException { + Preconditions.checkNotNull(wh, "Warehouse cannot be null"); + Preconditions.checkNotNull(path, "Path cannot be null"); + + boolean madeDir = false; + if (!wh.isDir(path)) { + if (!wh.mkdirs(path, true)) { + throw new MetaException("Unable to create path: " + path); + } + madeDir = true; + } + return madeDir; + } + + /** + * Taken from HiveMetaStore#create_table_core + * https://github.com/apache/hive/blob/rel/release-2.3.0/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L1370-L1383 + */ + public static void validateTableObject(Table table, Configuration conf) throws InvalidObjectException { + Preconditions.checkNotNull(table, "table cannot be null"); + Preconditions.checkNotNull(table.getSd(), "Table#StorageDescriptor cannot be null"); + + if (!hiveShims.validateTableName(table.getTableName(), conf)) { + throw new InvalidObjectException(table.getTableName() + " is not a valid object name"); + } + String validate = MetaStoreUtils.validateTblColumns(table.getSd().getCols()); + if (validate != null) { + throw new InvalidObjectException("Invalid column " + validate); + } + + if (table.getPartitionKeys() != null) { + validate = MetaStoreUtils.validateTblColumns(table.getPartitionKeys()); + if (validate != null) { + throw new InvalidObjectException("Invalid partition column " + validate); + } + } + } + + /** + * Should be used when getting table from Glue that may have been created by + * users manually or through Crawlers. Validates that table contains properties required by Hive/Spark. + * @param table table + */ + public static void validateGlueTable(com.amazonaws.services.glue.model.Table table) { + Preconditions.checkNotNull(table, "table cannot be null"); + + for (HiveTableValidator validator : HiveTableValidator.values()) { + validator.validate(table); + } + } + + public static Map deepCopyMap(Map originalMap) { + Map deepCopy = Maps.newHashMap(); + if (originalMap == null) { + return deepCopy; + } + + for (Map.Entry entry : originalMap.entrySet()) { + deepCopy.put(entry.getKey(), entry.getValue()); + } + return deepCopy; + } + + /** + * Mimics MetaStoreUtils.isExternalTable + * Additional logic: check Table#getTableType to see if isExternalTable + */ + public static boolean isExternalTable(Table table) { + if (table == null) { + return false; + } + + Map params = table.getParameters(); + String paramsExternalStr = params == null ? null : params.get("EXTERNAL"); + if (paramsExternalStr != null) { + return "TRUE".equalsIgnoreCase(paramsExternalStr); + } + + return table.getTableType() != null && TableType.EXTERNAL_TABLE.name().equalsIgnoreCase(table.getTableType()); + } + + public static String getCatalogId(Configuration conf) { + if (StringUtils.isNotEmpty(conf.get(GlueMetastoreClientDelegate.CATALOG_ID_CONF))) { + return conf.get(GlueMetastoreClientDelegate.CATALOG_ID_CONF); + } + // This case defaults to using the caller's account Id as Catalog Id. + return null; + } +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/PartitionKey.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/PartitionKey.java new file mode 100644 index 000000000000..ce0049ee27db --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/PartitionKey.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.Partition; +import java.util.List; + +public class PartitionKey { + + private final List partitionValues; + private final int hashCode; + + public PartitionKey(Partition partition) { + this(partition.getValues()); + } + + public PartitionKey(List partitionValues) { + if (partitionValues == null) { + throw new IllegalArgumentException("Partition values cannot be null"); + } + this.partitionValues = partitionValues; + this.hashCode = partitionValues.hashCode(); + } + + @Override + public boolean equals(Object other) { + return this == other || + (other != null && + other instanceof PartitionKey && + this.partitionValues.equals(((PartitionKey) other).partitionValues)); + } + + @Override + public int hashCode() { + return hashCode; + } + + List getValues() { + return partitionValues; + } + +} diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/util/PartitionUtils.java b/aws/src/main/java/org/apache/iceberg/aws/glue/util/PartitionUtils.java new file mode 100644 index 000000000000..3d26881c1f2f --- /dev/null +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/util/PartitionUtils.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.InvalidInputException; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionValueList; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; + +public final class PartitionUtils { + + private PartitionUtils() { + } + + public static Map buildPartitionMap(final List partitions) { + Map partitionValuesMap = Maps.newHashMap(); + for (Partition partition : partitions) { + partitionValuesMap.put(new PartitionKey(partition), partition); + } + return partitionValuesMap; + } + + public static List getPartitionValuesList(final Map partitionMap) { + List partitionValuesList = Lists.newArrayList(); + for (Map.Entry entry : partitionMap.entrySet()) { + partitionValuesList.add(new PartitionValueList().withValues(entry.getValue().getValues())); + } + return partitionValuesList; + } + + public static boolean isInvalidUserInputException(Exception err) { + // exceptions caused by invalid requests, in which case we know all partitions creation failed + return err instanceof EntityNotFoundException || err instanceof InvalidInputException; + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestConverterUtils.java b/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestConverterUtils.java new file mode 100644 index 000000000000..ca2c482cc279 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestConverterUtils.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.convertors; + +import com.amazonaws.services.glue.model.Table; +import org.apache.iceberg.aws.glue.converters.ConverterUtils; +import org.apache.iceberg.aws.glue.util.ObjectTestUtils; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestConverterUtils { + + @Test + public void testCoralTableToStringConversion() { + Table table = ObjectTestUtils.getTestTable(); + assertEquals(table, ConverterUtils.stringToCatalogTable(ConverterUtils.catalogTableToString(table))); + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestEntityConversion.java b/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestEntityConversion.java new file mode 100644 index 000000000000..8d0ecc76c2f3 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestEntityConversion.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.convertors; + +import com.amazonaws.services.glue.model.AlreadyExistsException; +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.Order; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.SerDeInfo; +import com.amazonaws.services.glue.model.SkewedInfo; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import java.util.List; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.aws.glue.converters.HiveToCatalogConverter; +import org.apache.iceberg.aws.glue.util.ObjectTestUtils; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +public class TestEntityConversion { + + private static final String TEST_DB_NAME = "testDb"; + private static final String TEST_TBL_NAME = "testTbl"; + + @Test + public void testDatabaseConversion() { + Database catalogDb = ObjectTestUtils.getTestDatabase(); + org.apache.hadoop.hive.metastore.api.Database hiveDatabase = CatalogToHiveConverter + .convertDatabase(catalogDb); + Database catalogDb2 = HiveToCatalogConverter.convertDatabase(hiveDatabase); + assertEquals(catalogDb, catalogDb2); + } + + @Test + public void testDatabaseConversionWithNullFields() { + Database catalogDb = ObjectTestUtils.getTestDatabase(); + catalogDb.setLocationUri(null); + 
catalogDb.setParameters(null); + org.apache.hadoop.hive.metastore.api.Database hiveDatabase = CatalogToHiveConverter + .convertDatabase(catalogDb); + assertThat(hiveDatabase.getLocationUri(), is("")); + assertNotNull(hiveDatabase.getParameters()); + } + + @Test + public void testExceptionTranslation() { + assertEquals("org.apache.hadoop.hive.metastore.api.AlreadyExistsException", + CatalogToHiveConverter.wrapInHiveException(new AlreadyExistsException("")).getClass().getName()); + } + + @Test + public void testTableConversion() { + Table catalogTable = ObjectTestUtils.getTestTable(); + org.apache.hadoop.hive.metastore.api.Table hiveTable = + CatalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME); + assertEquals(catalogTable, HiveToCatalogConverter.convertTable(hiveTable)); + } + + @Test + public void testTableConversionWithNullParameterMap() { + // Test to ensure the parameter map returned to Hive is never null. + Table catalogTable = ObjectTestUtils.getTestTable(); + catalogTable.setParameters(null); + org.apache.hadoop.hive.metastore.api.Table hiveTable = + CatalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME); + assertNotNull(hiveTable.getParameters()); + assertTrue(hiveTable.getParameters().isEmpty()); + } + + @Test + public void testPartitionConversion() { + Partition partition = ObjectTestUtils.getTestPartition(TEST_DB_NAME, TEST_TBL_NAME, ImmutableList.of("1")); + org.apache.hadoop.hive.metastore.api.Partition hivePartition = CatalogToHiveConverter.convertPartition(partition); + Partition converted = HiveToCatalogConverter.convertPartition(hivePartition); + assertEquals(partition, converted); + } + + @Test + public void testPartitionConversionWithNullParameterMap() { + // Test to ensure the parameter map returned to Hive is never null. 
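// A null Glue parameter map should come back as an empty, non-null map on the Hive partition;
// the defaulting is assumed to happen inside CatalogToHiveConverter.convertPartition, which is
// why the assertions below only check for a non-null, empty map rather than a specific instance.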
+ Partition partition = ObjectTestUtils.getTestPartition(TEST_DB_NAME, TEST_TBL_NAME, ImmutableList.of("1")); + partition.setParameters(null); + org.apache.hadoop.hive.metastore.api.Partition hivePartition = CatalogToHiveConverter.convertPartition(partition); + assertNotNull(hivePartition.getParameters()); + assertTrue(hivePartition.getParameters().isEmpty()); + } + + @Test + public void testConvertPartitions() { + Partition partition = ObjectTestUtils.getTestPartition( + TEST_DB_NAME, TEST_TBL_NAME, ImmutableList.of("value1", "value2")); + org.apache.hadoop.hive.metastore.api.Partition hivePartition = CatalogToHiveConverter.convertPartition(partition); + List partitions = ImmutableList.of(partition); + assertEquals(ImmutableList.of(hivePartition), CatalogToHiveConverter.convertPartitions(partitions)); + } + + @Test + public void testConvertPartitionsEmpty() { + assertEquals(ImmutableList.of(), CatalogToHiveConverter.convertPartitions(ImmutableList.of())); + } + + @Test + public void testConvertPartitionsNull() { + assertEquals(null, CatalogToHiveConverter.convertPartitions(null)); + } + + @Test + public void testSkewedInfoConversion() { + SkewedInfo catalogSkewedInfo = ObjectTestUtils.getSkewedInfo(); + org.apache.hadoop.hive.metastore.api.SkewedInfo hiveSkewedinfo = + CatalogToHiveConverter.convertSkewedInfo(catalogSkewedInfo); + assertEquals(catalogSkewedInfo, HiveToCatalogConverter.convertSkewedInfo(hiveSkewedinfo)); + assertEquals(null, HiveToCatalogConverter.convertSkewedInfo(null)); + assertEquals(null, CatalogToHiveConverter.convertSkewedInfo(null)); + } + + @Test + public void testConvertSkewedInfoNullFields() { + SkewedInfo catalogSkewedInfo = new SkewedInfo(); + org.apache.hadoop.hive.metastore.api.SkewedInfo hiveSkewedInfo = + CatalogToHiveConverter.convertSkewedInfo(catalogSkewedInfo); + assertNotNull(hiveSkewedInfo.getSkewedColNames()); + assertNotNull(hiveSkewedInfo.getSkewedColValues()); + assertNotNull(hiveSkewedInfo.getSkewedColValueLocationMaps()); + } + + @Test + public void testConvertSerdeInfoNullParameter() { + SerDeInfo serDeInfo = ObjectTestUtils.getTestSerdeInfo(); + serDeInfo.setParameters(null); + assertNotNull(CatalogToHiveConverter.convertSerDeInfo(serDeInfo).getParameters()); + } + + @Test + public void testFunctionConversion() { + UserDefinedFunction catalogFunction = ObjectTestUtils.getCatalogTestFunction(); + org.apache.hadoop.hive.metastore.api.Function hiveFunction = + CatalogToHiveConverter.convertFunction(TEST_DB_NAME, catalogFunction); + assertEquals(TEST_DB_NAME, hiveFunction.getDbName()); + assertEquals(catalogFunction, HiveToCatalogConverter.convertFunction(hiveFunction)); + } + + @Test + public void testConvertOrderList() { + List hiveOrderList = ImmutableList.of(ObjectTestUtils.getTestOrder()); + List catalogOrderList = HiveToCatalogConverter.convertOrderList(hiveOrderList); + + assertEquals(hiveOrderList.get(0).getCol(), catalogOrderList.get(0).getColumn()); + assertEquals(hiveOrderList.get(0).getOrder(), catalogOrderList.get(0).getSortOrder().intValue()); + } + + @Test + public void testConvertOrderListNull() { + assertNull(HiveToCatalogConverter.convertOrderList(null)); + } + + @Test + public void testTableMetaConversion() { + Table catalogTable = ObjectTestUtils.getTestTable(); + TableMeta tableMeta = CatalogToHiveConverter.convertTableMeta(catalogTable, TEST_DB_NAME); + assertEquals(catalogTable.getName(), tableMeta.getTableName()); + assertEquals(TEST_DB_NAME, tableMeta.getDbName()); + assertEquals(catalogTable.getTableType(), 
tableMeta.getTableType()); + } + + @Test + public void testTableConversionStorageDescriptorParameterMapNull() { + Table catalogTable = ObjectTestUtils.getTestTable(); + catalogTable.getStorageDescriptor().setParameters(null); + org.apache.hadoop.hive.metastore.api.Table hiveTable = + CatalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME); + assertNotNull(hiveTable.getSd().getParameters()); + assertTrue(hiveTable.getSd().getParameters().isEmpty()); + } + + @Test + public void testTableConversionStorageDescriptorBucketColsNull() { + Table catalogTable = ObjectTestUtils.getTestTable(); + catalogTable.getStorageDescriptor().setBucketColumns(null); + org.apache.hadoop.hive.metastore.api.Table hiveTable = + CatalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME); + assertNotNull(hiveTable.getSd().getBucketCols()); + assertTrue(hiveTable.getSd().getBucketCols().isEmpty()); + } + + @Test + public void testTableConversionStorageDescriptorSorColsNull() { + Table catalogTable = ObjectTestUtils.getTestTable(); + catalogTable.getStorageDescriptor().setSortColumns(null); + org.apache.hadoop.hive.metastore.api.Table hiveTable = + CatalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME); + assertNotNull(hiveTable.getSd().getSortCols()); + assertTrue(hiveTable.getSd().getSortCols().isEmpty()); + } + + @Test + public void testTableConversionWithNullPartitionKeys() { + Table catalogTable = ObjectTestUtils.getTestTable(); + catalogTable.setPartitionKeys(null); + org.apache.hadoop.hive.metastore.api.Table hiveTable = + CatalogToHiveConverter.convertTable(catalogTable, TEST_DB_NAME); + assertNotNull(hiveTable.getPartitionKeys()); + assertTrue(hiveTable.getPartitionKeys().isEmpty()); + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestGlueInputConverter.java b/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestGlueInputConverter.java new file mode 100644 index 000000000000..0f9b20e73045 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/convertors/TestGlueInputConverter.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.convertors; + +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import com.amazonaws.services.glue.model.UserDefinedFunctionInput; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.aws.glue.converters.GlueInputConverter; +import org.apache.iceberg.aws.glue.util.ObjectTestUtils; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestGlueInputConverter { + + private Database testDB; + private Table testTable; + private Partition testPartition; + private UserDefinedFunction testFunction; + + @Before + public void before() { + testDB = ObjectTestUtils.getTestDatabase(); + testTable = ObjectTestUtils.getTestTable(); + testPartition = ObjectTestUtils.getTestPartition( + testDB.getName(), testTable.getName(), Lists.newArrayList("val1")); + testFunction = ObjectTestUtils.getCatalogTestFunction(); + } + + @Test + public void testConvertHiveDbToDatabaseInput() { + org.apache.hadoop.hive.metastore.api.Database hivedb = CatalogToHiveConverter.convertDatabase(testDB); + DatabaseInput dbInput = GlueInputConverter.convertToDatabaseInput(hivedb); + + assertEquals(testDB.getName(), dbInput.getName()); + assertEquals(testDB.getDescription(), dbInput.getDescription()); + assertEquals(testDB.getLocationUri(), dbInput.getLocationUri()); + assertEquals(testDB.getParameters(), dbInput.getParameters()); + } + + @Test + public void testConvertCatalogDbToDatabaseInput() { + DatabaseInput dbInput = GlueInputConverter.convertToDatabaseInput(testDB); + + assertEquals(testDB.getName(), dbInput.getName()); + assertEquals(testDB.getDescription(), dbInput.getDescription()); + assertEquals(testDB.getLocationUri(), dbInput.getLocationUri()); + assertEquals(testDB.getParameters(), dbInput.getParameters()); + } + + @Test + public void testConvertHiveTableToTableInput() { + org.apache.hadoop.hive.metastore.api.Table hivetbl = + CatalogToHiveConverter.convertTable(testTable, testDB.getName()); + TableInput tblInput = GlueInputConverter.convertToTableInput(hivetbl); + + assertEquals(testTable.getName(), tblInput.getName()); + assertEquals(testTable.getOwner(), tblInput.getOwner()); + assertEquals(testTable.getTableType(), tblInput.getTableType()); + assertEquals(testTable.getParameters(), tblInput.getParameters()); + assertEquals(testTable.getPartitionKeys(), tblInput.getPartitionKeys()); + assertEquals(testTable.getRetention(), tblInput.getRetention()); + assertEquals(testTable.getLastAccessTime(), tblInput.getLastAccessTime()); + assertEquals(testTable.getStorageDescriptor(), tblInput.getStorageDescriptor()); + assertEquals(testTable.getViewExpandedText(), tblInput.getViewExpandedText()); + assertEquals(testTable.getViewOriginalText(), tblInput.getViewOriginalText()); + } + + @Test + public void testConvertCatalogTableToTableInput() { + TableInput tblInput = GlueInputConverter.convertToTableInput(testTable); + + assertEquals(testTable.getName(), tblInput.getName()); + assertEquals(testTable.getOwner(), tblInput.getOwner()); + assertEquals(testTable.getTableType(), 
tblInput.getTableType()); + assertEquals(testTable.getParameters(), tblInput.getParameters()); + assertEquals(testTable.getPartitionKeys(), tblInput.getPartitionKeys()); + assertEquals(testTable.getRetention(), tblInput.getRetention()); + assertEquals(testTable.getLastAccessTime(), tblInput.getLastAccessTime()); + assertEquals(testTable.getStorageDescriptor(), tblInput.getStorageDescriptor()); + assertEquals(testTable.getViewExpandedText(), tblInput.getViewExpandedText()); + assertEquals(testTable.getViewOriginalText(), tblInput.getViewOriginalText()); + } + + @Test + public void testConvertHivePartitionToPartitionInput() { + org.apache.hadoop.hive.metastore.api.Partition hivePartition = + CatalogToHiveConverter.convertPartition(testPartition); + PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(hivePartition); + + assertEquals(testPartition.getLastAccessTime(), partitionInput.getLastAccessTime()); + assertEquals(testPartition.getParameters(), partitionInput.getParameters()); + assertEquals(testPartition.getStorageDescriptor(), partitionInput.getStorageDescriptor()); + assertEquals(testPartition.getValues(), partitionInput.getValues()); + } + + @Test + public void testConvertCatalogPartitionToPartitionInput() { + PartitionInput partitionInput = GlueInputConverter.convertToPartitionInput(testPartition); + + assertEquals(testPartition.getLastAccessTime(), partitionInput.getLastAccessTime()); + assertEquals(testPartition.getParameters(), partitionInput.getParameters()); + assertEquals(testPartition.getStorageDescriptor(), partitionInput.getStorageDescriptor()); + assertEquals(testPartition.getValues(), partitionInput.getValues()); + } + + @Test + public void testConvertHiveFunctionToFunctionInput() { + org.apache.hadoop.hive.metastore.api.Function hiveFunction = + CatalogToHiveConverter.convertFunction(testDB.getName(), testFunction); + UserDefinedFunctionInput functionInput = GlueInputConverter.convertToUserDefinedFunctionInput(hiveFunction); + + assertEquals(testFunction.getClassName(), functionInput.getClassName()); + assertEquals(testFunction.getFunctionName(), functionInput.getFunctionName()); + assertEquals(testFunction.getOwnerName(), functionInput.getOwnerName()); + assertEquals(testFunction.getOwnerType(), functionInput.getOwnerType()); + assertEquals(testFunction.getResourceUris(), functionInput.getResourceUris()); + } + + @Test + public void testConvertHiveFunctionToFunctionInputNullOwnerType() { + org.apache.hadoop.hive.metastore.api.Function hiveFunction = + CatalogToHiveConverter.convertFunction(testDB.getName(), testFunction); + hiveFunction.setOwnerType(null); + GlueInputConverter.convertToUserDefinedFunctionInput(hiveFunction); + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/lock/TestDynamoLockComponent.java b/aws/src/test/java/org/apache/iceberg/aws/glue/lock/TestDynamoLockComponent.java new file mode 100644 index 000000000000..062a9c82e5ed --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/lock/TestDynamoLockComponent.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.lock; + +import org.apache.hadoop.hive.metastore.api.LockComponent; +import org.apache.hadoop.hive.metastore.api.LockLevel; +import org.apache.hadoop.hive.metastore.api.LockType; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestDynamoLockComponent { + + @Test + public void testSerializationFromHive() { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db"); + lockComponent.setTablename("table"); + DynamoLockComponent dynamoLockComponent = DynamoLockComponent.fromHive(lockComponent); + assertEquals("db", dynamoLockComponent.getDbName()); + assertEquals("table", dynamoLockComponent.getTableName()); + assertEquals(LockType.EXCLUSIVE, dynamoLockComponent.getLockType()); + assertEquals(LockLevel.TABLE, dynamoLockComponent.getLockLevel()); + } + + @Test + public void testSerDesRoundTrip() { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db"); + lockComponent.setTablename("table"); + DynamoLockComponent dynamoLockComponent = DynamoLockComponent.fromJson( + DynamoLockComponent.fromHive(lockComponent).toString()); + assertEquals("db", dynamoLockComponent.getDbName()); + assertEquals("table", dynamoLockComponent.getTableName()); + assertEquals(LockType.EXCLUSIVE, dynamoLockComponent.getLockType()); + assertEquals(LockLevel.TABLE, dynamoLockComponent.getLockLevel()); + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/lock/TestDynamoLockManager.java b/aws/src/test/java/org/apache/iceberg/aws/glue/lock/TestDynamoLockManager.java new file mode 100644 index 000000000000..d70dab4d64e1 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/lock/TestDynamoLockManager.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.lock; + +import java.net.InetAddress; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.LockComponent; +import org.apache.hadoop.hive.metastore.api.LockLevel; +import org.apache.hadoop.hive.metastore.api.LockRequest; +import org.apache.hadoop.hive.metastore.api.LockResponse; +import org.apache.hadoop.hive.metastore.api.LockState; +import org.apache.hadoop.hive.metastore.api.LockType; +import org.apache.iceberg.aws.glue.util.AWSGlueConfig; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.dynamodb.DynamoDbClient; +import software.amazon.awssdk.services.dynamodb.model.DeleteTableRequest; +import software.amazon.awssdk.services.dynamodb.model.DescribeTableRequest; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +public class TestDynamoLockManager { + + private static final String DYNAMO_LOCAL_DOWNLOAD_URL = + "https://s3.us-west-2.amazonaws.com/dynamodb-local/dynamodb_local_latest.tar.gz"; + private static final String DYNAMO_LOCAL_DIR_NAME = System.getProperty("java.io.tmpdir") + "dynamo-local"; + private static final int DYNAMO_LOCAL_SERVER_PORT = 8001; + + private Process dynamoLocalProcess; + private DynamoDbClient dynamo; + private LockManager lockManager; + + /** + * We download the DynamoDB executable jar to local and run the server for testing. + * The jar certificate provided by maven central conflicts with iceberg and needs to be fixed. 
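+ * Note that this setup shells out to curl, mkdir and tar, so it assumes a POSIX-like environment with those tools available; the port and directory used are the test-local constants defined above.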
+ * @throws Exception exception + */ + @BeforeClass + public static void beforeClass() throws Exception { + Runtime rt = Runtime.getRuntime(); + Process pr = rt.exec(String.format("curl %s -o %s.tar.gz", DYNAMO_LOCAL_DOWNLOAD_URL, DYNAMO_LOCAL_DIR_NAME)); + pr.waitFor(); + pr = rt.exec(String.format("mkdir %s", DYNAMO_LOCAL_DIR_NAME)); + pr.waitFor(); + pr = rt.exec(String.format("tar xvzf %s.tar.gz -C %s", DYNAMO_LOCAL_DIR_NAME, DYNAMO_LOCAL_DIR_NAME)); + pr.waitFor(); + } + + @Before + public void before() throws Exception { + Runtime rt = Runtime.getRuntime(); + dynamoLocalProcess = rt.exec(String.format("java -Djava.library.path=%s/DynamoDBLocal_lib -jar " + + "%s/DynamoDBLocal.jar -inMemory -port %s", + DYNAMO_LOCAL_DIR_NAME, DYNAMO_LOCAL_DIR_NAME, DYNAMO_LOCAL_SERVER_PORT)); + Configuration configuration = new Configuration(); + dynamo = DynamoDbClient.builder() + .endpointOverride(URI.create(String.format("http://localhost:%s", DYNAMO_LOCAL_SERVER_PORT))) + .region(Region.US_EAST_1) // dummy region + .credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create("key", "secret"))) // dummy credential + .build(); + lockManager = new DynamoLockManager(configuration, dynamo); + } + + @After + public void after() throws Exception { + dynamo.deleteTable(DeleteTableRequest.builder() + .tableName(AWSGlueConfig.AWS_GLUE_LOCK_COMPONENT_DYNAMO_TABLE_NAME_DEFAULT) + .build()); + dynamo.deleteTable(DeleteTableRequest.builder() + .tableName(AWSGlueConfig.AWS_GLUE_LOCK_REQUEST_DYNAMO_TABLE_NAME_DEFAULT) + .build()); + dynamoLocalProcess.destroy(); + dynamoLocalProcess.waitFor(); + } + + @AfterClass + public static void afterClass() throws Exception { + Runtime.getRuntime().exec(String.format("rm %s.tar.gz", DYNAMO_LOCAL_DIR_NAME)).waitFor(); + Runtime.getRuntime().exec(String.format("rm -rf %s", DYNAMO_LOCAL_DIR_NAME)).waitFor(); + } + + @Test + public void testTableCreation() { + // if fail, will throw ResourceNotFoundException + dynamo.describeTable(DescribeTableRequest.builder() + .tableName(AWSGlueConfig.AWS_GLUE_LOCK_COMPONENT_DYNAMO_TABLE_NAME_DEFAULT) + .build()); + dynamo.describeTable(DescribeTableRequest.builder() + .tableName(AWSGlueConfig.AWS_GLUE_LOCK_REQUEST_DYNAMO_TABLE_NAME_DEFAULT) + .build()); + } + + @Test + public void testLock_singleLock_singleProcess() throws Exception { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db"); + lockComponent.setTablename("table1"); + LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + LockResponse response = lockManager.lock(lockRequest); + assertEquals(LockState.ACQUIRED, response.getState()); + // check lock again should still be acquired + response = lockManager.checkLock(response.getLockid()); + assertEquals(LockState.ACQUIRED, response.getState()); + } + + @Test + public void testLock_singleLock_sequentialProcesses() throws Exception { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db"); + lockComponent.setTablename("table1"); + LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + LockResponse response = lockManager.lock(lockRequest); + assertEquals(LockState.ACQUIRED, response.getState()); + LockResponse response2 = lockManager.lock(lockRequest); + assertNotEquals(response2.getLockid(), response.getLockid()); + 
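// The same component is still locked by the first request, so the second request is expected to queue instead of acquiring. +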
assertEquals(LockState.WAITING, response2.getState()); + } + + @Test + public void testLock_singleLock_parallelProcesses() throws Exception { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db"); + lockComponent.setTablename("table1"); + LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + List responses = IntStream.range(0, 100).parallel() + .mapToObj(i -> lockManager.lock(lockRequest)) + .collect(Collectors.toList()); + assertEquals(1, responses.stream() + .map(LockResponse::getState) + .filter(s -> s.equals(LockState.ACQUIRED)) + .count()); + } + + @Test + public void testLock_multipleLocks_sequentialProcesses() throws Exception { + List components = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db" + i); + lockComponent.setTablename("table" + i); + components.add(lockComponent); + } + LockRequest lockRequest = new LockRequest(components, + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + LockResponse response = lockManager.lock(lockRequest); + assertEquals(LockState.ACQUIRED, response.getState()); + LockResponse response2 = lockManager.lock(lockRequest); + assertNotEquals(response2.getLockid(), response.getLockid()); + assertEquals(LockState.WAITING, response2.getState()); + } + + @Test + public void testLock_multipleLock_parallelProcesses() throws Exception { + List components = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db" + i); + lockComponent.setTablename("table" + i); + components.add(lockComponent); + } + LockRequest lockRequest = new LockRequest(components, + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + List responses = IntStream.range(0, 100).parallel() + .mapToObj(i -> lockManager.lock(lockRequest)) + .collect(Collectors.toList()); + assertEquals(1, responses.stream() + .map(LockResponse::getState) + .filter(s -> s.equals(LockState.ACQUIRED)) + .count()); + } + + @Test + public void testLock_lockExpire() throws Exception { + Configuration conf = new Configuration(); + conf.set(AWSGlueConfig.AWS_GLUE_LOCK_TIMEOUT_MILLIS, "2000"); + lockManager = new DynamoLockManager(conf, dynamo); + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db"); + lockComponent.setTablename("table1"); + LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + LockResponse response = lockManager.lock(lockRequest); + assertEquals(LockState.ACQUIRED, response.getState()); + Thread.sleep(2000); + response = lockManager.checkLock(response.getLockid()); + assertEquals(LockState.NOT_ACQUIRED, response.getState()); + response = lockManager.lock(lockRequest); + assertEquals(LockState.ACQUIRED, response.getState()); + } + + @Test + public void testUnlock() throws Exception { + LockComponent lockComponent = new LockComponent(LockType.EXCLUSIVE, LockLevel.TABLE, "db"); + lockComponent.setTablename("table1"); + LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), + System.getProperty("user.name"), + InetAddress.getLocalHost().getHostName()); + LockResponse response = lockManager.lock(lockRequest); + assertEquals(LockState.ACQUIRED, response.getState()); + 
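// Releasing the lock explicitly should make the component acquirable again right away, without waiting for expiry as in the test above. +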
lockManager.unlock(response.getLockid()); + // check again it should be not acquired + response = lockManager.checkLock(response.getLockid()); + assertEquals(LockState.NOT_ACQUIRED, response.getState()); + response = lockManager.lock(lockRequest); + assertEquals(LockState.ACQUIRED, response.getState()); + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueClientFactory.java b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueClientFactory.java new file mode 100644 index 000000000000..a417d41be815 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueClientFactory.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.BasicSessionCredentials; +import com.amazonaws.services.glue.AWSGlue; +import org.apache.hadoop.hive.conf.HiveConf; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_CONNECTION_TIMEOUT; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_ENDPOINT; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_MAX_CONNECTIONS; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_MAX_RETRY; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_SOCKET_TIMEOUT; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_REGION; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.DEFAULT_CONNECTION_TIMEOUT; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.DEFAULT_MAX_CONNECTIONS; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.DEFAULT_MAX_RETRY; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.DEFAULT_SOCKET_TIMEOUT; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestAWSGlueClientFactory { + + private static final String FAKE_ACCESS_KEY = "accessKey"; + private static final String FAKE_SECRET_KEY = "secretKey"; + private static final String FAKE_SESSION_TOKEN = "sessionToken"; + + private AWSGlueClientFactory glueClientFactory; + private HiveConf hiveConf; + + @Before + public void before() { + 
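// A spied HiveConf lets the tests verify exactly which configuration keys the client factory reads. +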
hiveConf = spy(new HiveConf()); + glueClientFactory = new AWSGlueClientFactory(hiveConf); + } + + @Test + public void testGlueClientConstructionWithHiveConfig() throws Exception { + System.setProperty(AWS_REGION, ""); + System.setProperty(AWS_GLUE_ENDPOINT, ""); + when(hiveConf.get(AWS_GLUE_ENDPOINT)).thenReturn("endpoint"); + when(hiveConf.get(AWS_REGION)).thenReturn("us-west-1"); + + AWSGlue glueClient = glueClientFactory.newClient(); + + assertNotNull(glueClient); + + // client reads hive conf for region & endpoint + verify(hiveConf, atLeastOnce()).get(AWS_GLUE_ENDPOINT); + verify(hiveConf, atLeastOnce()).get(AWS_REGION); + } + + @Test + public void testGlueClientContructionWithAWSConfig() throws Exception { + glueClientFactory.newClient(); + verify(hiveConf, atLeastOnce()).getInt(AWS_GLUE_MAX_RETRY, DEFAULT_MAX_RETRY); + verify(hiveConf, atLeastOnce()).getInt(AWS_GLUE_MAX_CONNECTIONS, DEFAULT_MAX_CONNECTIONS); + verify(hiveConf, atLeastOnce()).getInt(AWS_GLUE_SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT); + verify(hiveConf, atLeastOnce()).getInt(AWS_GLUE_CONNECTION_TIMEOUT, DEFAULT_CONNECTION_TIMEOUT); + } + + @Test + public void testGlueClientConstructionWithSystemProperty() throws Exception { + System.setProperty(AWS_REGION, "us-east-1"); + System.setProperty(AWS_GLUE_ENDPOINT, "endpoint"); + + AWSGlue glueClient = glueClientFactory.newClient(); + + assertNotNull(glueClient); + + // client has no interactions with the hive conf since system property is set + verify(hiveConf, never()).get(AWS_GLUE_ENDPOINT); + verify(hiveConf, never()).get(AWS_REGION); + } + + @Test + public void testClientConstructionWithSessionCredentialsProviderFactory() throws Exception { + System.setProperty("aws.region", "us-west-2"); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR, FAKE_SESSION_TOKEN); + + hiveConf.setStrings(AWS_CATALOG_CREDENTIALS_PROVIDER_FACTORY_CLASS, + SessionCredentialsProviderFactory.class.getCanonicalName()); + + AWSGlue glueClient = glueClientFactory.newClient(); + + assertNotNull(glueClient); + + verify(hiveConf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR); + verify(hiveConf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR); + verify(hiveConf, atLeastOnce()).get(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR); + } + + @Test + public void testCredentialsCreatedBySessionCredentialsProviderFactory() throws Exception { + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_SESSION_TOKEN_CONF_VAR, FAKE_SESSION_TOKEN); + + SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); + AWSCredentialsProvider provider = factory.buildAWSCredentialsProvider(hiveConf); + AWSCredentials credentials = provider.getCredentials(); + + assertThat(credentials, instanceOf(BasicSessionCredentials.class)); + + BasicSessionCredentials sessionCredentials = (BasicSessionCredentials) credentials; + + assertEquals(FAKE_ACCESS_KEY, sessionCredentials.getAWSAccessKeyId()); + assertEquals(FAKE_SECRET_KEY, sessionCredentials.getAWSSecretKey()); + assertEquals(FAKE_SESSION_TOKEN, 
sessionCredentials.getSessionToken()); + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingAccessKeyWithSessionCredentialsProviderFactory() throws Exception { + SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); + factory.buildAWSCredentialsProvider(hiveConf); + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingSecretKey() throws Exception { + SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); + factory.buildAWSCredentialsProvider(hiveConf); + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingSessionTokenKey() throws Exception { + SessionCredentialsProviderFactory factory = new SessionCredentialsProviderFactory(); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_ACCESS_KEY_CONF_VAR, FAKE_ACCESS_KEY); + hiveConf.setStrings(SessionCredentialsProviderFactory.AWS_SECRET_KEY_CONF_VAR, FAKE_SECRET_KEY); + factory.buildAWSCredentialsProvider(hiveConf); + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueMetastoreCacheDecorator.java b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueMetastoreCacheDecorator.java new file mode 100644 index 000000000000..d9ef2fcf9da6 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueMetastoreCacheDecorator.java @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DatabaseInput; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.iceberg.relocated.com.google.common.cache.Cache; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestAWSGlueMetastoreCacheDecorator { + + private AWSGlueMetastore glueMetastore; + private HiveConf hiveConf; + + private static final String DB_NAME = "db"; + private static final String TABLE_NAME = "table"; + private static final AWSGlueMetastoreCacheDecorator.TableIdentifier TABLE_IDENTIFIER = + new AWSGlueMetastoreCacheDecorator.TableIdentifier(DB_NAME, TABLE_NAME); + + @Before + public void before() { + glueMetastore = mock(AWSGlueMetastore.class); + hiveConf = spy(new HiveConf()); + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); + when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true); + when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(100); + when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(100); + when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(100); + when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(100); + + } + + @Test(expected = NullPointerException.class) + public void testConstructorWithNullConf() { + new AWSGlueMetastoreCacheDecorator(null, glueMetastore); + } + + @Test(expected = IllegalArgumentException.class) + public void testConstructorWithInvalidTableCacheSize() { + when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(0); + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + } + + @Test(expected = IllegalArgumentException.class) + public void testConstructorWithInvalidTableCacheTtl() { + when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(0); + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + } + + @Test(expected = IllegalArgumentException.class) + public void testConstructorWithInvalidDbCacheSize() { + when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(0); + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + } + + @Test(expected = IllegalArgumentException.class) + public void testConstructorWithInvalidDbCacheTtl() { + when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(0); + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + } + + @Test + public void testGetDatabaseWhenCacheDisabled() { + // disable cache + 
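// With caching disabled the decorator should hold no database cache and delegate the lookup directly to the underlying metastore. +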
when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); + Database db = new Database(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + when(glueMetastore.getDatabase(DB_NAME)).thenReturn(db); + assertEquals(db, cacheDecorator.getDatabase(DB_NAME)); + assertNull(cacheDecorator.getDatabaseCache()); + verify(glueMetastore, times(1)).getDatabase(DB_NAME); + } + + @Test + public void testGetDatabaseWhenCacheEnabledAndCacheMiss() { + Database db = new Database(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + assertNotNull(cacheDecorator.getDatabaseCache()); + Cache dbCache = mock(Cache.class); + cacheDecorator.setDatabaseCache(dbCache); + + when(dbCache.getIfPresent(DB_NAME)).thenReturn(null); + when(glueMetastore.getDatabase(DB_NAME)).thenReturn(db); + doNothing().when(dbCache).put(DB_NAME, db); + + assertEquals(db, cacheDecorator.getDatabase(DB_NAME)); + + verify(glueMetastore, times(1)).getDatabase(DB_NAME); + verify(dbCache, times(1)).getIfPresent(DB_NAME); + verify(dbCache, times(1)).put(DB_NAME, db); + } + + @Test + public void testGetDatabaseWhenCacheEnabledAndCacheHit() { + Database db = new Database(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + assertNotNull(cacheDecorator.getDatabaseCache()); + Cache dbCache = mock(Cache.class); + cacheDecorator.setDatabaseCache(dbCache); + + when(dbCache.getIfPresent(DB_NAME)).thenReturn(db); + + assertEquals(db, cacheDecorator.getDatabase(DB_NAME)); + + verify(dbCache, times(1)).getIfPresent(DB_NAME); + } + + @Test + public void testUpdateDatabaseWhenCacheDisabled() { + // disable cache + when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); + DatabaseInput dbInput = new DatabaseInput(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + doNothing().when(glueMetastore).updateDatabase(DB_NAME, dbInput); + cacheDecorator.updateDatabase(DB_NAME, dbInput); + assertNull(cacheDecorator.getDatabaseCache()); + verify(glueMetastore, times(1)).updateDatabase(DB_NAME, dbInput); + } + + @Test + public void testUpdateDatabaseWhenCacheEnabled() { + DatabaseInput dbInput = new DatabaseInput(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + cacheDecorator.getDatabaseCache().put(DB_NAME, new Database()); + doNothing().when(glueMetastore).updateDatabase(DB_NAME, dbInput); + + cacheDecorator.updateDatabase(DB_NAME, dbInput); + + // db should have been removed from cache + assertNull(cacheDecorator.getDatabaseCache().getIfPresent(DB_NAME)); + verify(glueMetastore, times(1)).updateDatabase(DB_NAME, dbInput); + } + + @Test + public void testDeleteDatabaseWhenCacheDisabled() { + // disable cache + when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + doNothing().when(glueMetastore).deleteDatabase(DB_NAME); + cacheDecorator.deleteDatabase(DB_NAME); + assertNull(cacheDecorator.getDatabaseCache()); + verify(glueMetastore, times(1)).deleteDatabase(DB_NAME); + } + + @Test + public void testDeleteDatabaseWhenCacheEnabled() { + DatabaseInput dbInput = new DatabaseInput(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new 
AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + cacheDecorator.getDatabaseCache().put(DB_NAME, new Database()); + doNothing().when(glueMetastore).deleteDatabase(DB_NAME); + + cacheDecorator.deleteDatabase(DB_NAME); + + // db should have been removed from cache + assertNull(cacheDecorator.getDatabaseCache().getIfPresent(DB_NAME)); + verify(glueMetastore, times(1)).deleteDatabase(DB_NAME); + } + + @Test + public void testGetTableWhenCacheDisabled() { + // disable cache + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false); + Table table = new Table(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + when(glueMetastore.getTable(DB_NAME, TABLE_NAME)).thenReturn(table); + assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME)); + assertNull(cacheDecorator.getTableCache()); + verify(glueMetastore, times(1)).getTable(DB_NAME, TABLE_NAME); + } + + @Test + public void testGetTableWhenCacheEnabledAndCacheMiss() { + Table table = new Table(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + assertNotNull(cacheDecorator.getTableCache()); + Cache tableCache = mock(Cache.class); + cacheDecorator.setTableCache(tableCache); + + when(tableCache.getIfPresent(TABLE_IDENTIFIER)).thenReturn(null); + when(glueMetastore.getTable(DB_NAME, TABLE_NAME)).thenReturn(table); + doNothing().when(tableCache).put(TABLE_IDENTIFIER, table); + + assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME)); + + verify(glueMetastore, times(1)).getTable(DB_NAME, TABLE_NAME); + verify(tableCache, times(1)).getIfPresent(TABLE_IDENTIFIER); + verify(tableCache, times(1)).put(TABLE_IDENTIFIER, table); + } + + @Test + public void testGetTableWhenCacheEnabledAndCacheHit() { + Table table = new Table(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + assertNotNull(cacheDecorator.getTableCache()); + Cache tableCache = mock(Cache.class); + cacheDecorator.setTableCache(tableCache); + + when(tableCache.getIfPresent(TABLE_IDENTIFIER)).thenReturn(table); + + assertEquals(table, cacheDecorator.getTable(DB_NAME, TABLE_NAME)); + + verify(tableCache, times(1)).getIfPresent(TABLE_IDENTIFIER); + } + + @Test + public void testUpdateTableWhenCacheDisabled() { + // disable cache + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false); + TableInput tableInput = new TableInput(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + doNothing().when(glueMetastore).updateTable(TABLE_NAME, tableInput); + cacheDecorator.updateTable(TABLE_NAME, tableInput); + assertNull(cacheDecorator.getTableCache()); + verify(glueMetastore, times(1)).updateTable(TABLE_NAME, tableInput); + } + + @Test + public void testUpdateTableWhenCacheEnabled() { + TableInput tableInput = new TableInput(); + tableInput.setName(TABLE_NAME); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + + cacheDecorator.getTableCache().put(TABLE_IDENTIFIER, new Table()); + doNothing().when(glueMetastore).updateTable(DB_NAME, tableInput); + + cacheDecorator.updateTable(DB_NAME, tableInput); + + // table should have been removed from cache + assertNull(cacheDecorator.getTableCache().getIfPresent(TABLE_IDENTIFIER)); + verify(glueMetastore, times(1)).updateTable(DB_NAME, tableInput); + } + + @Test + public 
void testDeleteTableWhenCacheDisabled() { + // disable cache + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + doNothing().when(glueMetastore).deleteTable(DB_NAME, TABLE_NAME); + cacheDecorator.deleteTable(DB_NAME, TABLE_NAME); + assertNull(cacheDecorator.getTableCache()); + verify(glueMetastore, times(1)).deleteTable(DB_NAME, TABLE_NAME); + } + + @Test + public void testDeleteTableWhenCacheEnabled() { + DatabaseInput dbInput = new DatabaseInput(); + AWSGlueMetastoreCacheDecorator cacheDecorator = + new AWSGlueMetastoreCacheDecorator(hiveConf, glueMetastore); + cacheDecorator.getTableCache().put(TABLE_IDENTIFIER, new Table()); + doNothing().when(glueMetastore).deleteDatabase(DB_NAME); + + cacheDecorator.deleteTable(DB_NAME, TABLE_NAME); + + // table should have been removed from cache + assertNull(cacheDecorator.getTableCache().getIfPresent(TABLE_IDENTIFIER)); + verify(glueMetastore, times(1)).deleteTable(DB_NAME, TABLE_NAME); + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueMetastoreFactory.java b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueMetastoreFactory.java new file mode 100644 index 000000000000..74c46ced8d72 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestAWSGlueMetastoreFactory.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.metastore; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_ENABLE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_SIZE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_DB_CACHE_TTL_MINS; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_ENDPOINT; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_ENABLE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_SIZE; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_GLUE_TABLE_CACHE_TTL_MINS; +import static org.apache.iceberg.aws.glue.util.AWSGlueConfig.AWS_REGION; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestAWSGlueMetastoreFactory { + + private AWSGlueMetastoreFactory awsGlueMetastoreFactory; + private HiveConf hiveConf; + + @Before + public void before() { + awsGlueMetastoreFactory = new AWSGlueMetastoreFactory(); + hiveConf = spy(new HiveConf()); + + // these configs are needed for AWSGlueClient to get initialized + System.setProperty(AWS_REGION, ""); + System.setProperty(AWS_GLUE_ENDPOINT, ""); + when(hiveConf.get(AWS_GLUE_ENDPOINT)).thenReturn("endpoint"); + when(hiveConf.get(AWS_REGION)).thenReturn("us-west-1"); + + // these configs are needed for AWSGlueMetastoreCacheDecorator to get initialized + when(hiveConf.getInt(AWS_GLUE_DB_CACHE_SIZE, 0)).thenReturn(1); + when(hiveConf.getInt(AWS_GLUE_DB_CACHE_TTL_MINS, 0)).thenReturn(1); + when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_SIZE, 0)).thenReturn(1); + when(hiveConf.getInt(AWS_GLUE_TABLE_CACHE_TTL_MINS, 0)).thenReturn(1); + } + + @Test + public void testNewMetastoreWhenCacheDisabled() throws Exception { + when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(false); + assertTrue(DefaultAWSGlueMetastore.class.equals( + awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); + } + + @Test + public void testNewMetastoreWhenTableCacheEnabled() throws Exception { + when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(false); + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); + assertTrue(AWSGlueMetastoreCacheDecorator.class.equals( + awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); + } + + @Test + public void testNewMetastoreWhenDBCacheEnabled() throws Exception { + when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true); + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); + assertTrue(AWSGlueMetastoreCacheDecorator.class.equals( + awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); + } + + 
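// When any of the cache flags is turned on, the factory is expected to return the caching decorator instead of the default metastore. +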
@Test + public void testNewMetastoreWhenAllCacheEnabled() throws Exception { + when(hiveConf.getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false)).thenReturn(true); + when(hiveConf.getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false)).thenReturn(true); + assertTrue(AWSGlueMetastoreCacheDecorator.class.equals( + awsGlueMetastoreFactory.newMetastore(hiveConf).getClass())); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_DB_CACHE_ENABLE, false); + verify(hiveConf, atLeastOnce()).getBoolean(AWS_GLUE_TABLE_CACHE_ENABLE, false); + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestGlueMetastoreClientDelegate.java b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestGlueMetastoreClientDelegate.java new file mode 100644 index 000000000000..3093f501e4c9 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/metastore/TestGlueMetastoreClientDelegate.java @@ -0,0 +1,1749 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.metastore; + +import com.amazonaws.services.glue.AWSGlue; +import com.amazonaws.services.glue.model.BatchCreatePartitionRequest; +import com.amazonaws.services.glue.model.BatchCreatePartitionResult; +import com.amazonaws.services.glue.model.BatchGetPartitionRequest; +import com.amazonaws.services.glue.model.BatchGetPartitionResult; +import com.amazonaws.services.glue.model.CreateDatabaseRequest; +import com.amazonaws.services.glue.model.CreateTableRequest; +import com.amazonaws.services.glue.model.CreateUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.DeleteDatabaseRequest; +import com.amazonaws.services.glue.model.DeletePartitionRequest; +import com.amazonaws.services.glue.model.DeletePartitionResult; +import com.amazonaws.services.glue.model.DeleteTableRequest; +import com.amazonaws.services.glue.model.DeleteUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.GetDatabaseRequest; +import com.amazonaws.services.glue.model.GetDatabaseResult; +import com.amazonaws.services.glue.model.GetDatabasesRequest; +import com.amazonaws.services.glue.model.GetDatabasesResult; +import com.amazonaws.services.glue.model.GetPartitionRequest; +import com.amazonaws.services.glue.model.GetPartitionResult; +import com.amazonaws.services.glue.model.GetPartitionsRequest; +import com.amazonaws.services.glue.model.GetPartitionsResult; +import com.amazonaws.services.glue.model.GetTableRequest; +import com.amazonaws.services.glue.model.GetTableResult; +import com.amazonaws.services.glue.model.GetTablesRequest; +import com.amazonaws.services.glue.model.GetTablesResult; +import 
com.amazonaws.services.glue.model.GetUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.GetUserDefinedFunctionResult; +import com.amazonaws.services.glue.model.GetUserDefinedFunctionsRequest; +import com.amazonaws.services.glue.model.GetUserDefinedFunctionsResult; +import com.amazonaws.services.glue.model.InternalServiceException; +import com.amazonaws.services.glue.model.InvalidInputException; +import com.amazonaws.services.glue.model.OperationTimeoutException; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionInput; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.TableInput; +import com.amazonaws.services.glue.model.UpdateDatabaseRequest; +import com.amazonaws.services.glue.model.UpdatePartitionRequest; +import com.amazonaws.services.glue.model.UpdatePartitionResult; +import com.amazonaws.services.glue.model.UpdateTableRequest; +import com.amazonaws.services.glue.model.UpdateUserDefinedFunctionRequest; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.apache.commons.collections.CollectionUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.aws.glue.converters.GlueInputConverter; +import org.apache.iceberg.aws.glue.lock.LockManager; +import org.apache.iceberg.aws.glue.util.ObjectTestUtils; +import org.apache.iceberg.aws.glue.util.TestExecutorServiceFactory; +import org.apache.iceberg.relocated.com.google.common.base.Function; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.junit.Before; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; +import static org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE; +import static org.apache.iceberg.aws.glue.util.ObjectTestUtils.getTestDatabase; +import static org.apache.iceberg.aws.glue.util.ObjectTestUtils.getTestPartition; +import static org.apache.iceberg.aws.glue.util.ObjectTestUtils.getTestTable; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestGlueMetastoreClientDelegate { + + private GlueMetastoreClientDelegate metastoreClientDelegate; + private GlueMetastoreClientDelegate metastoreClientDelegateCatalogId; + + private HiveConf conf; + HiveConf hiveConfCatalogId; // conf with CatalogId + private AWSGlue glueClient; + private Warehouse wh; + + private Database testDb; + private Table testTbl; + private LockManager lockManager; + + private static final int BATCH_CREATE_PARTITIONS_MAX_REQUEST_SIZE = 100; + private static final int BATCH_GET_PARTITIONS_MAX_REQUEST_SIZE = 1000; + private static final String CATALOG_ID = "12345"; + + @Before + public void before() throws Exception { + conf = new HiveConf(); + glueClient = mock(AWSGlue.class); + wh = mock(Warehouse.class); + lockManager = mock(LockManager.class); + metastoreClientDelegate = new GlueMetastoreClientDelegate( + conf, new DefaultAWSGlueMetastore(conf, glueClient), wh, lockManager); + + // Create a client delegate with CatalogId + hiveConfCatalogId = new HiveConf(); + hiveConfCatalogId.set(GlueMetastoreClientDelegate.CATALOG_ID_CONF, CATALOG_ID); + metastoreClientDelegateCatalogId = new GlueMetastoreClientDelegate( + hiveConfCatalogId, new DefaultAWSGlueMetastore(hiveConfCatalogId, glueClient), wh, lockManager); + + testDb = getTestDatabase(); + testTbl = getTestTable(testDb.getName()); + setupMockWarehouseForPath(new Path( + testTbl.getStorageDescriptor().getLocation().toString()), false, true); + } + + private void setupMockWarehouseForPath(Path path, boolean isDir, boolean mkDir) throws Exception { + when(wh.getDnsPath(path)).thenReturn(path); + when(wh.isDir(path)).thenReturn(isDir); + when(wh.mkdirs(path, true)).thenReturn(mkDir); + } + + // ===================== Thread Executor ===================== + + @Test + public void testExecutorService() throws Exception { + Object defaultExecutorService = new DefaultExecutorServiceFactory().getExecutorService(conf); + assertEquals("Default executor service should be used", + metastoreClientDelegate.getExecutorService(), defaultExecutorService); + HiveConf customConf = new HiveConf(); + customConf.set(GlueMetastoreClientDelegate.CATALOG_ID_CONF, CATALOG_ID); + customConf.setClass(GlueMetastoreClientDelegate.CUSTOM_EXECUTOR_FACTORY_CONF, + TestExecutorServiceFactory.class, ExecutorServiceFactory.class); + GlueMetastoreClientDelegate customDelegate = new GlueMetastoreClientDelegate( + customConf, mock(AWSGlueMetastore.class), mock(Warehouse.class), mock(LockManager.class)); + Object customExecutorService = new TestExecutorServiceFactory().getExecutorService(customConf); + + assertEquals("Custom executor service should be used", + customDelegate.getExecutorService(), customExecutorService); + } + + // ===================== Database ===================== + + @Test + public void testCreateDatabaseWithExistingDir() throws Exception { + Path dbPath = new Path(testDb.getLocationUri()); + 
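// Warehouse reports the database directory as already existing, so createDatabase should not call mkdirs +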
setupMockWarehouseForPath(dbPath, true, true); + + metastoreClientDelegate.createDatabase(CatalogToHiveConverter.convertDatabase(testDb)); + verify(glueClient, times(1)).createDatabase(any(CreateDatabaseRequest.class)); + verify(wh, times(1)).isDir(dbPath); + verify(wh, never()).mkdirs(dbPath, true); + } + + @Test + public void testCreateDatabaseWithExistingDirWthCatalogId() throws Exception { + Path dbPath = new Path(testDb.getLocationUri()); + setupMockWarehouseForPath(dbPath, true, true); + + metastoreClientDelegateCatalogId.createDatabase(CatalogToHiveConverter.convertDatabase(testDb)); + ArgumentCaptor captor = ArgumentCaptor.forClass(CreateDatabaseRequest.class); + verify(glueClient, times(1)).createDatabase(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + verify(wh, times(1)).isDir(dbPath); + verify(wh, never()).mkdirs(dbPath, true); + } + + @Test + public void testCreateDatabaseWithoutExistingDir() throws Exception { + Path dbPath = new Path(testDb.getLocationUri()); + setupMockWarehouseForPath(dbPath, false, true); + + metastoreClientDelegate.createDatabase(CatalogToHiveConverter.convertDatabase(testDb)); + verify(glueClient, times(1)).createDatabase(any(CreateDatabaseRequest.class)); + verify(wh, times(1)).isDir(dbPath); + verify(wh, times(1)).mkdirs(dbPath, true); + } + + @Test + public void testGetDatabases() throws Exception { + when(glueClient.getDatabases(any(GetDatabasesRequest.class))).thenReturn( + new GetDatabasesResult().withDatabaseList(testDb)); + + List dbs = metastoreClientDelegate.getDatabases("*"); + assertEquals(testDb.getName(), Iterables.getOnlyElement(dbs)); + } + + @Test + public void testGetDatabasesWithCatalogId() throws Exception { + when(glueClient.getDatabases(any(GetDatabasesRequest.class))).thenReturn( + new GetDatabasesResult().withDatabaseList(testDb)); + + List dbs = metastoreClientDelegateCatalogId.getDatabases("*"); + ArgumentCaptor captor = ArgumentCaptor.forClass(GetDatabasesRequest.class); + verify(glueClient, times(1)).getDatabases(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + assertEquals(testDb.getName(), Iterables.getOnlyElement(dbs)); + } + + @Test + public void testGetDatabasesNullPattern() throws Exception { + when(glueClient.getDatabases(any(GetDatabasesRequest.class))).thenReturn( + new GetDatabasesResult().withDatabaseList(testDb)); + + List dbs = metastoreClientDelegate.getDatabases(null); + assertEquals(testDb.getName(), Iterables.getOnlyElement(dbs)); + } + + @Test + public void testGetDatabase() throws Exception { + when(glueClient.getDatabase(any(GetDatabaseRequest.class))).thenReturn( + new GetDatabaseResult().withDatabase(getTestDatabase())); + metastoreClientDelegate.getDatabase("db"); + verify(glueClient, atLeastOnce()).getDatabase(any(GetDatabaseRequest.class)); + } + + @Test + public void testGetDatabaseWithCatalogId() throws Exception { + when(glueClient.getDatabase(any(GetDatabaseRequest.class))).thenReturn( + new GetDatabaseResult().withDatabase(getTestDatabase())); + metastoreClientDelegateCatalogId.getDatabase("db"); + ArgumentCaptor captor = ArgumentCaptor.forClass(GetDatabaseRequest.class); + verify(glueClient, atLeastOnce()).getDatabase(captor.capture()); + GetDatabaseRequest request = captor.getValue(); + assertEquals(CATALOG_ID, request.getCatalogId()); + assertEquals("db", request.getName()); + } + + @Test + public void testGetAllDatabases() throws Exception { + when(glueClient.getDatabases(any(GetDatabasesRequest.class))).thenReturn( + new 
GetDatabasesResult().withDatabaseList(getTestDatabase())); + metastoreClientDelegate.getDatabases("*"); + // Ensure this gets invoked + verify(glueClient, atLeastOnce()).getDatabases(any(GetDatabasesRequest.class)); + } + + @Test + public void testGetAllDatabasesPaginated() throws Exception { + when(glueClient.getDatabases(any(GetDatabasesRequest.class))) + .thenReturn(new GetDatabasesResult().withDatabaseList(testDb).withNextToken("token")) + .thenReturn(new GetDatabasesResult().withDatabaseList(getTestDatabase())); + List databases = metastoreClientDelegate.getDatabases(".*"); + + assertEquals(2, databases.size()); + verify(glueClient, times(2)).getDatabases(any(GetDatabasesRequest.class)); + } + + @Test + public void testAlterDatabase() throws Exception { + metastoreClientDelegate.alterDatabase("db", CatalogToHiveConverter.convertDatabase(testDb)); + verify(glueClient, times(1)).updateDatabase(any(UpdateDatabaseRequest.class)); + } + + @Test + public void testAlterDatabaseWithCatalogId() throws Exception { + metastoreClientDelegateCatalogId.alterDatabase("db", CatalogToHiveConverter.convertDatabase(testDb)); + ArgumentCaptor captor = ArgumentCaptor.forClass(UpdateDatabaseRequest.class); + verify(glueClient, times(1)).updateDatabase(any(UpdateDatabaseRequest.class)); + verify(glueClient).updateDatabase(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + @Test + public void testDropDatabaseDeleteData() throws Exception { + when(glueClient.getDatabase(any(GetDatabaseRequest.class))).thenReturn( + new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTables(any(GetTablesRequest.class))).thenReturn( + new GetTablesResult().withTableList(ImmutableList.
of())); + Path dbPath = new Path(testDb.getLocationUri()); + when(wh.deleteDir(dbPath, true)).thenReturn(true); + + metastoreClientDelegate.dropDatabase(testDb.getName(), true, false, false); + verify(glueClient, times(1)).deleteDatabase(any(DeleteDatabaseRequest.class)); + verify(wh, times(1)).deleteDir(dbPath, true); + } + + @Test + public void testDropDatabaseDeleteDataWithCatalogId() throws Exception { + when(glueClient.getDatabase(any(GetDatabaseRequest.class))).thenReturn( + new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTables(any(GetTablesRequest.class))).thenReturn( + new GetTablesResult().withTableList(ImmutableList.
of())); + Path dbPath = new Path(testDb.getLocationUri()); + when(wh.deleteDir(dbPath, true)).thenReturn(true); + + metastoreClientDelegateCatalogId.dropDatabase( + testDb.getName(), true, false, false); + ArgumentCaptor captor = ArgumentCaptor.forClass(DeleteDatabaseRequest.class); + verify(glueClient, times(1)).deleteDatabase(captor.capture()); + DeleteDatabaseRequest request = captor.getValue(); + verify(wh, times(1)).deleteDir(dbPath, true); + assertEquals(CATALOG_ID, request.getCatalogId()); + assertEquals(testDb.getName(), request.getName()); + } + + @Test + public void testDropDatabaseKeepData() throws Exception { + when(glueClient.getDatabase(any(GetDatabaseRequest.class))).thenReturn( + new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTables(any(GetTablesRequest.class))).thenReturn( + new GetTablesResult().withTableList(ImmutableList.
of())); + Path dbPath = new Path(testDb.getLocationUri()); + when(wh.deleteDir(dbPath, true)).thenReturn(true); + + metastoreClientDelegate.dropDatabase(testDb.getName(), false, false, false); + verify(glueClient, times(1)).deleteDatabase(any(DeleteDatabaseRequest.class)); + verify(wh, never()).deleteDir(dbPath, true); + } + + // ======================= Table ====================== + + @Test(expected = InvalidObjectException.class) + public void testGetTableInvalidGlueTable() throws Exception { + Table tbl = getTestTable().withTableType(null); + when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(tbl)); + metastoreClientDelegate.getTable(testDb.getName(), tbl.getName()); + } + + @Test + public void testGetTables() throws Exception { + Table tbl2 = getTestTable(); + List tableNames = ImmutableList.of(testTbl.getName(), tbl2.getName()); + List
tableList = ImmutableList.of(testTbl, tbl2); + + when(glueClient.getTables(new GetTablesRequest().withDatabaseName(testDb.getName()).withExpression("*"))) + .thenReturn(new GetTablesResult().withTableList(tableList)); + List result = metastoreClientDelegate.getTables(testDb.getName(), "*"); + + verify(glueClient).getTables(new GetTablesRequest().withDatabaseName(testDb.getName()).withExpression("*")); + assertThat(result, is(tableNames)); + } + + @Test + public void testGetTableWithCatalogId() throws Exception { + Table tbl2 = getTestTable(); + List tableNames = ImmutableList.of(testTbl.getName(), tbl2.getName()); + List
tableList = ImmutableList.of(testTbl, tbl2); + + when(glueClient.getTables(new GetTablesRequest() + .withDatabaseName(testDb.getName()) + .withExpression("*") + .withCatalogId(CATALOG_ID))) + .thenReturn(new GetTablesResult().withTableList(tableList)); + List result = metastoreClientDelegateCatalogId.getTables(testDb.getName(), "*"); + + assertThat(result, is(tableNames)); + ArgumentCaptor captor = ArgumentCaptor.forClass(GetTablesRequest.class); + verify(glueClient, times(1)).getTables(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + assertEquals(testDb.getName(), captor.getValue().getDatabaseName()); + assertEquals("*", captor.getValue().getExpression()); + } + + @Test + public void testGetTablesWithPagination() throws Exception { + Table tbl2 = getTestTable(); + List tableNames = ImmutableList.of(testTbl.getName(), tbl2.getName()); + List
tableList1 = ImmutableList.of(testTbl); + List<Table>
tableList2 = ImmutableList.of(tbl2); + + String nextToken = "1"; + when(glueClient.getTables(any(GetTablesRequest.class))) + .thenReturn(new GetTablesResult().withTableList(tableList1).withNextToken(nextToken)) + .thenReturn(new GetTablesResult().withTableList(tableList2)); + List result = metastoreClientDelegate.getTables(testDb.getName(), "*"); + + verify(glueClient, times(2)).getTables(any(GetTablesRequest.class)); + assertThat(result, is(tableNames)); + } + + @Test + public void testGetTableMeta() throws Exception { + List
tables = Lists.newArrayList(testTbl); + List tableTypes = Lists.newArrayList(TableType.MANAGED_TABLE.name()); + + when(glueClient.getDatabases(any(GetDatabasesRequest.class))).thenReturn( + new GetDatabasesResult().withDatabaseList(testDb)); + when(glueClient.getTables(any(GetTablesRequest.class))).thenReturn( + new GetTablesResult().withTableList(tables)); + + List tableMetaResult = metastoreClientDelegate.getTableMeta( + testDb.getName(), testTbl.getName(), tableTypes); + assertEquals(CatalogToHiveConverter.convertTableMeta( + testTbl, testDb.getName()), Iterables.getOnlyElement(tableMetaResult)); + } + + @Test + public void testGetTableMetaNullEmptyTableType() throws Exception { + List
tables = Lists.newArrayList(testTbl); + List tableTypes = null; + + when(glueClient.getDatabases(any(GetDatabasesRequest.class))).thenReturn( + new GetDatabasesResult().withDatabaseList(testDb)); + when(glueClient.getTables(any(GetTablesRequest.class))).thenReturn( + new GetTablesResult().withTableList(tables)); + + List tableMetaResult = metastoreClientDelegate.getTableMeta( + testDb.getName(), testTbl.getName(), tableTypes); + assertEquals(CatalogToHiveConverter.convertTableMeta( + testTbl, testDb.getName()), Iterables.getOnlyElement(tableMetaResult)); + + tableTypes = Lists.newArrayList(); + tableMetaResult = metastoreClientDelegate.getTableMeta( + testDb.getName(), testTbl.getName(), tableTypes); + assertEquals(CatalogToHiveConverter.convertTableMeta( + testTbl, testDb.getName()), Iterables.getOnlyElement(tableMetaResult)); + } + + @Test + public void testCreateTableWithExistingDir() throws Exception { + Path tblPath = new Path(testTbl.getStorageDescriptor().getLocation()); + setupMockWarehouseForPath(tblPath, true, true); + + when(glueClient.getDatabase(new GetDatabaseRequest().withName(testDb.getName()))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(new GetTableRequest().withDatabaseName(testTbl.getDatabaseName()) + .withName(testTbl.getName()))).thenThrow(new EntityNotFoundException("")); + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + + metastoreClientDelegate.createTable(CatalogToHiveConverter.convertTable(testTbl, testTbl.getDatabaseName())); + + verify(glueClient, times(1)).createTable(any(CreateTableRequest.class)); + verify(wh).isDir(tblPath); + verify(wh, never()).mkdirs(tblPath, true); + } + + @Test + public void testCreateTableWithExistingDirWithCatalogId() throws Exception { + Path tblPath = new Path(testTbl.getStorageDescriptor().getLocation()); + setupMockWarehouseForPath(tblPath, true, true); + + when(glueClient.getDatabase(new GetDatabaseRequest() + .withName(testDb.getName()) + .withCatalogId(CATALOG_ID))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(new GetTableRequest() + .withDatabaseName(testTbl.getDatabaseName()) + .withCatalogId(CATALOG_ID) + .withName(testTbl.getName()))).thenThrow(new EntityNotFoundException("")); + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + + metastoreClientDelegateCatalogId.createTable( + CatalogToHiveConverter.convertTable(testTbl, testTbl.getDatabaseName())); + ArgumentCaptor captor = ArgumentCaptor.forClass(CreateTableRequest.class); + verify(glueClient, times(1)).createTable(captor.capture()); + verify(wh).isDir(tblPath); + verify(wh, never()).mkdirs(tblPath, true); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + @Test + public void testCreateTableWithoutExistingDir() throws Exception { + Path tblPath = new Path(testTbl.getStorageDescriptor().getLocation()); + setupMockWarehouseForPath(tblPath, false, true); + + when(glueClient.getDatabase(new GetDatabaseRequest().withName(testDb.getName()))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(new GetTableRequest().withDatabaseName(testTbl.getDatabaseName()) + .withName(testTbl.getName()))).thenThrow(new EntityNotFoundException("")); + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + 
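// Table location does not exist yet, so the delegate is expected to create the directory via mkdirs +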
metastoreClientDelegate.createTable(CatalogToHiveConverter.convertTable(testTbl, testTbl.getDatabaseName())); + + verify(glueClient, times(1)).createTable(any(CreateTableRequest.class)); + verify(wh).isDir(tblPath); + verify(wh).mkdirs(tblPath, true); + } + + @Test (expected = org.apache.hadoop.hive.metastore.api.AlreadyExistsException.class) + public void testCreateTableWithExistTable() throws Exception { + setupMockWarehouseForPath(new Path(testTbl.getStorageDescriptor().getLocation()), true, false); + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(testTbl)); + metastoreClientDelegate.createTable(CatalogToHiveConverter.convertTable(testTbl, testTbl.getDatabaseName())); + } + + @Test + public void testAlterTable() throws Exception { + org.apache.hadoop.hive.metastore.api.Table newHiveTable + = CatalogToHiveConverter.convertTable(getTestTable(), testDb.getName()); + newHiveTable.setTableName(testTbl.getName()); + + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + metastoreClientDelegateCatalogId.alterTable( + testDb.getName(), testTbl.getName(), newHiveTable, null); + + ArgumentCaptor captor = ArgumentCaptor.forClass(UpdateTableRequest.class); + verify(glueClient, times(1)).updateTable(captor.capture()); + + TableInput expectedTableInput = GlueInputConverter.convertToTableInput(newHiveTable); + assertEquals(expectedTableInput, captor.getValue().getTableInput()); + } + + @Test(expected = UnsupportedOperationException.class) + public void testAlterTableRename() throws Exception { + org.apache.hadoop.hive.metastore.api.Table newHiveTable + = CatalogToHiveConverter.convertTable(getTestTable(), testDb.getName()); + metastoreClientDelegate.alterTable(testDb.getName(), testTbl.getName(), newHiveTable, null); + } + + @Test + public void testAlterTableSetExternalType() throws Exception { + org.apache.hadoop.hive.metastore.api.Table newHiveTable + = CatalogToHiveConverter.convertTable(getTestTable(), testDb.getName()); + newHiveTable.setTableType(MANAGED_TABLE.toString()); + newHiveTable.getParameters().put("EXTERNAL", "TRUE"); + + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + metastoreClientDelegate.alterTable( + testDb.getName(), newHiveTable.getTableName(), newHiveTable, null); + + ArgumentCaptor captor = ArgumentCaptor.forClass(UpdateTableRequest.class); + verify(glueClient, times(1)).updateTable(captor.capture()); + assertEquals(EXTERNAL_TABLE.toString(), captor.getValue().getTableInput().getTableType()); + } + + @Test + public void testAlterTableSetManagedType() throws Exception { + org.apache.hadoop.hive.metastore.api.Table newHiveTable + = CatalogToHiveConverter.convertTable(getTestTable(), testDb.getName()); + newHiveTable.setTableType(EXTERNAL_TABLE.toString()); + newHiveTable.getParameters().put("EXTERNAL", "FALSE"); + + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + 
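// With EXTERNAL=FALSE the update request is expected to carry MANAGED_TABLE as the table type +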
metastoreClientDelegate.alterTable( + testDb.getName(), newHiveTable.getTableName(), newHiveTable, null); + + ArgumentCaptor captor = ArgumentCaptor.forClass(UpdateTableRequest.class); + verify(glueClient, times(1)).updateTable(captor.capture()); + assertEquals(MANAGED_TABLE.toString(), captor.getValue().getTableInput().getTableType()); + } + + @Test(expected = UnsupportedOperationException.class) + public void testListTableNamesByFilter() throws Exception { + metastoreClientDelegate.listTableNamesByFilter("db", "filter", (short) 1); + } + + @Test + public void testDropTableWithDeleteData() throws Exception { + Path tblPath = new Path(testTbl.getStorageDescriptor().getLocation()); + List values = Lists.newArrayList("foo"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()).withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + + when(glueClient.getTable(new GetTableRequest() + .withDatabaseName(testTbl.getDatabaseName()).withName(testTbl.getName()))) + .thenReturn(new GetTableResult().withTable(testTbl)); + when(glueClient.deletePartition(new DeletePartitionRequest() + .withDatabaseName(testDb.getName()).withPartitionValues(values).withTableName(testTbl.getName()))) + .thenReturn(new DeletePartitionResult()); + when(glueClient.getPartitions(any(GetPartitionsRequest.class))) + .thenReturn(new GetPartitionsResult().withPartitions(partition)); + when(glueClient.getPartition(new GetPartitionRequest() + .withDatabaseName(testDb.getName()).withTableName(testTbl.getName()).withPartitionValues(values))) + .thenReturn(new GetPartitionResult().withPartition(partition)); + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + metastoreClientDelegate.dropTable( + testTbl.getDatabaseName(), testTbl.getName(), true, true, true); + + verify(glueClient).deleteTable(new DeleteTableRequest() + .withDatabaseName(testTbl.getDatabaseName()) + .withName(testTbl.getName())); + verify(wh).deleteDir(tblPath, true, true); + } + + @Test + public void testDropTableWithoutDeleteData() throws Exception { + Path tblPath = new Path(testTbl.getStorageDescriptor().getLocation()); + List values = Lists.newArrayList("foo"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()).withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + + when(glueClient.getTable(new GetTableRequest() + .withDatabaseName(testTbl.getDatabaseName()).withName(testTbl.getName()))) + .thenReturn(new GetTableResult().withTable(testTbl)); + when(glueClient.deletePartition(new DeletePartitionRequest() + .withDatabaseName(testDb.getName()).withPartitionValues(values).withTableName(testTbl.getName()))) + .thenReturn(new DeletePartitionResult()); + when(glueClient.getPartitions(any(GetPartitionsRequest.class))) + .thenReturn(new GetPartitionsResult().withPartitions(partition)); + when(glueClient.getPartition(new GetPartitionRequest() + .withDatabaseName(testDb.getName()).withTableName(testTbl.getName()).withPartitionValues(values))) + .thenReturn(new GetPartitionResult().withPartition(partition)); + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + metastoreClientDelegate.dropTable( + testTbl.getDatabaseName(), testTbl.getName(), false, true, true); + + verify(glueClient).deleteTable(new DeleteTableRequest() + 
.withDatabaseName(testTbl.getDatabaseName()) + .withName(testTbl.getName())); + verify(wh, never()).deleteDir(tblPath, true, true); + } + + @Test + public void testDropExternalTableWithoutDeleteData() throws Exception { + Path tblPath = new Path(testTbl.getStorageDescriptor().getLocation()); + List values = Lists.newArrayList("foo"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()).withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + testTbl.getParameters().put("EXTERNAL", "TRUE"); + + when(glueClient.getTable(new GetTableRequest() + .withDatabaseName(testTbl.getDatabaseName()).withName(testTbl.getName()))) + .thenReturn(new GetTableResult().withTable(testTbl)); + when(glueClient.deletePartition(new DeletePartitionRequest() + .withDatabaseName(testDb.getName()).withPartitionValues(values).withTableName(testTbl.getName()))) + .thenReturn(new DeletePartitionResult()); + when(glueClient.getPartitions(any(GetPartitionsRequest.class))) + .thenReturn(new GetPartitionsResult().withPartitions(partition)); + when(glueClient.getPartition(new GetPartitionRequest() + .withDatabaseName(testDb.getName()).withTableName(testTbl.getName()).withPartitionValues(values))) + .thenReturn(new GetPartitionResult().withPartition(partition)); + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + metastoreClientDelegate.dropTable( + testTbl.getDatabaseName(), testTbl.getName(), false, true, true); + + verify(glueClient).deleteTable(new DeleteTableRequest().withDatabaseName( + testTbl.getDatabaseName()).withName(testTbl.getName())); + verify(wh, never()).deleteDir(tblPath, true, true); + } + + @Test + public void testValidateTableAndCreateDirectoryVirtualView() throws Exception { + testTbl.setTableType(TableType.VIRTUAL_VIEW.toString()); + testTbl.getStorageDescriptor().setLocation(null); + org.apache.hadoop.hive.metastore.api.Table hiveTbl = CatalogToHiveConverter.convertTable( + testTbl, testTbl.getDatabaseName()); + + when(glueClient.getDatabase(any(GetDatabaseRequest.class))) + .thenReturn(new GetDatabaseResult().withDatabase(testDb)); + when(glueClient.getTable(new GetTableRequest() + .withDatabaseName(testTbl.getDatabaseName()).withName(testTbl.getName()))) + .thenThrow(EntityNotFoundException.class); + + assertFalse(metastoreClientDelegate.validateNewTableAndCreateDirectory(hiveTbl)); + assertNull(testTbl.getStorageDescriptor().getLocation()); + verify(wh, never()).mkdirs(any(Path.class), anyBoolean()); + } + + // ======================= Partition ======================= + + @Test + public void testGetPartitionByValues() throws Exception { + List values = Lists.newArrayList("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + GetPartitionRequest request = new GetPartitionRequest() + .withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withPartitionValues(values); + when(glueClient.getPartition(request)).thenReturn(new GetPartitionResult().withPartition(partition)); + org.apache.hadoop.hive.metastore.api.Partition result = metastoreClientDelegate.getPartition( + testDb.getName(), testTbl.getName(), values); + + verify(glueClient, times(1)).getPartition(request); + assertThat(result.getValues(), is(values)); + } + + @Test + public void 
testGetPartitionByValuesWithCatalogId() throws Exception { + List values = Lists.newArrayList("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + GetPartitionRequest request = new GetPartitionRequest() + .withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withPartitionValues(values) + .withCatalogId(CATALOG_ID); + when(glueClient.getPartition(request)).thenReturn(new GetPartitionResult().withPartition(partition)); + org.apache.hadoop.hive.metastore.api.Partition result = metastoreClientDelegateCatalogId.getPartition( + testDb.getName(), testTbl.getName(), values); + + ArgumentCaptor captor = ArgumentCaptor.forClass(GetPartitionRequest.class); + verify(glueClient, times(1)).getPartition(captor.capture()); + assertThat(result.getValues(), is(values)); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + @Test + public void testGetPartitionByName() throws Exception { + String partitionName = "/a=foo/b=bar"; + List values = ImmutableList.of("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + when(glueClient.getPartition(any(GetPartitionRequest.class))) + .thenReturn(new GetPartitionResult().withPartition(partition)); + + org.apache.hadoop.hive.metastore.api.Partition result + = metastoreClientDelegate.getPartition(testDb.getName(), testTbl.getName(), partitionName); + + verify(glueClient).getPartition(any(GetPartitionRequest.class)); + assertThat(result.getValues(), is(values)); + } + + @Test(expected = NoSuchObjectException.class) + public void testGetPartitionEntityNotFound() throws Exception { + when(glueClient.getPartition(any(GetPartitionRequest.class))) + .thenThrow(new EntityNotFoundException("Test exception: partition not found")); + metastoreClientDelegate.getPartition(testDb.getName(), testTbl.getName(), "testPart"); + verify(glueClient, times(1)).getPartition(any(GetPartitionRequest.class)); + } + + @Test + public void testGetPartitionsByNames() throws Exception { + String partitionName = "/a=foo/b=bar"; + List values = ImmutableList.of("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + when(glueClient.batchGetPartition(any(BatchGetPartitionRequest.class))) + .thenReturn(new BatchGetPartitionResult().withPartitions(partition)); + + List result + = metastoreClientDelegate.getPartitionsByNames( + testDb.getName(), testTbl.getName(), ImmutableList.of(partitionName)); + + verify(glueClient, times(1)).batchGetPartition(any(BatchGetPartitionRequest.class)); + assertNotNull(result); + assertThat(Iterables.getOnlyElement(result).getValues(), is(values)); + } + + @Test + public void testGetPartitionsByNamesWithCatalogId() throws Exception { + String partitionName = "/a=foo/b=bar"; + List values = ImmutableList.of("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + when(glueClient.batchGetPartition(any(BatchGetPartitionRequest.class))) + .thenReturn(new 
BatchGetPartitionResult().withPartitions(partition)); + + List result + = metastoreClientDelegateCatalogId.getPartitionsByNames( + testDb.getName(), testTbl.getName(), ImmutableList.of(partitionName)); + + ArgumentCaptor captor = ArgumentCaptor.forClass(BatchGetPartitionRequest.class); + verify(glueClient, times(1)).batchGetPartition(captor.capture()); + assertNotNull(result); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + @Test + public void testGetPartitionsByNamePropagateException() throws Exception { + String exceptionMessage = "Partition not found"; + when(glueClient.batchGetPartition(any(BatchGetPartitionRequest.class))) + .thenThrow(new EntityNotFoundException(exceptionMessage)); + + try { + metastoreClientDelegate.getPartitionsByNames( + testDb.getName(), testTbl.getName(), ImmutableList.of("/a=foo/b=bar")); + } catch (Exception e) { + assertThat(e, instanceOf(NoSuchObjectException.class)); + assertThat(e.getMessage(), containsString(exceptionMessage)); + } + verify(glueClient, times(1)).batchGetPartition(any(BatchGetPartitionRequest.class)); + } + + @Test + public void testGetPartitionsByNameTwoPages() throws Exception { + int numPartNames = BATCH_GET_PARTITIONS_MAX_REQUEST_SIZE + 10; + List partNames = getTestPartitionNames(numPartNames); + + when(glueClient.batchGetPartition(any(BatchGetPartitionRequest.class))) + .thenReturn(new BatchGetPartitionResult().withPartitions(ImmutableList.of())); + + metastoreClientDelegate.getPartitionsByNames(testDb.getName(), testTbl.getName(), partNames); + verify(glueClient, times(2)).batchGetPartition(any(BatchGetPartitionRequest.class)); + } + + private static List getTestPartitionNames(int numPartitions) { + List partNames = Lists.newArrayList(); + for (int i = 1; i < numPartitions; i++) { + partNames.add(String.format("a=%d", i)); + } + return partNames; + } + + @Test + public void testGetPartitions() throws Exception { + List expectedValues = Lists.newArrayList("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(expectedValues); + when(glueClient.getPartitions(any(GetPartitionsRequest.class))) + .thenReturn(new GetPartitionsResult().withPartitions(Lists.newArrayList(partition))); + + List res = metastoreClientDelegate.getPartitions( + testDb.getName(), testTbl.getName(), null, 10); + + verify(glueClient, times(1)).getPartitions(any(GetPartitionsRequest.class)); + assertFalse(CollectionUtils.isEmpty(res)); + List values = Iterables.getOnlyElement(res).getValues(); + assertThat(values, is(expectedValues)); + } + + @Test + public void testGetPartitionsParallel() throws Exception { + final int numSegments = 2; + HiveConf hiveConf = new HiveConf(this.conf); + hiveConf.setInt(GlueMetastoreClientDelegate.NUM_PARTITION_SEGMENTS_CONF, numSegments); + GlueMetastoreClientDelegate delegate = new GlueMetastoreClientDelegate( + hiveConf, new DefaultAWSGlueMetastore(hiveConf, glueClient), wh, lockManager); + + final Set> expectedValues = Sets.newHashSet(); + final List partitions = Lists.newArrayList(); + final int numPartitions = DefaultAWSGlueMetastore.GET_PARTITIONS_MAX_SIZE + 10; + final int maxPartitionsToRequest = numPartitions - 1; + + for (int i = 1; i <= numPartitions; i++) { + List partitionKeys = Arrays.asList("keyA:" + i, "keyB:" + i); + if (i <= maxPartitionsToRequest) { + expectedValues.add(partitionKeys); + } + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + 
.withValues(partitionKeys); + partitions.add(partition); + } + + when(glueClient.getPartitions(any(GetPartitionsRequest.class))) + .thenAnswer(new Answer() { + @Override + public GetPartitionsResult answer(InvocationOnMock invocation) { + GetPartitionsRequest request = invocation.getArgumentAt(0, GetPartitionsRequest.class); + GetPartitionsResult result; + if (request.getSegment() == null) { + fail("Should pass in segment"); + } + switch (request.getSegment().getSegmentNumber()) { + case 0: + result = new GetPartitionsResult().withPartitions(partitions.subList(0, numPartitions / 2)); + break; + case 1: + result = new GetPartitionsResult().withPartitions( + partitions.subList(numPartitions / 2, partitions.size())); + break; + default: + result = new GetPartitionsResult().withPartitions(Collections.emptyList()); + fail("Got segmentNumber >= " + numSegments); + } + return result; + } + }); + + List res = delegate.getPartitions( + testDb.getName(), testTbl.getName(), null, maxPartitionsToRequest); + + verify(glueClient, times(numSegments)) + .getPartitions(any(GetPartitionsRequest.class)); + assertFalse(CollectionUtils.isEmpty(res)); + Iterable> values = Iterables.transform(res, + new Function>() { + public List apply(org.apache.hadoop.hive.metastore.api.Partition partition) { + return partition.getValues(); + } + }); + assertThat(Sets.newHashSet(values), is(expectedValues)); + } + + @Test(expected = MetaException.class) + public void testGetPartitionsPartialFailure() throws Exception { + List partitionKeys1 = Arrays.asList("foo1", "bar1"); + final Partition partition1 = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(partitionKeys1); + + when(glueClient.getPartitions(any(GetPartitionsRequest.class))) + .thenAnswer(new Answer() { + @Override + public GetPartitionsResult answer(InvocationOnMock invocation) { + GetPartitionsRequest request = invocation.getArgumentAt(0, GetPartitionsRequest.class); + GetPartitionsResult result; + switch (request.getSegment().getSegmentNumber()) { + case 0: + result = new GetPartitionsResult().withPartitions(Lists.newArrayList(partition1)); + break; + default: + throw new OperationTimeoutException("timeout"); + } + return result; + } + }); + + List res = metastoreClientDelegate.getPartitions( + testDb.getName(), testTbl.getName(), null, -1); + } + + @Test(expected = IllegalArgumentException.class) + public void testTooHighGluePartitionSegments() throws MetaException { + HiveConf hiveConf = new HiveConf(this.conf); + hiveConf.setInt(GlueMetastoreClientDelegate.NUM_PARTITION_SEGMENTS_CONF, + DefaultAWSGlueMetastore.MAX_NUM_PARTITION_SEGMENTS + 1); + GlueMetastoreClientDelegate delegate = new GlueMetastoreClientDelegate( + hiveConf, new DefaultAWSGlueMetastore(hiveConf, glueClient), wh, lockManager); + } + + @Test + public void testDropPartitionUsingValues() throws Exception { + List values = Lists.newArrayList("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + DeletePartitionRequest request = new DeletePartitionRequest() + .withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withPartitionValues(values); + + when(glueClient.deletePartition(request)).thenReturn(new DeletePartitionResult()); + when(glueClient.getPartition(any(GetPartitionRequest.class))) + .thenReturn(new GetPartitionResult().withPartition(partition)); + 
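// Stub getTable so dropPartition can look up the parent table +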
when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(testTbl)); + + metastoreClientDelegate.dropPartition( + testDb.getName(), testTbl.getName(), values, false, false, false); + verify(glueClient, times(1)).deletePartition(request); + } + + @Test + public void testDropPartitionUsingValuesWithCatalogId() throws Exception { + List values = Lists.newArrayList("foo", "bar"); + Partition partition = new Partition().withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withValues(values) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + DeletePartitionRequest request = new DeletePartitionRequest() + .withDatabaseName(testDb.getName()) + .withTableName(testTbl.getName()) + .withPartitionValues(values); + + when(glueClient.deletePartition(request)).thenReturn(new DeletePartitionResult()); + when(glueClient.getPartition(any(GetPartitionRequest.class))) + .thenReturn(new GetPartitionResult().withPartition(partition)); + when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(testTbl)); + + metastoreClientDelegateCatalogId.dropPartition( + testDb.getName(), testTbl.getName(), values, false, false, false); + ArgumentCaptor captor = ArgumentCaptor.forClass(DeletePartitionRequest.class); + verify(glueClient, times(1)).deletePartition(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + @Test + public void testAppendPartition() throws Exception { + List values = ImmutableList.of("foo"); + when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(testTbl)); + Path partLocation = new Path(testTbl.getStorageDescriptor().getLocation(), + Warehouse.makePartName(CatalogToHiveConverter.convertFieldSchemaList(testTbl.getPartitionKeys()), values)); + setupMockWarehouseForPath(partLocation, false, true); + mockBatchCreatePartitionsSucceed(); + + org.apache.hadoop.hive.metastore.api.Partition res = + metastoreClientDelegate.appendPartition(testDb.getName(), testTbl.getName(), values); + + verify(wh, times(1)).mkdirs(partLocation, true); + assertThat(res.getValues(), is(values)); + } + + @Test + public void testAddPartitionsEmpty() throws Exception { + List partitions = Lists.newArrayList(); + List partitionsCreated = + metastoreClientDelegate.addPartitions(partitions, false, true); + + verify(glueClient, never()).getTable(any(GetTableRequest.class)); + verify(glueClient, never()).batchCreatePartition(any(BatchCreatePartitionRequest.class)); + assertTrue(CollectionUtils.isEmpty(partitionsCreated)); + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitions() throws Exception { + mockBatchCreatePartitionsSucceed(); + setupMockWarehouseForPath(new Path(testTbl.getStorageDescriptor() + .getLocation().toString()), false, true); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + List partitionsCreated = + metastoreClientDelegate.addPartitions(partitions, false, true); + + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)).batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, never()).deleteDir(any(Path.class), eq(true)); + assertEquals(numPartitions, partitionsCreated.size()); + assertEquals(new HashSet(partitionsCreated), + new 
HashSet(partitions)); + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitionsEmptyPartitionLocation() throws Exception { + // Case: table contains location & partition location is empty. + // Test that created partitions contains location + int numPartitions = 2; + List partitionsCreated = + addPartitionsWithEmptyLocationsValid(numPartitions); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + for (org.apache.hadoop.hive.metastore.api.Partition part : partitionsCreated) { + assertThat(part.getSd().getLocation(), notNullValue()); + } + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitionsEmptyTableAndPartitionLocation() throws Exception { + // Case: table location is empty (VIRTUAL_VIEW) & partition location is empty. + // Test that created partitions does not contain location as these are Views. + testTbl.getStorageDescriptor().setLocation(null); + int numPartitions = 1; + List partitionsCreated = + addPartitionsWithEmptyLocationsValid(numPartitions); + verify(wh, never()).mkdirs(any(Path.class), anyBoolean()); + assertThat(partitionsCreated.get(0).getSd().getLocation(), nullValue()); + assertDaemonThreadPools(); + } + + private List addPartitionsWithEmptyLocationsValid( + int numPartitions) throws Exception { + List partitions = getTestPartitions(numPartitions); + for (org.apache.hadoop.hive.metastore.api.Partition partition : partitions) { + partition.getSd().setLocation(null); + } + mockBatchCreatePartitionsSucceed(); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + when(wh.mkdirs(any(Path.class), anyBoolean())).thenReturn(true); + + List partitionsCreated = + metastoreClientDelegate.addPartitions(partitions, false, true); + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)).batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, never()).deleteDir(any(Path.class), anyBoolean()); + assertEquals(numPartitions, partitionsCreated.size()); + assertEquals(new HashSet(partitionsCreated), + new HashSet(partitions)); + return partitionsCreated; + } + + @Test(expected = MetaException.class) + public void testAddPartitions_PartitionViewWithLocation() throws Exception { + // Case: table location is empty (VIRTUAL_VIEW) with partition containing location + // In Hive, this throws MetaException because it doesn't allow parititon views to have location + Table table = testTbl; + table.getStorageDescriptor().setLocation(null); + + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + + mockBatchCreatePartitionsSucceed(); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(table)); + when(wh.mkdirs(any(Path.class), anyBoolean())).thenReturn(true); + + metastoreClientDelegate.addPartitions(partitions, false, true); + + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitionsDoNotNeedResult() throws Exception { + mockBatchCreatePartitionsSucceed(); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + List partitionsCreated = + metastoreClientDelegate.addPartitions(partitions, false, false); + + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)).batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, 
times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, never()).deleteDir(any(Path.class), eq(true)); + assertThat(partitionsCreated, is(nullValue())); + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitionsTwoPages() throws Exception { + mockBatchCreatePartitionsSucceed(); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + int numPartitions = (int) (BATCH_CREATE_PARTITIONS_MAX_REQUEST_SIZE * 1.2); + int expectedBatches = 2; + List partitions = getTestPartitions(numPartitions); + List partitionsCreated = + metastoreClientDelegate.addPartitions(partitions, false, true); + + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(expectedBatches)).batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, never()).deleteDir(any(Path.class), eq(true)); + assertEquals(numPartitions, partitionsCreated.size()); + assertEquals(new HashSet(partitionsCreated), + new HashSet(partitions)); + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitionsTwoPagesWithCatalogId() throws Exception { + mockBatchCreatePartitionsSucceed(); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + int numPartitions = (int) (BATCH_CREATE_PARTITIONS_MAX_REQUEST_SIZE * 1.2); + int expectedBatches = 2; + List partitions = getTestPartitions(numPartitions); + List partitionsCreated = + metastoreClientDelegateCatalogId.addPartitions(partitions, false, true); + ArgumentCaptor captor = ArgumentCaptor.forClass(BatchCreatePartitionRequest.class); + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(expectedBatches)).batchCreatePartition(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, never()).deleteDir(any(Path.class), eq(true)); + assertEquals(numPartitions, partitionsCreated.size()); + assertEquals(new HashSet(partitionsCreated), + new HashSet(partitions)); + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitionsFailedServiceException() throws Exception { + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + List values = partitions.get(0).getValues(); + when(glueClient.batchCreatePartition(any(BatchCreatePartitionRequest.class))) + .thenReturn(new BatchCreatePartitionResult().withErrors(ObjectTestUtils.getPartitionError(values, + new InternalServiceException("exception")))); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + try { + metastoreClientDelegate.addPartitions(partitions, false, true); + fail("should throw"); + } catch (Exception e) { + assertThat(e, is(instanceOf(MetaException.class))); + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)) + .batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, times(1)).deleteDir(any(Path.class), eq(true)); + assertDaemonThreadPools(); + } + } + + @Test + public void testAddPartitionsFailedAlreadyExistsException() throws Exception { + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + List values = ImmutableList.of("foo1"); + + 
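// Simulate Glue reporting a partition as already existing; the delegate should surface Hive's AlreadyExistsException +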
when(glueClient.batchCreatePartition(any(BatchCreatePartitionRequest.class))) + .thenReturn(new BatchCreatePartitionResult().withErrors(ObjectTestUtils.getPartitionError(values, + new com.amazonaws.services.glue.model.AlreadyExistsException("exception")))); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + try { + metastoreClientDelegate.addPartitions(partitions, false, true); + fail("Should throw"); + } catch (Exception e) { + assertThat(e, is(instanceOf(org.apache.hadoop.hive.metastore.api.AlreadyExistsException.class))); + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)) + .batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, times(1)).deleteDir(any(Path.class), eq(true)); + assertDaemonThreadPools(); + } + } + + @Test + public void testAddPartitionsThrowsEntityNotFoundException() throws Exception { + when(glueClient.batchCreatePartition(any(BatchCreatePartitionRequest.class))) + .thenThrow(new EntityNotFoundException("exception")); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + + try { + metastoreClientDelegate.addPartitions(partitions, false, true); + fail("Should throw"); + } catch (Exception e) { + assertThat(e, is(instanceOf(NoSuchObjectException.class))); + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)) + .batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, times(numPartitions)).deleteDir(any(Path.class), eq(true)); + assertDaemonThreadPools(); + } + } + + @Test + public void testAddPartitionsThrowsExceptionSecondPage() throws Exception { + int numPartitions = 200; + int secondPageSize = numPartitions - BATCH_CREATE_PARTITIONS_MAX_REQUEST_SIZE; + when(glueClient.batchCreatePartition(any(BatchCreatePartitionRequest.class))) + .thenReturn(new BatchCreatePartitionResult()) + .thenThrow(new InvalidInputException("exception")); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + List partitions = getTestPartitions(numPartitions); + + try { + metastoreClientDelegate.addPartitions(partitions, false, true); + fail("Should throw"); + } catch (Exception e) { + assertThat(e, is(instanceOf(InvalidObjectException.class))); + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(2)) + .batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, times(secondPageSize)).deleteDir(any(Path.class), eq(true)); + assertDaemonThreadPools(); + } + } + + @Test + public void testAddPartitionsIfNotExists() throws Exception { + List values = ImmutableList.of("foo1"); + when(glueClient.batchCreatePartition(any(BatchCreatePartitionRequest.class))) + .thenReturn(new BatchCreatePartitionResult().withErrors(ObjectTestUtils.getPartitionError(values, + new com.amazonaws.services.glue.model.AlreadyExistsException("exception")))); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + List 
partitionsCreated = + metastoreClientDelegate.addPartitions(partitions, true, true); + + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)).batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, never()).deleteDir(any(Path.class), eq(true)); + assertEquals(1, partitionsCreated.size()); + assertTrue(partitions.contains(partitionsCreated.get(0))); + assertDaemonThreadPools(); + } + + @Test + public void testAddPartitionsKeysAndValuesNotMatch() throws Exception { + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + // make the partition value size inconsistent with key size + partitions.get(1).setValues(Lists.newArrayList("foo1", "bar1")); + + when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(testTbl)); + + try { + metastoreClientDelegate.addPartitions(partitions, true, true); + fail("should throw"); + } catch (IllegalArgumentException e) { + verify(wh, never()).getDnsPath(any(Path.class)); + assertDaemonThreadPools(); + } + } + + @Test + public void testAddPartitionsDeleteAddedPathsWhenAddPathFail() throws Exception { + int numPartitions = 2; + List partitions = getTestPartitions(numPartitions); + + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + when(wh.isDir(any(Path.class))).thenReturn(false); + when(wh.mkdirs(any(Path.class), eq(true))).thenReturn(true).thenReturn(false); // succeed first, then fail + + try { + metastoreClientDelegate.addPartitions(partitions, true, true); + fail("should throw"); + } catch (MetaException e) { + verify(wh, times(numPartitions)).getDnsPath(any(Path.class)); + verify(wh, times(numPartitions)).isDir(any(Path.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, times(1)).deleteDir(any(Path.class), eq(true)); + assertDaemonThreadPools(); + } + } + + @Test + public void testAddPartitionsCallGetPartitionForInternalServiceException() throws Exception { + int numPartitions = 3; + String dbName = testDb.getName(); + String tableName = testTbl.getName(); + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List values3 = Lists.newArrayList("val3"); + Partition partition1 = ObjectTestUtils.getTestPartition(dbName, tableName, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(dbName, tableName, values2); + Partition partition3 = ObjectTestUtils.getTestPartition(dbName, tableName, values3); + List partitions = Lists.newArrayList(partition1, partition2, partition3); + + when(glueClient.batchCreatePartition(any(BatchCreatePartitionRequest.class))) + .thenThrow(new InternalServiceException("InternalServiceException")); + when(glueClient.getTable(any(GetTableRequest.class))) + .thenReturn(new GetTableResult().withTable(testTbl)); + when(glueClient.getPartition(new GetPartitionRequest() + .withDatabaseName(dbName) + .withTableName(tableName) + .withPartitionValues(partition1.getValues()))) + .thenReturn(new GetPartitionResult().withPartition(partition1)); + when(glueClient.getPartition(new GetPartitionRequest() + .withDatabaseName(dbName) + .withTableName(tableName) + .withPartitionValues(partition2.getValues()))) + .thenThrow(new EntityNotFoundException("EntityNotFoundException")); + when(glueClient.getPartition(new GetPartitionRequest() + .withDatabaseName(dbName) + .withTableName(tableName) + 
.withPartitionValues(partition3.getValues()))) + .thenThrow(new NullPointerException("NullPointerException")); + + try { + metastoreClientDelegate.addPartitions(CatalogToHiveConverter + .convertPartitions(partitions), false, true); + fail("Should throw"); + } catch (Exception e) { + assertThat(e, is(instanceOf(MetaException.class))); + verify(glueClient, times(1)).getTable(any(GetTableRequest.class)); + verify(glueClient, times(1)) + .batchCreatePartition(any(BatchCreatePartitionRequest.class)); + verify(glueClient, times(numPartitions)).getPartition(any(GetPartitionRequest.class)); + verify(wh, times(numPartitions)).mkdirs(any(Path.class), eq(true)); + verify(wh, times(2)).deleteDir(any(Path.class), eq(true)); + assertDaemonThreadPools(); + } + } + + private void mockBatchCreatePartitionsSucceed() { + when(glueClient.batchCreatePartition(any(BatchCreatePartitionRequest.class))) + .thenReturn(new BatchCreatePartitionResult()); + } + + private List getTestPartitions(int count) { + List partitions = Lists.newArrayList(); + for (int i = 0; i < count; i++) { + List values = ImmutableList.of("foo" + i); + Partition partition = ObjectTestUtils.getTestPartition(testDb.getName(), testTbl.getName(), values); + partitions.add(CatalogToHiveConverter.convertPartition(partition)); + } + return partitions; + } + + @Test + public void testAlterPartitions() throws Exception { + List values = ImmutableList.of("foo", "bar"); + Partition partition = getTestPartition(testTbl.getDatabaseName(), testTbl.getName(), values); + org.apache.hadoop.hive.metastore.api.Partition hivePartition = CatalogToHiveConverter.convertPartition(partition); + PartitionInput input = GlueInputConverter.convertToPartitionInput(partition); + UpdatePartitionRequest request = new UpdatePartitionRequest() + .withDatabaseName(testTbl.getDatabaseName()) + .withTableName(testTbl.getName()) + .withPartitionInput(input) + .withPartitionValueList(partition.getValues()); + + when(glueClient.updatePartition(request)).thenReturn(new UpdatePartitionResult()); + metastoreClientDelegate.alterPartitions(testDb.getName(), testTbl.getName(), ImmutableList.of(hivePartition)); + + verify(glueClient, times(1)).updatePartition(any(UpdatePartitionRequest.class)); + } + + @Test + public void testAlterParititonDDLTimeUpdated() throws Exception { + List values = ImmutableList.of("foo", "bar"); + org.apache.hadoop.hive.metastore.api.Partition partition + = CatalogToHiveConverter.convertPartition( + getTestPartition(testTbl.getDatabaseName(), testTbl.getName(), values)); + metastoreClientDelegate.alterPartitions( + testTbl.getDatabaseName(), testTbl.getName(), Lists.newArrayList(partition)); + + ArgumentCaptor captor = ArgumentCaptor.forClass(UpdatePartitionRequest.class); + verify(glueClient, times(1)).updatePartition(captor.capture()); + assertTrue(captor.getValue().getPartitionInput().getParameters().containsKey(hive_metastoreConstants.DDL_TIME)); + } + + // =================== Roles & Privilege =================== + + @Test(expected = UnsupportedOperationException.class) + public void testGrantPublicRole() throws Exception { + metastoreClientDelegate.grantRole("public", "user", + PrincipalType.USER, "grantor", + PrincipalType.ROLE, true); + } + + @Test(expected = UnsupportedOperationException.class) + public void testRevokeRole() throws Exception { + metastoreClientDelegate.revokeRole("role", "user", + PrincipalType.USER, true); + } + + @Test(expected = UnsupportedOperationException.class) + public void testCreateRole() throws Exception { + 
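// Role management is not supported by the Glue-backed delegate, so createRole should throw UnsupportedOperationException +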
metastoreClientDelegate.createRole(new org.apache.hadoop.hive.metastore.api.Role( + "role", (int) (new Date().getTime() / 1000), "owner")); + } + + @Test(expected = UnsupportedOperationException.class) + public void testCreatePublicRole() throws Exception { + metastoreClientDelegate.createRole(new org.apache.hadoop.hive.metastore.api.Role( + "public", (int) (new Date().getTime() / 1000), "owner")); + } + + @Test(expected = UnsupportedOperationException.class) + public void testDropRole() throws Exception { + metastoreClientDelegate.dropRole("role"); + } + + @Test(expected = UnsupportedOperationException.class) + public void testDropPublicRole() throws Exception { + metastoreClientDelegate.dropRole("public"); + } + + @Test(expected = UnsupportedOperationException.class) + public void testDropAdminRole() throws Exception { + metastoreClientDelegate.dropRole("admin"); + } + + @Test(expected = UnsupportedOperationException.class) + public void testListRolesWithRolePrincipalType() throws Exception { + metastoreClientDelegate.listRoles("user", PrincipalType.ROLE); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGetPrincipalsInRole() throws Exception { + metastoreClientDelegate.getPrincipalsInRole( + new org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleRequest("role")); + } + + @Test(expected = UnsupportedOperationException.class) + public void testRoleGrantsForPrincipal() throws Exception { + metastoreClientDelegate.getRoleGrantsForPrincipal( + new org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest("user", + PrincipalType.USER)); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGrantRole() throws Exception { + metastoreClientDelegate.grantRole("role", "user", + PrincipalType.USER, "grantor", + PrincipalType.ROLE, true); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGrantPrivileges() throws Exception { + metastoreClientDelegate.grantPrivileges(ObjectTestUtils.getPrivilegeBag()); + } + + @Test(expected = UnsupportedOperationException.class) + public void testRevokePrivileges() throws Exception { + metastoreClientDelegate.revokePrivileges(ObjectTestUtils.getPrivilegeBag(), false); + } + + @Test(expected = UnsupportedOperationException.class) + public void testListPrivileges() throws Exception { + String principal = "user1"; + PrincipalType principalType = + PrincipalType.USER; + + metastoreClientDelegate.listPrivileges(principal, principalType, ObjectTestUtils.getHiveObjectRef()); + } + + @Test + public void testGetPrincipalPrivilegeSet() throws Exception { + String user = "user1"; + List groupList = ImmutableList.of(); + org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet privilegeSet = metastoreClientDelegate + .getPrivilegeSet(ObjectTestUtils.getHiveObjectRef(), user, groupList); + + assertThat(privilegeSet, is(nullValue())); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGrantPrivilegesThrowingMetaException() throws Exception { + metastoreClientDelegate.grantPrivileges(ObjectTestUtils.getPrivilegeBag()); + } + + // ====================== Statistics ====================== + + @Test(expected = UnsupportedOperationException.class) + public void testDeletePartitionColumnStatisticsValid() throws Exception { + String databaseName = "database-name"; + String tableName = "table-name"; + String partitionName = "A=a/B=b"; + String columnName = "column-name"; + + metastoreClientDelegate.deletePartitionColumnStatistics(databaseName, tableName, 
partitionName, columnName); + } + + @Test(expected = UnsupportedOperationException.class) + public void testDeleteTableColumnStatistics() throws Exception { + String databaseName = "database-name"; + String tableName = "table-name"; + String columnName = "column-name"; + + metastoreClientDelegate.deleteTableColumnStatistics(databaseName, tableName, columnName); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGetPartitionColumnStatisticsValid() throws Exception { + String databaseName = "database-name"; + String tableName = "table-name"; + List partitionNames = ImmutableList.of("A=a/B=b", "A=x/B=y"); + List columnNames = ImmutableList.of("decimal-column", "string-column"); + + metastoreClientDelegate.getPartitionColumnStatistics(databaseName, tableName, partitionNames, columnNames); + } + + @Test(expected = UnsupportedOperationException.class) + public void testGetTableColumnStatistics() throws Exception { + String databaseName = "database-name"; + String tableName = "table-name"; + List columnNames = ImmutableList.of("decimal-column", "string-column"); + + metastoreClientDelegate.getTableColumnStatistics(databaseName, tableName, columnNames); + } + + @Test(expected = UnsupportedOperationException.class) + public void testUpdatePartitionColumnStatistics() throws Exception { + org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics = + ObjectTestUtils.getHivePartitionColumnStatistics(); + + metastoreClientDelegate.updatePartitionColumnStatistics(columnStatistics); + } + + @Test(expected = UnsupportedOperationException.class) + public void testUpdateTableColumnStatistics() throws Exception { + org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics = + ObjectTestUtils.getHiveTableColumnStatistics(); + + metastoreClientDelegate.updateTableColumnStatistics(columnStatistics); + } + + private void assertDaemonThreadPools() { + String threadNameCreatePrefix = + GlueMetastoreClientDelegate.GLUE_METASTORE_DELEGATE_THREADPOOL_NAME_FORMAT.substring(0, + GlueMetastoreClientDelegate.GLUE_METASTORE_DELEGATE_THREADPOOL_NAME_FORMAT.indexOf('%')); + for (Thread thread : Thread.getAllStackTraces().keySet()) { + String threadName = thread.getName(); + if (threadName != null && threadName.startsWith(threadNameCreatePrefix)) { + assertTrue(thread.isDaemon()); + } + } + } + + //==================== Functions ===================== + + @Test + public void getFunction() throws Exception { + UserDefinedFunction udf = createUserDefinedFunction(); + when(glueClient.getUserDefinedFunction(any(GetUserDefinedFunctionRequest.class))).thenReturn( + new GetUserDefinedFunctionResult().withUserDefinedFunction(udf)); + metastoreClientDelegateCatalogId.getFunction(testDb.getName(), "test-func"); + ArgumentCaptor captor = + ArgumentCaptor.forClass(GetUserDefinedFunctionRequest.class); + verify(glueClient, times(1)).getUserDefinedFunction(captor.capture()); + GetUserDefinedFunctionRequest request = captor.getValue(); + assertEquals(CATALOG_ID, request.getCatalogId()); + assertEquals(testDb.getName(), request.getDatabaseName()); + assertEquals("test-func", request.getFunctionName()); + } + + @Test + public void getFunctions() throws Exception { + UserDefinedFunction udf1 = createUserDefinedFunction(); + UserDefinedFunction udf2 = createUserDefinedFunction(); + + List udfList = new ArrayList<>(); + udfList.add(udf1); + udfList.add(udf2); + + when(glueClient.getUserDefinedFunctions(any(GetUserDefinedFunctionsRequest.class))).thenReturn( + new 
GetUserDefinedFunctionsResult().withUserDefinedFunctions(udfList).withNextToken(null)); + List result = metastoreClientDelegateCatalogId.getFunctions(testDb.getName(), "test-func"); + ArgumentCaptor captor = ArgumentCaptor + .forClass(GetUserDefinedFunctionsRequest.class); + verify(glueClient, times(1)).getUserDefinedFunctions(captor.capture()); + GetUserDefinedFunctionsRequest request = captor.getValue(); + assertEquals(CATALOG_ID, request.getCatalogId()); + assertEquals(testDb.getName(), request.getDatabaseName()); + assertEquals("test-func", request.getPattern()); + assertEquals(2, result.size()); + } + + @Test + public void testCreateFunction() throws Exception { + org.apache.hadoop.hive.metastore.api.Function hiveFunction = createHiveFunction(); + metastoreClientDelegateCatalogId.createFunction(hiveFunction); + ArgumentCaptor captor = ArgumentCaptor + .forClass(CreateUserDefinedFunctionRequest.class); + verify(glueClient, times(1)).createUserDefinedFunction(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + @Test + public void testDropFunction() throws Exception { + metastoreClientDelegateCatalogId.dropFunction(testDb.getName(), "test-func"); + ArgumentCaptor captor = ArgumentCaptor + .forClass(DeleteUserDefinedFunctionRequest.class); + verify(glueClient, times(1)).deleteUserDefinedFunction(captor.capture()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + @Test + public void testAlterFunction() throws Exception { + org.apache.hadoop.hive.metastore.api.Function hiveFunction = createHiveFunction(); + metastoreClientDelegateCatalogId.alterFunction(testDb.getName(), "test-func", createHiveFunction()); + ArgumentCaptor captor = ArgumentCaptor + .forClass(UpdateUserDefinedFunctionRequest.class); + verify(glueClient, times(1)).updateUserDefinedFunction(captor.capture()); + UpdateUserDefinedFunctionRequest request = captor.getValue(); + assertEquals(testDb.getName(), request.getDatabaseName()); + assertEquals("test-func", request.getFunctionName()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + } + + private org.apache.hadoop.hive.metastore.api.Function createHiveFunction() { + org.apache.hadoop.hive.metastore.api.Function hiveFunction = new org.apache.hadoop.hive.metastore.api.Function(); + hiveFunction.setClassName("testClass"); + hiveFunction.setFunctionName("test-func"); + hiveFunction.setOwnerName("test-owner"); + hiveFunction.setOwnerType(PrincipalType.USER); + return hiveFunction; + } + + private UserDefinedFunction createUserDefinedFunction() { + UserDefinedFunction udf = new UserDefinedFunction(); + udf.setFunctionName("test-func"); + udf.setClassName("test-class"); + udf.setCreateTime(new Date()); + udf.setOwnerName("test-owner"); + udf.setOwnerType(com.amazonaws.services.glue.model.PrincipalType.USER.name()); + return udf; + } + + // ==================== Schema ===================== + @Test + public void testGetFields() throws Exception { + when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(testTbl)); + List res = metastoreClientDelegateCatalogId.getFields(testDb.getName(), testTbl.getName()); + ArgumentCaptor captor = ArgumentCaptor.forClass(GetTableRequest.class); + verify(glueClient, times(1)).getTable(captor.capture()); + GetTableRequest request = captor.getValue(); + assertEquals(testDb.getName(), request.getDatabaseName()); + assertEquals(testTbl.getName(), request.getName()); + assertEquals(CATALOG_ID, captor.getValue().getCatalogId()); + 
+    assertEquals(1, res.size());
+  }
+
+  @Test
+  public void testGetSchema() throws Exception {
+    when(glueClient.getTable(any(GetTableRequest.class))).thenReturn(new GetTableResult().withTable(testTbl));
+    List<FieldSchema> res = metastoreClientDelegateCatalogId.getFields(testDb.getName(), testTbl.getName());
+    ArgumentCaptor<GetTableRequest> captor = ArgumentCaptor.forClass(GetTableRequest.class);
+    verify(glueClient, times(1)).getTable(captor.capture());
+    GetTableRequest request = captor.getValue();
+    assertEquals(testDb.getName(), request.getDatabaseName());
+    assertEquals(testTbl.getName(), request.getName());
+    assertEquals(CATALOG_ID, captor.getValue().getCatalogId());
+    assertEquals(1, res.size());
+  }
+}
diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/GlueTestClientFactory.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/GlueTestClientFactory.java
new file mode 100644
index 000000000000..d3970eb865da
--- /dev/null
+++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/GlueTestClientFactory.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.aws.glue.util;
+
+import com.amazonaws.AmazonClientException;
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.AmazonWebServiceRequest;
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
+import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
+import com.amazonaws.retry.PredefinedRetryPolicies;
+import com.amazonaws.retry.RetryPolicy;
+import com.amazonaws.services.glue.AWSGlue;
+import com.amazonaws.services.glue.AWSGlueClientBuilder;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.iceberg.aws.glue.metastore.GlueClientFactory;
+
+public final class GlueTestClientFactory implements GlueClientFactory {
+
+  private static final int SC_GATEWAY_TIMEOUT = 504;
+
+  @Override
+  public AWSGlue newClient() throws MetaException {
+    AWSGlueClientBuilder glueClientBuilder = AWSGlueClientBuilder.standard()
+        .withClientConfiguration(createGatewayTimeoutRetryableConfiguration())
+        .withCredentials(new DefaultAWSCredentialsProviderChain());
+
+    String endpoint = System.getProperty("endpoint");
+    if (StringUtils.isNotBlank(endpoint)) {
+      glueClientBuilder.setEndpointConfiguration(new EndpointConfiguration(endpoint, null));
+    }
+
+    return glueClientBuilder.build();
+  }
+
+  private static ClientConfiguration createGatewayTimeoutRetryableConfiguration() {
+    ClientConfiguration retryableConfig = new ClientConfiguration();
+    RetryPolicy.RetryCondition retryCondition = new PredefinedRetryPolicies.SDKDefaultRetryCondition() {
+      @Override
+      public boolean shouldRetry(AmazonWebServiceRequest originalRequest, AmazonClientException exception,
+          int retriesAttempted) {
+        if (super.shouldRetry(originalRequest, exception, retriesAttempted)) {
+          return true;
+        }
+        if (exception instanceof AmazonServiceException) {
+          AmazonServiceException ase = (AmazonServiceException) exception;
+          if (ase.getStatusCode() == SC_GATEWAY_TIMEOUT) {
+            return true;
+          }
+        }
+        return false;
+      }
+    };
+    RetryPolicy retryPolicy = new RetryPolicy(retryCondition, PredefinedRetryPolicies.DEFAULT_BACKOFF_STRATEGY,
+        PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY, true);
+    retryableConfig.setRetryPolicy(retryPolicy);
+    return retryableConfig;
+  }
+
+}
diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/ObjectTestUtils.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/ObjectTestUtils.java
new file mode 100644
index 000000000000..b298454eebca
--- /dev/null
+++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/ObjectTestUtils.java
@@ -0,0 +1,392 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.Column; +import com.amazonaws.services.glue.model.Database; +import com.amazonaws.services.glue.model.ErrorDetail; +import com.amazonaws.services.glue.model.Order; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import com.amazonaws.services.glue.model.PrincipalType; +import com.amazonaws.services.glue.model.ResourceType; +import com.amazonaws.services.glue.model.ResourceUri; +import com.amazonaws.services.glue.model.SerDeInfo; +import com.amazonaws.services.glue.model.SkewedInfo; +import com.amazonaws.services.glue.model.StorageDescriptor; +import com.amazonaws.services.glue.model.Table; +import com.amazonaws.services.glue.model.UserDefinedFunction; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.HiveObjectType; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.PrivilegeBag; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.aws.glue.converters.HiveToCatalogConverter; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; + +import static org.apache.iceberg.aws.glue.converters.ConverterUtils.INDEX_DB_NAME; +import static org.apache.iceberg.aws.glue.converters.ConverterUtils.INDEX_DEFERRED_REBUILD; +import static org.apache.iceberg.aws.glue.converters.ConverterUtils.INDEX_HANDLER_CLASS; +import static org.apache.iceberg.aws.glue.converters.ConverterUtils.INDEX_ORIGIN_TABLE_NAME; +import static org.apache.iceberg.aws.glue.converters.ConverterUtils.INDEX_TABLE_NAME; + +public final class ObjectTestUtils { + + private ObjectTestUtils() { + } + + /** + * + * @return a test db + */ + public static Database getTestDatabase() { + + Map parameters = Maps.newHashMap(); + parameters.put("param1", "value1"); + parameters.put("param2", "value2"); + + Database database = new Database() + .withName("test-db-" + UUID.randomUUID().toString().replaceAll("[^a-zA-Z0-9]+", "")) + .withDescription("database desc") + .withLocationUri("/db") + .withParameters(parameters); + + return database; + } + + /** + * @param len len + * @return a random string of size len + */ + public static String getStringOfLength(final int len) { + StringBuffer sb = new StringBuffer(UUID.randomUUID().toString()); + for (int i = sb.length(); i < len; i++) { + sb.append('0'); + } + return sb.toString(); + } + + public static Table getTestTable() { + Table table = new Table(); + table.setName("testtable" + + UUID.randomUUID().toString().replaceAll("[^a-zA-Z0-9]+", "").substring(0, 4)); + 
table.setOwner("owner"); + table.setCreateTime(new Date(System.currentTimeMillis() / 1000 * 1000)); + table.setLastAccessTime(new Date(System.currentTimeMillis() / 1000 * 1000)); + table.setParameters(new HashMap()); + table.setPartitionKeys(getTestFieldList()); + table.setStorageDescriptor(getTestStorageDescriptor()); + table.setTableType("MANAGED_TABLE"); + table.setRetention(1); + table.setViewOriginalText("originalText"); + table.setViewExpandedText("expandedText"); + return table; + } + + public static Table getTestTable(String dbName) { + Table table = getTestTable(); + table.setDatabaseName(dbName); + return table; + } + + public static StorageDescriptor getTestStorageDescriptor() { + StorageDescriptor sd = new StorageDescriptor(); + List cols = new ArrayList<>(); + cols.add("sampleCols"); + sd.setBucketColumns(cols); + sd.setColumns(getTestFieldList()); + sd.setParameters(new HashMap()); + sd.setSerdeInfo(getTestSerdeInfo()); + sd.setSkewedInfo(getSkewedInfo()); + sd.setSortColumns(new ArrayList()); + sd.setInputFormat("inputFormat"); + sd.setOutputFormat("outputFormat"); + sd.setLocation("/test-table"); + sd.withSortColumns(new Order().withColumn("foo").withSortOrder(1)); + sd.setCompressed(false); + sd.setStoredAsSubDirectories(false); + sd.setNumberOfBuckets(0); + return sd; + } + + public static SerDeInfo getTestSerdeInfo() { + return new SerDeInfo() + .withName("serdeName") + .withSerializationLibrary("serdeLib") + .withParameters(new HashMap()); + } + + public static List getTestFieldList() { + List fieldList = new ArrayList<>(); + Column field = new Column() + .withComment(UUID.randomUUID().toString()) + .withName("column" + UUID.randomUUID().toString().replaceAll("[^a-zA-Z0-9]+", "")) + .withType("string"); + fieldList.add(field); + return fieldList; + } + + public static Index getTestHiveIndex(final String dbName) { + Index index = new Index(); + index.setIndexName("testIndex" + UUID.randomUUID().toString().replaceAll("[^a-zA-Z0-9]+", "")); + index.setCreateTime((int) (System.currentTimeMillis() / 1000)); + index.setLastAccessTime((int) (System.currentTimeMillis() / 1000)); + index.setDbName(dbName); + index.setDeferredRebuild(false); + index.setOrigTableName("OriginalTable"); + index.setIndexTableName("IndexTable"); + index.setIndexHandlerClass("handlerClass"); + index.setParameters(new HashMap()); + index.setSd(CatalogToHiveConverter.convertStorageDescriptor(getTestStorageDescriptor())); + + return index; + } + + public static void setIndexParametersForIndexTable(Table indexTable, String dbName, String originTableName) { + indexTable.getParameters().put(INDEX_DEFERRED_REBUILD, "FALSE"); + indexTable.getParameters().put(INDEX_HANDLER_CLASS, "handlerClass"); + indexTable.getParameters().put(INDEX_DB_NAME, dbName); + indexTable.getParameters().put(INDEX_ORIGIN_TABLE_NAME, originTableName); + indexTable.getParameters().put(INDEX_TABLE_NAME, indexTable.getName()); + } + + public static SkewedInfo getSkewedInfo() { + List skewedName = new ArrayList<>(); + List skewedValue = new ArrayList<>(); + List skewedMapKey = new ArrayList<>(); + List> skewedValueList = new ArrayList<>(); + skewedName.add(UUID.randomUUID().toString()); + skewedName.add(UUID.randomUUID().toString()); + skewedValue.add(UUID.randomUUID().toString()); + skewedValue.add(UUID.randomUUID().toString()); + skewedValueList.add(skewedValue); + skewedMapKey.add(UUID.randomUUID().toString()); + skewedMapKey.add(UUID.randomUUID().toString()); + Map skewedMap = new HashMap<>(); + 
skewedMap.put(HiveToCatalogConverter.convertListToString(skewedMapKey), UUID.randomUUID().toString()); + + return new SkewedInfo().withSkewedColumnValueLocationMaps(skewedMap).withSkewedColumnNames(skewedName) + .withSkewedColumnValues(HiveToCatalogConverter.convertSkewedValue(skewedValueList)); + } + + public static Partition getTestPartition(String dbName, String tblName, List values) { + return new Partition() + .withDatabaseName(dbName) + .withTableName(tblName) + .withValues(values) + .withCreationTime(new Date(System.currentTimeMillis() / 1000 * 1000)) + .withLastAccessTime(new Date(System.currentTimeMillis() / 1000 * 1000)) + .withParameters(Maps.newHashMap()) + .withStorageDescriptor(ObjectTestUtils.getTestStorageDescriptor()); + } + + public static UserDefinedFunction getCatalogTestFunction() { + List resourceUriList = Lists.newArrayList(new ResourceUri().withUri("s3://abc/def.jar") + .withResourceType(ResourceType.JAR), new ResourceUri().withUri("hdfs://ghi/jkl.jar") + .withResourceType(ResourceType.ARCHIVE)); + return new UserDefinedFunction() + .withFunctionName("functionname") + .withClassName("classname") + .withOwnerName("ownername") + .withCreateTime(new Date(System.currentTimeMillis() / 1000 * 1000)) + .withOwnerType(PrincipalType.USER) + .withResourceUris(resourceUriList); + } + + + private static ByteBuffer byteBuffer(long value) { + return ByteBuffer.wrap(BigInteger.valueOf(value).toByteArray()); + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData getHiveBinaryColumnStatsData() { + BinaryColumnStatsData statsData = new BinaryColumnStatsData(); + statsData.setAvgColLen(12.3); + statsData.setMaxColLen(45L); + statsData.setNumNulls(56L); + org.apache.hadoop.hive.metastore.api.ColumnStatisticsData statsWrapper = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsData(); + statsWrapper.setBinaryStats(statsData); + return statsWrapper; + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData getHiveBooleanColumnStatsData() { + BooleanColumnStatsData statsData = new BooleanColumnStatsData(); + statsData.setNumFalses(12L); + statsData.setNumNulls(34L); + statsData.setNumTrues(56L); + org.apache.hadoop.hive.metastore.api.ColumnStatisticsData statsWrapper = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsData(); + statsWrapper.setBooleanStats(statsData); + return statsWrapper; + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData getHiveDecimalColumnStatsData() { + DecimalColumnStatsData statsData = new DecimalColumnStatsData(); + org.apache.hadoop.hive.metastore.api.Decimal highValue = new org.apache.hadoop.hive.metastore.api.Decimal(); + highValue.setScale((short) 1); + highValue.setUnscaled(BigInteger.valueOf(1234L).toByteArray()); + statsData.setHighValue(highValue); + org.apache.hadoop.hive.metastore.api.Decimal lowValue = new org.apache.hadoop.hive.metastore.api.Decimal(); + lowValue.setScale((short) 4); + lowValue.setUnscaled(BigInteger.valueOf(5678L).toByteArray()); + statsData.setLowValue(lowValue); + statsData.setNumDVs(12L); + statsData.setNumNulls(56L); + org.apache.hadoop.hive.metastore.api.ColumnStatisticsData statsWrapper = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsData(); + statsWrapper.setDecimalStats(statsData); + return statsWrapper; + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData getHiveDoubleColumnStatsData() { + DoubleColumnStatsData statsData = new DoubleColumnStatsData(); + statsData.setHighValue(9999.9); + 
statsData.setLowValue(-1111.1); + statsData.setNumDVs(123L); + statsData.setNumNulls(456L); + org.apache.hadoop.hive.metastore.api.ColumnStatisticsData statsWrapper = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsData(); + statsWrapper.setDoubleStats(statsData); + return statsWrapper; + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData getHiveLongColumnStatsData() { + LongColumnStatsData statsData = new LongColumnStatsData(); + statsData.setHighValue(9999L); + statsData.setLowValue(-1111L); + statsData.setNumDVs(123L); + statsData.setNumNulls(456L); + org.apache.hadoop.hive.metastore.api.ColumnStatisticsData statsWrapper = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsData(); + statsWrapper.setLongStats(statsData); + return statsWrapper; + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData getHiveStringColumnStatsData() { + StringColumnStatsData statsData = new StringColumnStatsData(); + statsData.setAvgColLen(123.4); + statsData.setMaxColLen(567L); + statsData.setNumDVs(89L); + statsData.setNumNulls(13L); + org.apache.hadoop.hive.metastore.api.ColumnStatisticsData statsWrapper = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsData(); + statsWrapper.setStringStats(statsData); + return statsWrapper; + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatistics getHiveTableColumnStatistics() { + org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc columnStatisticsDesc = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc(); + columnStatisticsDesc.setDbName("database-name"); + columnStatisticsDesc.setTableName("table-name"); + columnStatisticsDesc.setIsTblLevel(true); + columnStatisticsDesc.setLastAnalyzed(12345); + + org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj decimalObj = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj(); + decimalObj.setColName("decimal-column"); + decimalObj.setColType("decimal(9,6)"); + decimalObj.setStatsData(ObjectTestUtils.getHiveDecimalColumnStatsData()); + + org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj longObj = + new org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj(); + longObj.setColName("long-column"); + longObj.setColType("integer"); + longObj.setStatsData(ObjectTestUtils.getHiveLongColumnStatsData()); + + org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics = + new org.apache.hadoop.hive.metastore.api.ColumnStatistics(); + columnStatistics.setStatsDesc(columnStatisticsDesc); + columnStatistics.setStatsObj(Arrays.asList(decimalObj, longObj)); + + return columnStatistics; + } + + public static org.apache.hadoop.hive.metastore.api.ColumnStatistics getHivePartitionColumnStatistics() { + org.apache.hadoop.hive.metastore.api.ColumnStatistics columnStatistics = getHiveTableColumnStatistics(); + columnStatistics.getStatsDesc().setIsTblLevel(false); + columnStatistics.getStatsDesc().setPartName("A=a/B=b"); + + return columnStatistics; + } + + public static PartitionError getPartitionError(List values, Exception exception) { + return new PartitionError() + .withPartitionValues(values) + .withErrorDetail(new ErrorDetail() + .withErrorCode(exception.getClass().getSimpleName()) + .withErrorMessage(exception.getMessage())); + } + + public static HiveObjectRef getHiveObjectRef() { + HiveObjectRef obj = new HiveObjectRef(); + obj.setObjectType(HiveObjectType.TABLE); + obj.setDbName("default"); + obj.setObjectName("foo"); + return obj; + } + + public static PrivilegeBag getPrivilegeBag() { + 
PrivilegeBag bag = new PrivilegeBag(); + HiveObjectPrivilege hivePrivilege = new HiveObjectPrivilege(); + hivePrivilege.setPrincipalName("user1"); + hivePrivilege.setPrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType.USER); + org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo grantInfo = + new org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo(); + grantInfo.setGrantor("user2"); + grantInfo.setGrantorType(org.apache.hadoop.hive.metastore.api.PrincipalType.USER); + hivePrivilege.setGrantInfo(grantInfo); + bag.setPrivileges(Lists.newArrayList(hivePrivilege)); + return bag; + } + + public static org.apache.hadoop.hive.metastore.api.Order getTestOrder() { + org.apache.hadoop.hive.metastore.api.Order order = new org.apache.hadoop.hive.metastore.api.Order(); + order.setCol("foo"); + order.setOrder(1); + return order; + } + + public static Role getTestRole() { + Role role = new Role(); + role.setRoleName("test-role"); + role.setOwnerName("owner"); + return role; + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestBatchCreatePartitionsHelper.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestBatchCreatePartitionsHelper.java new file mode 100644 index 000000000000..8574a86f0001 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestBatchCreatePartitionsHelper.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.AlreadyExistsException; +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.InternalServiceException; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import com.amazonaws.services.glue.model.ResourceNumberLimitExceededException; +import java.util.List; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.iceberg.aws.glue.metastore.AWSGlueMetastore; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import static org.hamcrest.CoreMatchers.hasItem; +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.not; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +public class TestBatchCreatePartitionsHelper { + + @Mock + private AWSGlueMetastore awsGlueMetastore; + + private BatchCreatePartitionsHelper batchCreatePartitionsHelper; + + private static final String NAMESPACE_NAME = "ns"; + private static final String TABLE_NAME = "table"; + + @Before + public void init() { + MockitoAnnotations.initMocks(this); + } + + @Test + public void testCreatePartitionsEmpty() throws Exception { + mockBatchCreateSuccess(); + + List partitions = Lists.newArrayList(); + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, partitions, false) + .createPartitions(); + + assertTrue(batchCreatePartitionsHelper.getPartitionsCreated().isEmpty()); + assertNull(batchCreatePartitionsHelper.getFirstTException()); + } + + @Test + public void testCreatePartitionsSucceed() throws Exception { + mockBatchCreateSuccess(); + + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List partitions = Lists.newArrayList( + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1), + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2)); + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, partitions, false) + .createPartitions(); + + assertEquals(2, batchCreatePartitionsHelper.getPartitionsCreated().size()); + assertNull(batchCreatePartitionsHelper.getFirstTException()); + for (Partition partition : partitions) { + assertTrue(batchCreatePartitionsHelper.getPartitionsCreated().contains(partition)); + } + assertEquals(0, batchCreatePartitionsHelper.getPartitionsFailed().size()); + } + + @Test + public void testCreatePartitionsThrowsException() throws Exception { + Exception err = new RuntimeException("foo"); + mockBatchCreateThrowsException(err); + + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List partitions = Lists.newArrayList( + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1), + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2)); + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, 
partitions, false); + batchCreatePartitionsHelper.createPartitions(); + + assertNotNull(batchCreatePartitionsHelper.getFirstTException()); + assertEquals("foo", batchCreatePartitionsHelper.getFirstTException().getMessage()); + assertEquals(partitions, batchCreatePartitionsHelper.getPartitionsFailed()); + assertTrue(batchCreatePartitionsHelper.getPartitionsCreated().isEmpty()); + } + + @Test + public void testCreatePartitionsThrowsServiceExceptionAndPartitionPartiallyCreated() throws Exception { + Exception err = new InternalServiceException("foo"); + mockBatchCreateThrowsException(err); + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List values3 = Lists.newArrayList("val3"); + Partition partition1 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2); + Partition partition3 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values3); + List partitions = Lists.newArrayList(partition1, partition2, partition3); + Mockito.when(awsGlueMetastore.getPartition(Mockito.anyString(), Mockito.anyString(), Mockito.anyList())) + .thenReturn(partition1) + .thenThrow(new EntityNotFoundException("bar")) + .thenThrow(new NullPointerException("baz")); + + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, partitions, false) + .createPartitions(); + + assertThat(batchCreatePartitionsHelper.getFirstTException(), is(instanceOf(MetaException.class))); + assertThat(batchCreatePartitionsHelper.getPartitionsCreated(), hasItems(partition1)); + assertThat(batchCreatePartitionsHelper.getPartitionsCreated(), not(hasItems(partition2, partition3))); + assertThat(batchCreatePartitionsHelper.getPartitionsFailed(), hasItems(partition2, partition3)); + assertThat(batchCreatePartitionsHelper.getPartitionsFailed(), not(hasItems(partition1))); + } + + @Test + public void testCreatePartitionsDuplicateValues() throws Exception { + mockBatchCreateSuccess(); + + List values1 = Lists.newArrayList("val1"); + Partition partition = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + List partitions = Lists.newArrayList(partition, partition); + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, partitions, false) + .createPartitions(); + + assertEquals(1, batchCreatePartitionsHelper.getPartitionsCreated().size()); + assertNull(batchCreatePartitionsHelper.getFirstTException()); + for (Partition p : partitions) { + assertTrue(batchCreatePartitionsHelper.getPartitionsCreated().contains(p)); + } + assertTrue(batchCreatePartitionsHelper.getPartitionsFailed().isEmpty()); + } + + @Test + public void testCreatePartitionsWithFailure() throws Exception { + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + Partition partition1 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2); + List partitions = Lists.newArrayList(partition1, partition2); + + PartitionError error = ObjectTestUtils.getPartitionError(values1, new AlreadyExistsException("foo error msg")); + mockBatchCreateWithFailures(Lists.newArrayList(error)); + + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, partitions, false) + .createPartitions(); + + 
assertEquals(1, batchCreatePartitionsHelper.getPartitionsCreated().size()); + assertThat(batchCreatePartitionsHelper.getPartitionsCreated(), hasItem(partition2)); + assertThat(batchCreatePartitionsHelper.getFirstTException(), + is(instanceOf(org.apache.hadoop.hive.metastore.api.AlreadyExistsException.class))); + assertThat(batchCreatePartitionsHelper.getPartitionsFailed(), hasItem(partition1)); + } + + @Test + public void testCreatePartitionsWithFailureAllowExists() throws Exception { + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + Partition partition1 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2); + List partitions = Lists.newArrayList(partition1, partition2); + + PartitionError error = ObjectTestUtils.getPartitionError(values1, new AlreadyExistsException("foo error msg")); + mockBatchCreateWithFailures(Lists.newArrayList(error)); + + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, partitions, true) + .createPartitions(); + + assertEquals(1, batchCreatePartitionsHelper.getPartitionsCreated().size()); + assertThat(batchCreatePartitionsHelper.getPartitionsCreated(), hasItem(partition2)); + assertNull(batchCreatePartitionsHelper.getFirstTException()); + assertEquals(0, batchCreatePartitionsHelper.getPartitionsFailed().size()); + } + + @Test + public void testCreatePartitionsWithFailures() throws Exception { + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + Partition partition1 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2); + List partitions = Lists.newArrayList(partition1, partition2); + + PartitionError error1 = ObjectTestUtils.getPartitionError( + values1, new ResourceNumberLimitExceededException("foo error msg")); + PartitionError error2 = ObjectTestUtils.getPartitionError( + values2, new AlreadyExistsException("foo error msg2")); + mockBatchCreateWithFailures(Lists.newArrayList(error1, error2)); + + batchCreatePartitionsHelper = new BatchCreatePartitionsHelper( + awsGlueMetastore, NAMESPACE_NAME, TABLE_NAME, null, partitions, true) + .createPartitions(); + + assertEquals(0, batchCreatePartitionsHelper.getPartitionsCreated().size()); + assertTrue(batchCreatePartitionsHelper.getFirstTException() instanceof MetaException); + assertEquals(1, batchCreatePartitionsHelper.getPartitionsFailed().size()); + assertThat(batchCreatePartitionsHelper.getPartitionsFailed(), hasItem(partition1)); + } + + private void mockBatchCreateSuccess() { + Mockito.when(awsGlueMetastore.createPartitions(Mockito.anyString(), Mockito.anyString(), + Mockito.anyList())).thenReturn(null); + } + + private void mockBatchCreateWithFailures(List errors) { + Mockito.when(awsGlueMetastore.createPartitions(Mockito.anyString(), Mockito.anyString(), Mockito.anyList())) + .thenReturn(errors); + } + + private void mockBatchCreateThrowsException(Exception err) { + Mockito.when(awsGlueMetastore.createPartitions(Mockito.anyString(), Mockito.anyString(), + Mockito.anyList())).thenThrow(err); + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestBatchDeletePartitionsHelper.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestBatchDeletePartitionsHelper.java new file mode 100644 index 000000000000..02168ea245b9 --- 
/dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestBatchDeletePartitionsHelper.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.AWSGlue; +import com.amazonaws.services.glue.model.BatchDeletePartitionRequest; +import com.amazonaws.services.glue.model.BatchDeletePartitionResult; +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.GetPartitionRequest; +import com.amazonaws.services.glue.model.GetPartitionResult; +import com.amazonaws.services.glue.model.InternalServiceException; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import java.util.Collection; +import java.util.List; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import static org.apache.iceberg.aws.glue.util.ObjectTestUtils.getPartitionError; +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.not; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +public class TestBatchDeletePartitionsHelper { + + @Mock + private AWSGlue client; + + private BatchDeletePartitionsHelper batchDeletePartitionsHelper; + + private static final String NAMESPACE_NAME = "ns"; + private static final String TABLE_NAME = "table"; + + @Before + public void init() { + MockitoAnnotations.initMocks(this); + } + + @Test + public void testDeletePartitionsEmpty() throws Exception { + mockBatchDeleteSuccess(); + + List partitions = Lists.newArrayList(); + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions) + .deletePartitions(); + + assertTrue(batchDeletePartitionsHelper.getPartitionsDeleted().isEmpty()); + assertNull(batchDeletePartitionsHelper.getFirstTException()); + } + + @Test + public void testDeletePartitionsSucceed() throws Exception { + mockBatchDeleteSuccess(); + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List 
partitions = Lists.newArrayList( + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1), + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2)); + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions) + .deletePartitions(); + + assertEquals(2, batchDeletePartitionsHelper.getPartitionsDeleted().size()); + assertNull(batchDeletePartitionsHelper.getFirstTException()); + for (Partition partition : partitions) { + assertTrue(batchDeletePartitionsHelper.getPartitionsDeleted().contains(partition)); + } + } + + @Test + public void testDeletePartitionsThrowsRuntimeException() throws Exception { + Exception err = new NullPointerException("foo"); + mockBatchDeleteThrowsException(err); + + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List partitions = Lists.newArrayList( + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1), + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2)); + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions); + + batchDeletePartitionsHelper.deletePartitions(); + assertTrue(batchDeletePartitionsHelper.getPartitionsDeleted().isEmpty()); + assertNotNull(batchDeletePartitionsHelper.getFirstTException()); + assertEquals("foo", batchDeletePartitionsHelper.getFirstTException().getMessage()); + } + + @Test + public void testDeletePartitionsThrowsInvalidInputException() throws Exception { + Exception err = new com.amazonaws.services.glue.model.InvalidInputException("foo"); + mockBatchDeleteThrowsException(err); + + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List partitions = Lists.newArrayList( + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1), + ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2)); + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions); + + batchDeletePartitionsHelper.deletePartitions(); + assertTrue(batchDeletePartitionsHelper.getPartitionsDeleted().isEmpty()); + assertThat(batchDeletePartitionsHelper.getFirstTException(), is(instanceOf(InvalidObjectException.class))); + } + + @Test + public void testDeletePartitionsThrowsServiceException() throws Exception { + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + List values3 = Lists.newArrayList("val3"); + Partition partition1 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2); + Partition partition3 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values3); + List partitions = Lists.newArrayList(partition1, partition2, partition3); + + Exception err = new InternalServiceException("foo"); + mockBatchDeleteThrowsException(err); + Mockito.when(client.getPartition(Mockito.any(GetPartitionRequest.class))) + .thenReturn(new GetPartitionResult().withPartition(partition1)) + .thenThrow(new EntityNotFoundException("bar")) + .thenThrow(new NullPointerException("baz")); + + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions) + .deletePartitions(); + + assertThat(batchDeletePartitionsHelper.getFirstTException(), is(instanceOf(MetaException.class))); + 
assertThat(batchDeletePartitionsHelper.getPartitionsDeleted(), hasItems(partition2)); + assertThat(batchDeletePartitionsHelper.getPartitionsDeleted(), not(hasItems(partition1, partition3))); + } + + @Test + public void testDeletePartitionsDuplicateValues() throws Exception { + mockBatchDeleteSuccess(); + + List values1 = Lists.newArrayList("val1"); + Partition partition = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + List partitions = Lists.newArrayList(partition, partition); + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions) + .deletePartitions(); + + assertEquals(1, batchDeletePartitionsHelper.getPartitionsDeleted().size()); + assertNull(batchDeletePartitionsHelper.getFirstTException()); + for (Partition p : partitions) { + assertTrue(batchDeletePartitionsHelper.getPartitionsDeleted().contains(p)); + } + } + + @Test + public void testDeletePartitionsWithFailure() throws Exception { + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + Partition partition1 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2); + List partitions = Lists.newArrayList(partition1, partition2); + + PartitionError error = getPartitionError(values1, new EntityNotFoundException("foo error msg")); + mockBatchDeleteWithFailures(Lists.newArrayList(error)); + + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions) + .deletePartitions(); + + assertEquals(1, batchDeletePartitionsHelper.getPartitionsDeleted().size()); + assertTrue(batchDeletePartitionsHelper.getPartitionsDeleted().contains(partition2)); + assertTrue(batchDeletePartitionsHelper.getFirstTException() instanceof NoSuchObjectException); + } + + @Test + public void testDeletePartitionsWithFailures() throws Exception { + List values1 = Lists.newArrayList("val1"); + List values2 = Lists.newArrayList("val2"); + Partition partition1 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(NAMESPACE_NAME, TABLE_NAME, values2); + List partitions = Lists.newArrayList(partition1, partition2); + + PartitionError error1 = getPartitionError(values1, new EntityNotFoundException("foo error msg")); + PartitionError error2 = getPartitionError(values2, new InvalidInputException("foo error msg2")); + mockBatchDeleteWithFailures(Lists.newArrayList(error1, error2)); + + batchDeletePartitionsHelper = new BatchDeletePartitionsHelper( + client, NAMESPACE_NAME, TABLE_NAME, null, partitions) + .deletePartitions(); + + assertEquals(0, batchDeletePartitionsHelper.getPartitionsDeleted().size()); + assertTrue(batchDeletePartitionsHelper.getFirstTException() instanceof NoSuchObjectException); + } + + private void mockBatchDeleteSuccess() { + Mockito.when(client.batchDeletePartition(Mockito.any(BatchDeletePartitionRequest.class))) + .thenReturn(new BatchDeletePartitionResult()); + } + + private void mockBatchDeleteWithFailures(Collection errors) { + Mockito.when(client.batchDeletePartition(Mockito.any(BatchDeletePartitionRequest.class))) + .thenReturn(new BatchDeletePartitionResult().withErrors(errors)); + } + + private void mockBatchDeleteThrowsException(Exception err) { + Mockito.when(client.batchDeletePartition(Mockito.any(BatchDeletePartitionRequest.class))).thenThrow(err); + } + +} diff --git 
a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestExecutorService.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestExecutorService.java new file mode 100644 index 000000000000..898e4b7e2254 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestExecutorService.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.util; + +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadFactory; + +public class TestExecutorService extends ScheduledThreadPoolExecutor { + + public TestExecutorService(int corePoolSize, ThreadFactory factory) { + super(corePoolSize, factory); + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestExecutorServiceFactory.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestExecutorServiceFactory.java new file mode 100644 index 000000000000..a1e57b467f18 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestExecutorServiceFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import java.util.concurrent.ExecutorService; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.iceberg.aws.glue.metastore.ExecutorServiceFactory; +import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; + +public class TestExecutorServiceFactory implements ExecutorServiceFactory { + private static ExecutorService execService = new TestExecutorService( + 1, new ThreadFactoryBuilder().build()); + + @Override + public ExecutorService getExecutorService(HiveConf conf) { + return execService; + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestHiveTableValidator.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestHiveTableValidator.java new file mode 100644 index 000000000000..c13d03ed069a --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestHiveTableValidator.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.InvalidInputException; +import com.amazonaws.services.glue.model.Table; +import org.apache.hadoop.hive.metastore.TableType; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE; +import static org.apache.iceberg.aws.glue.util.HiveTableValidator.REQUIRED_PROPERTIES_VALIDATOR; +import static org.apache.iceberg.aws.glue.util.ObjectTestUtils.getTestTable; + +public class TestHiveTableValidator { + + @Rule + public ExpectedException thrown = ExpectedException.none(); + private static final String EXPECTED_MESSAGE = "%s cannot be null"; + + @Test + public void testRequiredProperty_TableType() { + thrown.expect(InvalidInputException.class); + thrown.expectMessage(String.format(EXPECTED_MESSAGE, "TableType")); + Table tbl = getTestTable().withTableType(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } + + @Test + public void testRequiredProperty_StorageDescriptor() { + thrown.expect(InvalidInputException.class); + thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor")); + Table tbl = getTestTable().withStorageDescriptor(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } + + @Test + public void testRequiredProperty_InputFormat() { + thrown.expect(InvalidInputException.class); + thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#InputFormat")); + Table tbl = getTestTable(); + tbl.getStorageDescriptor().setInputFormat(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } + + @Test + public void testRequiredProperty_OutputFormat() { + thrown.expect(InvalidInputException.class); + 
thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#OutputFormat")); + Table tbl = getTestTable(); + tbl.getStorageDescriptor().setOutputFormat(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } + + @Test + public void testRequiredProperty_SerdeInfo() { + thrown.expect(InvalidInputException.class); + thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#SerdeInfo")); + Table tbl = getTestTable(); + tbl.getStorageDescriptor().setSerdeInfo(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } + + @Test + public void testRequiredProperty_SerializationLibrary() { + thrown.expect(InvalidInputException.class); + thrown.expectMessage(String.format(EXPECTED_MESSAGE, "StorageDescriptor#SerdeInfo#SerializationLibrary")); + Table tbl = getTestTable(); + tbl.getStorageDescriptor().getSerdeInfo().setSerializationLibrary(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } + + @Test + public void testRequiredProperty_ValidTable() { + REQUIRED_PROPERTIES_VALIDATOR.validate(getTestTable()); + } + + @Test + public void testValidate_ViewTableType() { + Table tbl = getTestTable(); + tbl.setTableType(TableType.VIRTUAL_VIEW.name()); + tbl.getStorageDescriptor().getSerdeInfo().setSerializationLibrary(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } + + @Test + public void testValidate_ExcludeStorageHandlerType() { + Table tbl = getTestTable(); + tbl.getParameters().put(META_TABLE_STORAGE, "org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler"); + tbl.getStorageDescriptor().setInputFormat(null); + REQUIRED_PROPERTIES_VALIDATOR.validate(tbl); + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestLoggingHelper.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestLoggingHelper.java new file mode 100644 index 000000000000..48cb1d59c2a1 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestLoggingHelper.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import java.util.Collection; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +public class TestLoggingHelper { + + @Test + public void concatCollectionToStringForLoggingTest() { + Collection logs = ImmutableList.of("test_log_1", "test_log_2", "test_log_3"); + String delimiter = "|"; + + String result = LoggingHelper.concatCollectionToStringForLogging(logs, delimiter); + String expected = "test_log_1|test_log_2|test_log_3|"; + + assertThat(result, is(equalTo(expected))); + } + + @Test + public void concatCollectionToStringForLoggingTestWithoutCollection() { + String delimiter = "|"; + + String result = LoggingHelper.concatCollectionToStringForLogging(null, delimiter); + String expected = ""; + + assertThat(result, is(equalTo(expected))); + } + + @Test + public void concatCollectionToStringForLoggingTestWithoutDelimiter() { + Collection logs = ImmutableList.of("test_log_1", "test_log_2", "test_log_3"); + + String result = LoggingHelper.concatCollectionToStringForLogging(logs, null); + String expected = "test_log_1,test_log_2,test_log_3,"; + + assertThat(result, is(equalTo(expected))); + } + + @Test + public void concatCollectionToStringForLoggingTestWithLongerThanLimitInput() { + ImmutableList.Builder listBuilder = ImmutableList.builder(); + + final int max = 2000; + final String key = "KEY"; + final StringBuilder stringBuilder = new StringBuilder(); + for (int i = 0; i < max; i += key.length()) { + listBuilder.add(key); + stringBuilder.append(key); + } + final String overflow = "OVERFLOW"; + for (int i = 0; i < 100; i += overflow.length()) { + listBuilder.add(overflow); + } + + String result = LoggingHelper.concatCollectionToStringForLogging(listBuilder.build(), ""); + String expected = stringBuilder.toString().substring(0, max); + + assertThat(result.length(), is(equalTo(max))); + assertThat(result, is(equalTo(expected))); + assertThat(expected.indexOf(overflow), is(equalTo(-1))); + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestMetastoreClientUtils.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestMetastoreClientUtils.java new file mode 100644 index 000000000000..6870ccf443df --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestMetastoreClientUtils.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import java.util.Map; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.iceberg.aws.glue.converters.CatalogToHiveConverter; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE; +import static org.apache.iceberg.aws.glue.util.ObjectTestUtils.getTestDatabase; +import static org.apache.iceberg.aws.glue.util.ObjectTestUtils.getTestTable; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestMetastoreClientUtils { + + private Warehouse wh; + private HiveConf conf; + + private Path testPath; + private static final String TEST_LOCATION = "s3://mybucket/"; + private Database testDb; + private Table testTbl; + + @Before + public void before() { + wh = mock(Warehouse.class); + conf = new HiveConf(); + testPath = new Path(TEST_LOCATION); + testDb = CatalogToHiveConverter.convertDatabase(getTestDatabase()); + testTbl = CatalogToHiveConverter.convertTable(getTestTable(), testDb.getName()); + } + + @Test(expected = NullPointerException.class) + public void testMakeDirsNullWh() throws Exception { + MetastoreClientUtils.makeDirs(null, testPath); + } + + @Test(expected = NullPointerException.class) + public void testMakeDirsNullPath() throws Exception { + MetastoreClientUtils.makeDirs(wh, null); + } + + @Test + public void testMakeDirsAlreadyExists() throws Exception { + when(wh.isDir(testPath)).thenReturn(true); + assertFalse(MetastoreClientUtils.makeDirs(wh, testPath)); + } + + @Test(expected = MetaException.class) + public void testMakeDirsCannotCreateDir() throws Exception { + when(wh.isDir(testPath)).thenReturn(false); + when(wh.mkdirs(testPath, true)).thenReturn(false); + MetastoreClientUtils.makeDirs(wh, testPath); + } + + @Test(expected = InvalidObjectException.class) + public void testValidateTableObjectInvalidName() throws Exception { + testTbl.setTableName("!"); + MetastoreClientUtils.validateTableObject(testTbl, conf); + } + + @Test(expected = InvalidObjectException.class) + public void testValidateTableObjectInvalidColumnName() throws Exception { + testTbl.getSd().getCols().get(0).setType("invalidtype"); + MetastoreClientUtils.validateTableObject(testTbl, conf); + } + + @Test(expected = InvalidObjectException.class) + public void testValidateTableObjectInvalidPartitionKeys() throws Exception { + testTbl.getPartitionKeys().get(0).setType("invalidtype"); + MetastoreClientUtils.validateTableObject(testTbl, conf); + } + + @Test + public void testDeepCopy() throws Exception { + Map orig = ImmutableMap.of("key", "val"); + Map deepCopy = MetastoreClientUtils.deepCopyMap(orig); + assertNotSame(deepCopy, orig); + assertEquals(deepCopy, orig); + } + + @Test + public void testIsExternalTableFalse() { + assertFalse(MetastoreClientUtils.isExternalTable(testTbl)); + } + + @Test + public void testIsExternalTableParamTrue() { + 
testTbl.getParameters().put("EXTERNAL", "true"); + assertTrue(MetastoreClientUtils.isExternalTable(testTbl)); + } + + @Test + public void testIsExternalTableTableTypeTrue() { + testTbl.setTableType(EXTERNAL_TABLE.name()); + testTbl.setParameters(null); + assertTrue(MetastoreClientUtils.isExternalTable(testTbl)); + } + + @Test + public void testIsExternalTableParamPriority() { + // parameters has higher priority when there is conflict + testTbl.getParameters().put("EXTERNAL", "false"); + testTbl.setTableType(EXTERNAL_TABLE.name()); + assertFalse(MetastoreClientUtils.isExternalTable(testTbl)); + } + + @Test + public void testIsExternalTableNull() { + assertFalse(MetastoreClientUtils.isExternalTable(null)); + } +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestPartitionKey.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestPartitionKey.java new file mode 100644 index 000000000000..2e55a69d3415 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestPartitionKey.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionError; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +public class TestPartitionKey { + + @Test(expected = IllegalArgumentException.class) + public void testNull() { + new PartitionKey((List) null); + } + + @Test + public void testEqualsDifferentTable() { + List values1 = Lists.newArrayList("value1", "value2"); + List values2 = Lists.newArrayList("value1", "value2"); + Partition partition1 = ObjectTestUtils.getTestPartition("ns", "table1", values1); + Partition partition2 = ObjectTestUtils.getTestPartition("ns", "table2", values2); + PartitionKey partitionKey1 = new PartitionKey(partition1); + PartitionKey partitionKey2 = new PartitionKey(partition2); + assertEquals(partitionKey1, partitionKey2); + assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); + } + + @Test + public void testEqualsEmptyValue() { + List values = Lists.newArrayList(); + Partition partition1 = ObjectTestUtils.getTestPartition("ns", "table", values); + Partition partition2 = ObjectTestUtils.getTestPartition("ns", "table", values); + PartitionKey partitionKey1 = new PartitionKey(partition1); + PartitionKey partitionKey2 = new PartitionKey(partition2); + assertEquals(partitionKey1, partitionKey2); + assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); + } + + @Test + public void testEqualsDifferentClass() { + List values1 = Lists.newArrayList("value1", "value2"); + List values2 = Lists.newLinkedList(values1); + Partition partition1 = ObjectTestUtils.getTestPartition("ns", "table", values1); + Partition partition2 = ObjectTestUtils.getTestPartition("ns", "table", values2); + PartitionKey partitionKey1 = new PartitionKey(partition1); + PartitionKey partitionKey2 = new PartitionKey(partition2); + assertEquals(partitionKey1, partitionKey2); + assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); + } + + @Test + public void testEqualsPartitionError() { + List values1 = Lists.newArrayList("value1", "value2"); + List values2 = Lists.newArrayList("value1", "value2"); + PartitionError partitionError1 = ObjectTestUtils.getPartitionError(values1, new RuntimeException("foo")); + PartitionError partitionError2 = ObjectTestUtils.getPartitionError(values2, new Exception("foo2")); + PartitionKey partitionKey1 = new PartitionKey(partitionError1.getPartitionValues()); + PartitionKey partitionKey2 = new PartitionKey(partitionError2.getPartitionValues()); + assertEquals(partitionKey1, partitionKey2); + assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); + } + + @Test + public void testEqualsPartitionAndPartitionError() { + List values1 = Lists.newArrayList("value1", "value2"); + List values2 = Lists.newArrayList("value1", "value2"); + Partition partition = ObjectTestUtils.getTestPartition("ns", "table", values1); + PartitionError partitionError = ObjectTestUtils.getPartitionError(values2, new RuntimeException("foo")); + PartitionKey partitionKey1 = new PartitionKey(partition); + PartitionKey partitionKey2 = new PartitionKey(partitionError.getPartitionValues()); + assertEquals(partitionKey1, partitionKey2); + assertEquals(partitionKey1.hashCode(), partitionKey2.hashCode()); + } + + @Test + public void testEqualsNull() { + List values = Lists.newArrayList("value1", "value2"); 
+ Partition partition = ObjectTestUtils.getTestPartition("ns", "table", values); + PartitionKey partitionKey = new PartitionKey(partition); + assertFalse(partitionKey.equals(null)); + } + + @Test + public void testGetValues() { + List values = Lists.newArrayList("value1", "value2"); + Partition partition = ObjectTestUtils.getTestPartition("ns", "table", values); + PartitionKey partitionKey1 = new PartitionKey(partition); + assertEquals(Lists.newArrayList(values), partitionKey1.getValues()); + + PartitionError partitionError = ObjectTestUtils.getPartitionError(values, new RuntimeException("foo")); + PartitionKey partitionKey2 = new PartitionKey(partitionError.getPartitionValues()); + assertEquals(Lists.newArrayList(values), partitionKey2.getValues()); + } + +} diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestPartitionUtils.java b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestPartitionUtils.java new file mode 100644 index 000000000000..25399dd3f952 --- /dev/null +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/util/TestPartitionUtils.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.aws.glue.util; + +import com.amazonaws.services.glue.model.EntityNotFoundException; +import com.amazonaws.services.glue.model.InternalServiceException; +import com.amazonaws.services.glue.model.InvalidInputException; +import com.amazonaws.services.glue.model.Partition; +import com.amazonaws.services.glue.model.PartitionValueList; +import com.amazonaws.services.glue.model.ResourceNumberLimitExceededException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.hasItem; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +public class TestPartitionUtils { + + @Test + public void testBuildPartitionMapAndGetPartitionValuesList() { + String namespaceName = "ns"; + String tableName = "table"; + // choose special values to make values1.hashCode() == values2.hashCode() + List values1 = Lists.newArrayList("Aa"); + List values2 = Lists.newArrayList("BB"); + Partition partition1 = ObjectTestUtils.getTestPartition(namespaceName, tableName, values1); + Partition partition2 = ObjectTestUtils.getTestPartition(namespaceName, tableName, values2); + Map partitionMap = PartitionUtils.buildPartitionMap( + Lists.newArrayList(partition1, partition2)); + List partitionValuesList = PartitionUtils.getPartitionValuesList(partitionMap); + + assertEquals(2, partitionMap.size()); + Set> valuesSet = Sets.newHashSet(values1, values2); + for (PartitionKey partitionKey : partitionMap.keySet()) { + assertThat(valuesSet, hasItem(partitionKey.getValues())); + assertThat(partitionMap.get(partitionKey).getValues(), equalTo(partitionKey.getValues())); + } + + assertEquals(2, partitionValuesList.size()); + for (PartitionValueList partitionValueList : partitionValuesList) { + assertThat(valuesSet, hasItem(partitionValueList.getValues())); + } + } + + @Test + public void testIsInvalidUserInputException() { + assertTrue(PartitionUtils.isInvalidUserInputException(new InvalidInputException("foo"))); + assertTrue(PartitionUtils.isInvalidUserInputException(new EntityNotFoundException("bar"))); + assertFalse(PartitionUtils.isInvalidUserInputException(new InternalServiceException("bar2"))); + assertFalse(PartitionUtils.isInvalidUserInputException(new ResourceNumberLimitExceededException("bar3"))); + assertFalse(PartitionUtils.isInvalidUserInputException(new NullPointerException("bar4"))); + } + +} diff --git a/build.gradle b/build.gradle index 2b67d50a7822..61cf6b5a9f47 100644 --- a/build.gradle +++ b/build.gradle @@ -715,6 +715,62 @@ project(':iceberg-spark') { } } +project(':iceberg-aws') { + dependencies { + compile project(':iceberg-api') + compile group: 'software.amazon.awssdk', name: 'dynamodb', version: '2.14.26' + compile group: 'com.amazonaws', name: 'aws-java-sdk-glue', version: '1.11.880' + compileOnly("org.apache.hive:hive-metastore") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hbase' + exclude group: 'org.apache.logging.log4j' + exclude group: 'co.cask.tephra' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + exclude group: 
'org.eclipse.jetty.aggregate', module: 'jetty-all' + exclude group: 'org.eclipse.jetty.orbit', module: 'javax.servlet' + exclude group: 'org.apache.parquet', module: 'parquet-hadoop-bundle' + exclude group: 'com.tdunning', module: 'json' + exclude group: 'javax.transaction', module: 'transaction-api' + exclude group: 'com.zaxxer', module: 'HikariCP' + } + compileOnly("org.apache.hadoop:hadoop-common") { + exclude group: 'commons-beanutils' + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + } + testCompile "org.apache.hadoop:hadoop-common::tests" + testCompile "org.apache.hadoop:hadoop-mapreduce-client-core" + testCompile("org.apache.hive:hive-exec::core") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hive', module: 'hive-llap-tez' + exclude group: 'org.apache.logging.log4j' + exclude group: 'com.google.protobuf', module: 'protobuf-java' + exclude group: 'org.apache.calcite' + exclude group: 'org.apache.calcite.avatica' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + } + testCompile("org.apache.hive:hive-metastore") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hbase' + exclude group: 'org.apache.logging.log4j' + exclude group: 'co.cask.tephra' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + exclude group: 'org.eclipse.jetty.aggregate', module: 'jetty-all' + exclude group: 'org.eclipse.jetty.orbit', module: 'javax.servlet' + exclude group: 'org.apache.parquet', module: 'parquet-hadoop-bundle' + exclude group: 'com.tdunning', module: 'json' + exclude group: 'javax.transaction', module: 'transaction-api' + exclude group: 'com.zaxxer', module: 'HikariCP' + } + } +} + if (jdkVersion == '8') { apply from: 'jmh.gradle' diff --git a/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java b/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java index 107e420b2139..14f4a20dd0ad 100644 --- a/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java +++ b/bundled-guava/src/main/java/org/apache/iceberg/GuavaClasses.java @@ -26,6 +26,8 @@ import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.base.Throwables; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import com.google.common.collect.BiMap; import com.google.common.collect.FluentIterable; import com.google.common.collect.ImmutableBiMap; @@ -90,6 +92,8 @@ public class GuavaClasses { MoreExecutors.class.getName(); ThreadFactoryBuilder.class.getName(); Iterables.class.getName(); + Cache.class.getName(); + CacheBuilder.class.getName(); } } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 3d5de20ea6ca..77d87547e0aa 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -28,7 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import 
org.apache.hadoop.hive.metastore.api.AlreadyExistsException; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; @@ -241,7 +241,7 @@ public List listNamespaces(Namespace namespace) { return ImmutableList.of(); } try { - List namespaces = clients.run(HiveMetaStoreClient::getAllDatabases) + List namespaces = clients.run(IMetaStoreClient::getAllDatabases) .stream() .map(Namespace::of) .collect(Collectors.toList()); diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java index 1df705bcd481..77ce919c83f8 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java @@ -21,25 +21,21 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.iceberg.common.DynConstructors; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; -public class HiveClientPool extends ClientPool { - - // use appropriate ctor depending on whether we're working with Hive2 or Hive3 dependencies - // we need to do this because there is a breaking API change between Hive2 and Hive3 - private static final DynConstructors.Ctor CLIENT_CTOR = DynConstructors.builder() - .impl(HiveMetaStoreClient.class, HiveConf.class) - .impl(HiveMetaStoreClient.class, Configuration.class) - .build(); +public class HiveClientPool extends ClientPool { private final HiveConf hiveConf; HiveClientPool(Configuration conf) { - this(conf.getInt("iceberg.hive.client-pool-size", 5), conf); + this(conf.getInt( + IcebergHiveConfigs.HIVE_CLIENT_POOL_SIZE, + IcebergHiveConfigs.HIVE_CLIENT_POOL_SIZE_DEFAULT), + conf); } public HiveClientPool(int poolSize, Configuration conf) { @@ -48,10 +44,19 @@ public HiveClientPool(int poolSize, Configuration conf) { } @Override - protected HiveMetaStoreClient newClient() { + protected IMetaStoreClient newClient() { try { try { - return CLIENT_CTOR.newInstance(hiveConf); + String impl = hiveConf.get( + IcebergHiveConfigs.HIVE_CLIENT_IMPL, + IcebergHiveConfigs.HIVE_CLIENT_IMPL_DEFAULT); + // use appropriate ctor depending on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change between Hive2 and Hive3 + DynConstructors.Ctor ctor = DynConstructors.builder(IMetaStoreClient.class) + .impl(impl, HiveConf.class) + .impl(impl, Configuration.class) + .build(); + return ctor.newInstance(hiveConf); } catch (RuntimeException e) { // any MetaException would be wrapped into RuntimeException during reflection, so let's double-check type here if (e.getCause() instanceof MetaException) { @@ -67,13 +72,12 @@ protected HiveMetaStoreClient newClient() { "Derby supports only one client at a time. 
To fix this, use a metastore that supports " + "multiple clients."); } - throw new RuntimeMetaException(t, "Failed to connect to Hive Metastore"); } } @Override - protected HiveMetaStoreClient reconnect(HiveMetaStoreClient client) { + protected IMetaStoreClient reconnect(IMetaStoreClient client) { try { client.close(); client.reconnect(); @@ -84,7 +88,7 @@ protected HiveMetaStoreClient reconnect(HiveMetaStoreClient client) { } @Override - protected void close(HiveMetaStoreClient client) { + protected void close(IMetaStoreClient client) { client.close(); } } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index badc911c1f7e..3bd8a442a460 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -31,7 +31,6 @@ import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -71,21 +70,14 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { private static final Logger LOG = LoggerFactory.getLogger(HiveTableOperations.class); - private static final String HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; - private static final long HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes - private static final DynMethods.UnboundMethod ALTER_TABLE = DynMethods.builder("alter_table") - .impl(HiveMetaStoreClient.class, "alter_table_with_environmentContext", - String.class, String.class, Table.class, EnvironmentContext.class) - .impl(HiveMetaStoreClient.class, "alter_table", - String.class, String.class, Table.class, EnvironmentContext.class) - .build(); - private final HiveClientPool metaClients; private final String fullName; private final String database; private final String tableName; private final Configuration conf; private final long lockAcquireTimeout; + private final String hiveClientClass; + private final DynMethods.UnboundMethod hiveAlterTableMethod; private FileIO fileIO; @@ -96,8 +88,18 @@ protected HiveTableOperations(Configuration conf, HiveClientPool metaClients, this.fullName = catalogName + "." + database + "." 
+ table; this.database = database; this.tableName = table; - this.lockAcquireTimeout = - conf.getLong(HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS, HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS_DEFAULT); + this.lockAcquireTimeout = conf.getLong( + IcebergHiveConfigs.HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS, + IcebergHiveConfigs.HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS_DEFAULT); + this.hiveClientClass = conf.get( + IcebergHiveConfigs.HIVE_CLIENT_IMPL, + IcebergHiveConfigs.HIVE_CLIENT_IMPL_DEFAULT); + this.hiveAlterTableMethod = DynMethods.builder("alter_table") + .impl(hiveClientClass, "alter_table_with_environmentContext", + String.class, String.class, Table.class, EnvironmentContext.class) + .impl(hiveClientClass, "alter_table", + String.class, String.class, Table.class, EnvironmentContext.class) + .build(); } @Override @@ -212,7 +214,7 @@ private void persistTable(Table hmsTable, boolean updateHiveTable) throws TExcep EnvironmentContext envContext = new EnvironmentContext( ImmutableMap.of(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE) ); - ALTER_TABLE.invoke(client, database, tableName, hmsTable, envContext); + hiveAlterTableMethod.invoke(client, database, tableName, hmsTable, envContext); return null; }); } else { diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/IcebergHiveConfigs.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/IcebergHiveConfigs.java new file mode 100644 index 000000000000..67e68979bef2 --- /dev/null +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/IcebergHiveConfigs.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hive; + +public class IcebergHiveConfigs { + + private IcebergHiveConfigs() { + } + + public static final String HIVE_CLIENT_POOL_SIZE = "iceberg.hive.client-pool-size"; + public static final int HIVE_CLIENT_POOL_SIZE_DEFAULT = 5; + + public static final String HIVE_CLIENT_IMPL = "iceberg.hive.client-impl"; + public static final String HIVE_CLIENT_IMPL_DEFAULT = "org.apache.hadoop.hive.metastore.HiveMetaStoreClient"; + + public static final String HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; + public static final long HIVE_ACQUIRE_LOCK_STATE_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes + +} diff --git a/settings.gradle b/settings.gradle index 0377d31838e6..123471813762 100644 --- a/settings.gradle +++ b/settings.gradle @@ -35,6 +35,7 @@ include 'spark3' include 'spark3-runtime' include 'pig' include 'hive-metastore' +include 'aws' project(':api').name = 'iceberg-api' project(':common').name = 'iceberg-common' @@ -53,6 +54,7 @@ project(':spark3').name = 'iceberg-spark3' project(':spark3-runtime').name = 'iceberg-spark3-runtime' project(':pig').name = 'iceberg-pig' project(':hive-metastore').name = 'iceberg-hive-metastore' +project(':aws').name = 'iceberg-aws' if (JavaVersion.current() == JavaVersion.VERSION_1_8) { include 'spark2' diff --git a/site/docs/cloud-integration.md b/site/docs/cloud-integration.md new file mode 100644 index 000000000000..402ac7dc53b6 --- /dev/null +++ b/site/docs/cloud-integration.md @@ -0,0 +1,67 @@ + + +# Cloud Integration + +This section describes the cloud integrations for Iceberg. + +## HDFS Connector + +Iceberg uses `HadoopFileIO` to write files for all catalogs by default. +Therefore, Iceberg supports writing to any cloud storage that has an HDFS connector. +Here is a list of commonly used cloud storage connectors: + +| Storage | URI scheme | Documentation | +|-------------------------|------------|---------------| +| Amazon S3 | s3a | [link](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html) | +| Amazon EMR File System | s3 | [link](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-fs.html) | +| Azure Data Lake Storage | adl | [link](https://hadoop.apache.org/docs/current/hadoop-azure-datalake/index.html) | +| Azure Blob Storage | wasb | [link](http://hadoop.apache.org/docs/r2.7.1/hadoop-azure/index.html) | +| Google Cloud Storage | gs | [link](https://cloud.google.com/dataproc/docs/concepts/connectors/cloud-storage) | + +For example, to use S3 with `HadoopCatalog`, you can use the following configurations in Spark: + +``` +spark.sql.catalog.hadoop_prod = org.apache.iceberg.spark.SparkCatalog +spark.sql.catalog.hadoop_prod.type = hadoop +spark.sql.catalog.hadoop_prod.warehouse = s3a://my-bucket/my-file-path +``` + +Please refer to each connector's documentation for the limitations and setup required to use that cloud storage. + +## AWS Integrations + +All AWS integrations live in the `iceberg-aws` submodule. Add this module as a dependency to use the integration features. + +### Using Hive-compatible AWS Glue catalog + +The `iceberg-aws` module provides `IcebergGlueMetastoreClient`, an implementation of the Hive +`IMetaStoreClient` interface that lets users use AWS Glue as a serverless Hive metastore. +Because AWS Glue does not support Hive's transaction and locking interface, the Glue client currently uses DynamoDB +to provide this additional support.
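A minimal, illustrative sketch of what this looks like from Java is shown below. The configuration key `iceberg.hive.client-impl` and the Glue client class come from this patch; the `HiveCatalog` constructor call and the `my_db.my_table` identifier are placeholders/assumptions, not part of the patch.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hive.HiveCatalog;

public class GlueMetastoreExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Swap the default HiveMetaStoreClient for the Glue-backed IMetaStoreClient
    // implementation; the key and its default are defined in IcebergHiveConfigs.
    conf.set("iceberg.hive.client-impl",
        "org.apache.iceberg.aws.glue.metastore.IcebergGlueMetastoreClient");

    // No Hive Thrift URI is needed: Glue is reached through the AWS SDK using
    // the default credential chain.
    HiveCatalog catalog = new HiveCatalog(conf);
    Table table = catalog.loadTable(TableIdentifier.of("my_db", "my_table"));
    System.out.println(table.location());
  }
}
```

The database and table names above are purely illustrative; any table registered in the Glue data catalog should be reachable the same way.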
+ +To enable Glue, set the Hadoop configuration `iceberg.hive.client-impl` to +`org.apache.iceberg.aws.glue.metastore.IcebergGlueMetastoreClient`. +Then use `HiveCatalog` as usual; there is no need to specify a Hive Thrift URI. + +The client uses the [default AWS credential chain](https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html) +to read IAM credentials. +All configurable settings of this client can be found in `org.apache.iceberg.aws.glue.util.AWSGlueConfig`. + +For more details about the AWS Glue Data Catalog, please refer to the following docs: +- [AWS Glue Introduction](https://docs.aws.amazon.com/glue/latest/dg/what-is-glue.html) +- [Using Glue on EMR](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-hive-metastore-glue.html) diff --git a/site/docs/configuration.md b/site/docs/configuration.md index 383fa93a4d0b..756cd01011df 100644 --- a/site/docs/configuration.md +++ b/site/docs/configuration.md @@ -50,6 +50,8 @@ Iceberg tables support table properties to configure table behavior, like the de | write.summary.partition-limit | 0 | Includes partition-level summary stats in snapshot summaries if the changed partition count is less than this limit | | write.metadata.delete-after-commit.enabled | false | Controls whether to delete the oldest version metadata files after commit | | write.metadata.previous-versions-max | 100 | The max number of previous version metadata files to keep before deleting after commit | +| write.object-storage.enabled | false | Enables optimizations for object storage | +| write.object-storage.path | (not set) | Base path URI for object storage; must be specified when write.object-storage.enabled is true | ### Table behavior properties