diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java index 99d841059626..919e40bec0fc 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java +++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java @@ -108,6 +108,8 @@ public class Constants { public static final String TIME_POSTFIX_REQUEST_TRACK = "_TIME"; public static final String ICEBERG = "iceberg"; - public static final String ICEBERG_PARTITION_TABLE_SCHEMA = "partition,record_count,file_count,spec_id"; + public static final String ICEBERG_PARTITION_TABLE_SCHEMA = "partition,spec_id,record_count,file_count," + + "position_delete_record_count,position_delete_file_count,equality_delete_record_count," + + "equality_delete_file_count"; public static final String DELIMITED_JSON_SERDE = "org.apache.hadoop.hive.serde2.DelimitedJSONSerDe"; } diff --git a/iceberg/iceberg-catalog/pom.xml b/iceberg/iceberg-catalog/pom.xml index efd29a48266d..2d7b9447afa0 100644 --- a/iceberg/iceberg-catalog/pom.xml +++ b/iceberg/iceberg-catalog/pom.xml @@ -58,6 +58,11 @@ hive-exec test + + org.immutables + value + provided + org.apache.hive hive-standalone-metastore-server diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java index e2dc990383e2..c93ce5455e9f 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/CachedClientPool.java @@ -21,53 +21,95 @@ import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.Scheduler; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Comparator; +import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.concurrent.TimeUnit; +import javax.annotation.Nullable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.ClientPool; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.PropertyUtil; +import org.apache.iceberg.util.ThreadPools; import org.apache.thrift.TException; +import org.immutables.value.Value; +/** + * A ClientPool that caches the underlying HiveClientPool instances. + * + *

+ * <p>The following key elements are supported and can be specified via {@link
+ * CatalogProperties#CLIENT_POOL_CACHE_KEYS}:
+ *
+ * <ul>
+ *   <li>ugi - the Hadoop UserGroupInformation instance that represents the current user using the cache
+ *   <li>user_name - similar to UGI, but only the user name as determined by UserGroupInformation#getUserName
+ *   <li>conf - the value of an arbitrary Hadoop configuration; the element must start with the "conf:" prefix
+ *       followed by the configuration key, and the resolved value becomes part of the cache key
+ * </ul>
+ */ public class CachedClientPool implements ClientPool { - private static Cache clientPoolCache; + private static final String CONF_ELEMENT_PREFIX = "conf:"; + + private static Cache clientPoolCache; private final Configuration conf; - private final String metastoreUri; private final int clientPoolSize; private final long evictionInterval; + private final Key key; public CachedClientPool(Configuration conf, Map properties) { this.conf = conf; - this.metastoreUri = conf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); this.clientPoolSize = PropertyUtil.propertyAsInt(properties, CatalogProperties.CLIENT_POOL_SIZE, CatalogProperties.CLIENT_POOL_SIZE_DEFAULT); this.evictionInterval = PropertyUtil.propertyAsLong(properties, CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS_DEFAULT); + this.key = extractKey(properties.get(CatalogProperties.CLIENT_POOL_CACHE_KEYS), conf); init(); } @VisibleForTesting HiveClientPool clientPool() { - return clientPoolCache.get(metastoreUri, k -> new HiveClientPool(clientPoolSize, conf)); + return clientPoolCache.get(key, k -> new HiveClientPool(clientPoolSize, conf)); } private synchronized void init() { if (clientPoolCache == null) { - clientPoolCache = Caffeine.newBuilder().expireAfterAccess(evictionInterval, TimeUnit.MILLISECONDS) - .removalListener((key, value, cause) -> ((HiveClientPool) value).close()) + // Since Caffeine does not ensure that removalListener will be involved after expiration + // We use a scheduler with one thread to clean up expired clients. + clientPoolCache = + Caffeine.newBuilder() + .expireAfterAccess(evictionInterval, TimeUnit.MILLISECONDS) + .removalListener((ignored, value, cause) -> ((HiveClientPool) value).close()) + .scheduler( + Scheduler.forScheduledExecutorService( + ThreadPools.newScheduledPool("hive-metastore-cleaner", 1))) .build(); } } @VisibleForTesting - static Cache clientPoolCache() { + static Cache clientPoolCache() { return clientPoolCache; } @@ -81,4 +123,91 @@ public R run(Action action, boolean retry) throws TException, InterruptedException { return clientPool().run(action, retry); } + + @VisibleForTesting + static Key extractKey(String cacheKeys, Configuration conf) { + // generate key elements in a certain order, so that the Key instances are comparable + List elements = Lists.newArrayList(); + elements.add(conf.get(HiveConf.ConfVars.METASTOREURIS.varname, "")); + elements.add(conf.get(HiveCatalog.HIVE_CONF_CATALOG, "hive")); + if (cacheKeys == null || cacheKeys.isEmpty()) { + return Key.of(elements); + } + + Set types = Sets.newTreeSet(Comparator.comparingInt(Enum::ordinal)); + Map confElements = Maps.newTreeMap(); + for (String element : cacheKeys.split(",", -1)) { + String trimmed = element.trim(); + if (trimmed.toLowerCase(Locale.ROOT).startsWith(CONF_ELEMENT_PREFIX)) { + String key = trimmed.substring(CONF_ELEMENT_PREFIX.length()); + ValidationException.check( + !confElements.containsKey(key), "Conf key element %s already specified", key); + confElements.put(key, conf.get(key)); + } else { + KeyElementType type = KeyElementType.valueOf(trimmed.toUpperCase()); + switch (type) { + case UGI: + case USER_NAME: + ValidationException.check( + !types.contains(type), "%s key element already specified", type.name()); + types.add(type); + break; + default: + throw new ValidationException("Unknown key element %s", trimmed); + } + } + } + for (KeyElementType type : types) { + switch (type) { + case UGI: + try { + 
elements.add(UserGroupInformation.getCurrentUser()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + break; + case USER_NAME: + try { + elements.add(UserGroupInformation.getCurrentUser().getUserName()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + break; + default: + throw new RuntimeException("Unexpected key element " + type.name()); + } + } + for (String key : confElements.keySet()) { + elements.add(ConfElement.of(key, confElements.get(key))); + } + return Key.of(elements); + } + + @Value.Immutable + abstract static class Key { + + abstract List elements(); + + private static Key of(Iterable elements) { + return ImmutableKey.builder().elements(elements).build(); + } + } + + @Value.Immutable + abstract static class ConfElement { + abstract String key(); + + @Nullable + abstract String value(); + + static ConfElement of(String key, String value) { + return ImmutableConfElement.builder().key(key).value(value).build(); + } + } + + private enum KeyElementType { + UGI, + USER_NAME, + CONF + } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 6cccb0c01a2f..6c98cee6a528 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.iceberg.BaseMetastoreCatalog; @@ -65,6 +66,13 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa public static final String LIST_ALL_TABLES = "list-all-tables"; public static final String LIST_ALL_TABLES_DEFAULT = "false"; + public static final String HMS_TABLE_OWNER = "hive.metastore.table.owner"; + public static final String HMS_DB_OWNER = "hive.metastore.database.owner"; + public static final String HMS_DB_OWNER_TYPE = "hive.metastore.database.owner-type"; + + // MetastoreConf is not available with current Hive version + static final String HIVE_CONF_CATALOG = "metastore.catalog.default"; + private static final Logger LOG = LoggerFactory.getLogger(HiveCatalog.class); private String name; @@ -244,11 +252,16 @@ public void renameTable(TableIdentifier from, TableIdentifier originalTo) { @Override public void createNamespace(Namespace namespace, Map meta) { Preconditions.checkArgument( - !namespace.isEmpty(), - "Cannot create namespace with invalid name: %s", namespace); - Preconditions.checkArgument(isValidateNamespace(namespace), - "Cannot support multi part namespace in Hive Metastore: %s", namespace); - + !namespace.isEmpty(), "Cannot create namespace with invalid name: %s", namespace); + Preconditions.checkArgument( + isValidateNamespace(namespace), + "Cannot support multi part namespace in Hive Metastore: %s", + namespace); + Preconditions.checkArgument( + meta.get(HMS_DB_OWNER_TYPE) == null || meta.get(HMS_DB_OWNER) != null, + "Create namespace setting %s without setting %s is not allowed", + HMS_DB_OWNER_TYPE, + HMS_DB_OWNER); try { clients.run(client -> { client.createDatabase(convertToDatabase(namespace, meta)); @@ -262,12 +275,12 @@ public 
void createNamespace(Namespace namespace, Map meta) { namespace); } catch (TException e) { - throw new RuntimeException("Failed to create namespace " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Failed to create namespace " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to createDatabase(name) " + namespace + " in Hive Matastore", e); + "Interrupted in call to createDatabase(name) " + namespace + " in Hive Metastore", e); } } @@ -289,12 +302,12 @@ public List listNamespaces(Namespace namespace) { return namespaces; } catch (TException e) { - throw new RuntimeException("Failed to list all namespace: " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Failed to list all namespace: " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to getAllDatabases() " + namespace + " in Hive Matastore", e); + "Interrupted in call to getAllDatabases() " + namespace + " in Hive Metastore", e); } } @@ -323,17 +336,22 @@ public boolean dropNamespace(Namespace namespace) { return false; } catch (TException e) { - throw new RuntimeException("Failed to drop namespace " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Failed to drop namespace " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to drop dropDatabase(name) " + namespace + " in Hive Matastore", e); + "Interrupted in call to drop dropDatabase(name) " + namespace + " in Hive Metastore", e); } } @Override public boolean setProperties(Namespace namespace, Map properties) { + Preconditions.checkArgument( + (properties.get(HMS_DB_OWNER_TYPE) == null) == (properties.get(HMS_DB_OWNER) == null), + "Setting %s and %s has to be performed together or not at all", + HMS_DB_OWNER_TYPE, + HMS_DB_OWNER); Map parameter = Maps.newHashMap(); parameter.putAll(loadNamespaceMetadata(namespace)); @@ -349,6 +367,11 @@ public boolean setProperties(Namespace namespace, Map propertie @Override public boolean removeProperties(Namespace namespace, Set properties) { + Preconditions.checkArgument( + properties.contains(HMS_DB_OWNER_TYPE) == properties.contains(HMS_DB_OWNER), + "Removing %s and %s has to be performed together or not at all", + HMS_DB_OWNER_TYPE, + HMS_DB_OWNER); Map parameter = Maps.newHashMap(); parameter.putAll(loadNamespaceMetadata(namespace)); @@ -374,11 +397,11 @@ private void alterHiveDataBase(Namespace namespace, Database database) { } catch (TException e) { throw new RuntimeException( - "Failed to list namespace under namespace: " + namespace + " in Hive Matastore", e); + "Failed to list namespace under namespace: " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - throw new RuntimeException("Interrupted in call to getDatabase(name) " + namespace + " in Hive Matastore", e); + throw new RuntimeException("Interrupted in call to getDatabase(name) " + namespace + " in Hive Metastore", e); } } @@ -398,12 +421,12 @@ public Map loadNamespaceMetadata(Namespace namespace) { throw new NoSuchNamespaceException(e, "Namespace does not exist: %s", namespace); } catch (TException e) { - throw new RuntimeException("Failed to list namespace under namespace: " + namespace + " in Hive Matastore", e); + throw new 
RuntimeException("Failed to list namespace under namespace: " + namespace + " in Hive Metastore", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( - "Interrupted in call to getDatabase(name) " + namespace + " in Hive Matastore", e); + "Interrupted in call to getDatabase(name) " + namespace + " in Hive Metastore", e); } } @@ -489,6 +512,12 @@ private Map convertToMetadata(Database database) { if (database.getDescription() != null) { meta.put("comment", database.getDescription()); } + if (database.getOwnerName() != null) { + meta.put(HMS_DB_OWNER, database.getOwnerName()); + if (database.getOwnerType() != null) { + meta.put(HMS_DB_OWNER_TYPE, database.getOwnerType().name()); + } + } return meta; } @@ -510,12 +539,22 @@ Database convertToDatabase(Namespace namespace, Map meta) { database.setDescription(value); } else if (key.equals("location")) { database.setLocationUri(value); + } else if (key.equals(HMS_DB_OWNER)) { + database.setOwnerName(value); + } else if (key.equals(HMS_DB_OWNER_TYPE) && value != null) { + database.setOwnerType(PrincipalType.valueOf(value)); } else { if (value != null) { parameter.put(key, value); } } }); + + if (database.getOwnerName() == null) { + database.setOwnerName(HiveHadoopUtil.currentUser()); + database.setOwnerType(PrincipalType.USER); + } + database.setParameters(parameter); return database; @@ -547,4 +586,9 @@ public Map properties() { void setListAllTables(boolean listAllTables) { this.listAllTables = listAllTables; } + + @VisibleForTesting + ClientPool clientPool() { + return clients; + } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java deleted file mode 100644 index 63d5d40d19fd..000000000000 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveCommitLock.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.iceberg.hive; - -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.Optional; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.locks.ReentrantLock; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.LockComponent; -import org.apache.hadoop.hive.metastore.api.LockLevel; -import org.apache.hadoop.hive.metastore.api.LockRequest; -import org.apache.hadoop.hive.metastore.api.LockResponse; -import org.apache.hadoop.hive.metastore.api.LockState; -import org.apache.hadoop.hive.metastore.api.LockType; -import org.apache.iceberg.ClientPool; -import org.apache.iceberg.exceptions.CommitFailedException; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.util.Tasks; -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class HiveCommitLock { - - private static final Logger LOG = LoggerFactory.getLogger(HiveCommitLock.class); - - private static final String HIVE_ACQUIRE_LOCK_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; - private static final String HIVE_LOCK_CHECK_MIN_WAIT_MS = "iceberg.hive.lock-check-min-wait-ms"; - private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS = "iceberg.hive.lock-check-max-wait-ms"; - private static final String HIVE_TABLE_LEVEL_LOCK_EVICT_MS = "iceberg.hive.table-level-lock-evict-ms"; - private static final long HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes - private static final long HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds - private static final long HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds - - private static final long HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT = TimeUnit.MINUTES.toMillis(10); - - private static Cache commitLockCache; - - private static synchronized void initTableLevelLockCache(long evictionTimeout) { - if (commitLockCache == null) { - commitLockCache = Caffeine.newBuilder() - .expireAfterAccess(evictionTimeout, TimeUnit.MILLISECONDS) - .build(); - } - } - - private final String fullName; - private final String databaseName; - private final String tableName; - private final ClientPool metaClients; - - private final long lockAcquireTimeout; - private final long lockCheckMinWaitTime; - private final long lockCheckMaxWaitTime; - - private Optional hmsLockId = Optional.empty(); - private Optional jvmLock = Optional.empty(); - - public HiveCommitLock(Configuration conf, ClientPool metaClients, - String catalogName, String databaseName, String tableName) { - this.metaClients = metaClients; - this.databaseName = databaseName; - this.tableName = tableName; - this.fullName = catalogName + "." + databaseName + "." 
+ tableName; - - this.lockAcquireTimeout = - conf.getLong(HIVE_ACQUIRE_LOCK_TIMEOUT_MS, HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT); - this.lockCheckMinWaitTime = - conf.getLong(HIVE_LOCK_CHECK_MIN_WAIT_MS, HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT); - this.lockCheckMaxWaitTime = - conf.getLong(HIVE_LOCK_CHECK_MAX_WAIT_MS, HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT); - long tableLevelLockCacheEvictionTimeout = - conf.getLong(HIVE_TABLE_LEVEL_LOCK_EVICT_MS, HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT); - initTableLevelLockCache(tableLevelLockCacheEvictionTimeout); - } - - public void acquire() throws UnknownHostException, TException, InterruptedException { - // getting a process-level lock per table to avoid concurrent commit attempts to the same table from the same - // JVM process, which would result in unnecessary and costly HMS lock acquisition requests - acquireJvmLock(); - acquireLockFromHms(); - } - - public void release() { - releaseHmsLock(); - releaseJvmLock(); - } - - // TODO add lock heart beating for cases where default lock timeout is too low. - private void acquireLockFromHms() throws UnknownHostException, TException, InterruptedException { - if (hmsLockId.isPresent()) { - throw new IllegalArgumentException(String.format("HMS lock ID=%s already acquired for table %s.%s", - hmsLockId.get(), databaseName, tableName)); - } - final LockComponent lockComponent = new LockComponent(LockType.EXCL_WRITE, LockLevel.TABLE, databaseName); - lockComponent.setTablename(tableName); - final LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), - System.getProperty("user.name"), - InetAddress.getLocalHost().getHostName()); - LockResponse lockResponse = metaClients.run(client -> client.lock(lockRequest)); - AtomicReference state = new AtomicReference<>(lockResponse.getState()); - long lockId = lockResponse.getLockid(); - this.hmsLockId = Optional.of(lockId); - - final long start = System.currentTimeMillis(); - long duration = 0; - boolean timeout = false; - - try { - if (state.get().equals(LockState.WAITING)) { - // Retry count is the typical "upper bound of retries" for Tasks.run() function. In fact, the maximum number of - // attempts the Tasks.run() would try is `retries + 1`. Here, for checking locks, we use timeout as the - // upper bound of retries. So it is just reasonable to set a large retry count. However, if we set - // Integer.MAX_VALUE, the above logic of `retries + 1` would overflow into Integer.MIN_VALUE. Hence, - // the retry is set conservatively as `Integer.MAX_VALUE - 100` so it doesn't hit any boundary issues. 
- Tasks.foreach(lockId) - .retry(Integer.MAX_VALUE - 100) - .exponentialBackoff( - lockCheckMinWaitTime, - lockCheckMaxWaitTime, - lockAcquireTimeout, - 1.5) - .throwFailureWhenFinished() - .onlyRetryOn(WaitingForHmsLockException.class) - .run(id -> { - try { - LockResponse response = metaClients.run(client -> client.checkLock(id)); - LockState newState = response.getState(); - state.set(newState); - if (newState.equals(LockState.WAITING)) { - throw new WaitingForHmsLockException("Waiting for lock."); - } - } catch (InterruptedException e) { - Thread.interrupted(); // Clear the interrupt status flag - LOG.warn("Interrupted while waiting for lock.", e); - } - }, TException.class); - } - } catch (WaitingForHmsLockException waitingForLockException) { - timeout = true; - duration = System.currentTimeMillis() - start; - } finally { - if (!state.get().equals(LockState.ACQUIRED)) { - releaseHmsLock(); - } - } - - // timeout and do not have lock acquired - if (timeout && !state.get().equals(LockState.ACQUIRED)) { - throw new CommitFailedException("Timed out after %s ms waiting for lock on %s.%s", - duration, databaseName, tableName); - } - - if (!state.get().equals(LockState.ACQUIRED)) { - throw new CommitFailedException("Could not acquire the lock on %s.%s, " + - "lock request ended in state %s", databaseName, tableName, state); - } - } - - private void releaseHmsLock() { - if (hmsLockId.isPresent()) { - try { - metaClients.run(client -> { - client.unlock(hmsLockId.get()); - return null; - }); - hmsLockId = Optional.empty(); - } catch (Exception e) { - LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); - } - } - } - - private void acquireJvmLock() { - if (jvmLock.isPresent()) { - throw new IllegalStateException(String.format("JVM lock already acquired for table %s", fullName)); - } - jvmLock = Optional.of(commitLockCache.get(fullName, t -> new ReentrantLock(true))); - jvmLock.get().lock(); - } - - private void releaseJvmLock() { - if (jvmLock.isPresent()) { - jvmLock.get().unlock(); - jvmLock = Optional.empty(); - } - } - - public String getDatabaseName() { - return databaseName; - } - - public String getTableName() { - return tableName; - } - - private static class WaitingForHmsLockException extends RuntimeException { - WaitingForHmsLockException(String message) { - super(message); - } - } -} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java new file mode 100644 index 000000000000..fb13cb318eb9 --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveHadoopUtil.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hive; + +import java.io.IOException; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HiveHadoopUtil { + + private static final Logger LOG = LoggerFactory.getLogger(HiveHadoopUtil.class); + + private HiveHadoopUtil() { + } + + public static String currentUser() { + String username = null; + try { + username = UserGroupInformation.getCurrentUser().getShortUserName(); + } catch (IOException e) { + LOG.warn("Failed to get Hadoop user", e); + } + + if (username != null) { + return username; + } else { + LOG.warn("Hadoop user is null, defaulting to user.name"); + return System.getProperty("user.name"); + } + } +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveLock.java new file mode 100644 index 000000000000..20517f3e9052 --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveLock.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hive; + +interface HiveLock { + void lock() throws LockException; + + void ensureActive() throws LockException; + + void unlock(); +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java index e67577eb588a..8ab320a4cdb7 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveSchemaUtil.java @@ -301,7 +301,7 @@ private static String convertToTypeString(Type type) { return "string"; case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) type; - if (MetastoreUtil.hive3PresentOnClasspath() && timestampType.shouldAdjustToUTC()) { + if (HiveVersion.min(HiveVersion.HIVE_3) && timestampType.shouldAdjustToUTC()) { return "timestamp with local time zone"; } return "timestamp"; diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 43f7e52382b4..4b747ac7cfe8 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -19,7 +19,6 @@ package org.apache.iceberg.hive; -import java.net.UnknownHostException; import java.util.Collections; import java.util.Locale; import java.util.Map; @@ -56,6 +55,7 @@ import org.apache.iceberg.hadoop.ConfigProperties; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.BiMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableBiMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -81,6 +81,8 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { // characters, see https://issues.apache.org/jira/browse/HIVE-12274 // set to 0 to not expose Iceberg metadata in HMS Table properties. private static final String HIVE_TABLE_PROPERTY_MAX_SIZE = "iceberg.hive.table-property-max-size"; + private static final String NO_LOCK_EXPECTED_KEY = "expected_parameter_key"; + private static final String NO_LOCK_EXPECTED_VALUE = "expected_parameter_value"; private static final long HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT = 32672; private static final BiMap ICEBERG_TO_HMS_TRANSLATION = ImmutableBiMap.of( @@ -169,31 +171,30 @@ protected void doRefresh() { @SuppressWarnings("checkstyle:CyclomaticComplexity") @Override protected void doCommit(TableMetadata base, TableMetadata metadata) { - String newMetadataLocation = base == null && metadata.metadataFileLocation() != null ? 
- metadata.metadataFileLocation() : writeNewMetadata(metadata, currentVersion() + 1); + boolean newTable = base == null; + String newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata); boolean hiveEngineEnabled = hiveEngineEnabled(metadata, conf); boolean keepHiveStats = conf.getBoolean(ConfigProperties.KEEP_HIVE_STATS, false); CommitStatus commitStatus = CommitStatus.FAILURE; boolean updateHiveTable = false; - HiveCommitLock commitLock = null; + HiveLock lock = lockObject(metadata); try { - commitLock = createLock(); - commitLock.acquire(); + lock.lock(); Table tbl = loadHmsTable(); if (tbl != null) { // If we try to create the table but the metadata location is already set, then we had a concurrent commit - if (base == null && tbl.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP) != null) { + if (newTable && tbl.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP) != null) { throw new AlreadyExistsException("Table already exists: %s.%s", database, tableName); } updateHiveTable = true; LOG.debug("Committing existing table: {}", fullName); } else { - tbl = newHmsTable(); + tbl = newHmsTable(metadata); LOG.debug("Committing new table: {}", fullName); } @@ -225,20 +226,45 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { StatsSetupConst.clearColumnStatsState(tbl.getParameters()); } + lock.ensureActive(); try { - persistTable(tbl, updateHiveTable); + persistTable( + tbl, updateHiveTable, hiveLockEnabled(metadata, conf) ? null : baseMetadataLocation); + lock.ensureActive(); + commitStatus = CommitStatus.SUCCESS; + } catch (LockException le) { + throw new CommitStateUnknownException( + "Failed to heartbeat for hive lock while " + + "committing changes. This can lead to a concurrent commit attempt be able to overwrite this commit. " + + "Please check the commit history. If you are running into this issue, try reducing " + + "iceberg.hive.lock-heartbeat-interval-ms.", + le); } catch (org.apache.hadoop.hive.metastore.api.AlreadyExistsException e) { throw new AlreadyExistsException(e, "Table already exists: %s.%s", database, tableName); } catch (InvalidObjectException e) { throw new ValidationException(e, "Invalid Hive object for %s.%s", database, tableName); + } catch (CommitFailedException | CommitStateUnknownException e) { + throw e; + } catch (Throwable e) { + if (e.getMessage() + .contains( + "The table has been modified. The parameter value for key '" + + HiveTableOperations.METADATA_LOCATION_PROP + + "' is")) { + throw new CommitFailedException( + e, "The table %s.%s has been modified concurrently", database, tableName); + } + if (e.getMessage() != null && e.getMessage().contains("Table/View 'HIVE_LOCKS' does not exist")) { - throw new RuntimeException("Failed to acquire locks from metastore because the underlying metastore " + - "table 'HIVE_LOCKS' does not exist. This can occur when using an embedded metastore which does not " + - "support transactions. To fix this use an alternative metastore.", e); + throw new RuntimeException( + "Failed to acquire locks from metastore because the underlying metastore " + + "table 'HIVE_LOCKS' does not exist. This can occur when using an embedded metastore which does not " + + "support transactions. 
To fix this use an alternative metastore.", + e); } LOG.error("Cannot tell if commit to {}.{} succeeded, attempting to reconnect and check.", @@ -253,27 +279,43 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { throw new CommitStateUnknownException(e); } } - } catch (TException | UnknownHostException e) { + } catch (TException e) { throw new RuntimeException(String.format("Metastore operation failed for %s.%s", database, tableName), e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException("Interrupted during commit", e); + } catch (LockException e) { + throw new CommitFailedException(e); + } finally { - cleanupMetadataAndUnlock(commitStatus, newMetadataLocation, commitLock); + cleanupMetadataAndUnlock(commitStatus, newMetadataLocation, lock); } LOG.info("Committed to table {} with the new metadata location {}", fullName, newMetadataLocation); } @VisibleForTesting - void persistTable(Table hmsTable, boolean updateHiveTable) throws TException, InterruptedException { + void persistTable(Table hmsTable, boolean updateHiveTable, String expectedMetadataLocation) + throws TException, InterruptedException { if (updateHiveTable) { - metaClients.run(client -> { - MetastoreUtil.alterTable(client, database, tableName, hmsTable); - return null; - }); + metaClients.run( + client -> { + MetastoreUtil.alterTable( + client, + database, + tableName, + hmsTable, + expectedMetadataLocation != null ? + ImmutableMap.of( + NO_LOCK_EXPECTED_KEY, + METADATA_LOCATION_PROP, + NO_LOCK_EXPECTED_VALUE, + expectedMetadataLocation) : + ImmutableMap.of()); + return null; + }); } else { metaClients.run(client -> { client.createTable(hmsTable); @@ -282,7 +324,8 @@ void persistTable(Table hmsTable, boolean updateHiveTable) throws TException, In } } - private Table loadHmsTable() throws TException, InterruptedException { + @VisibleForTesting + Table loadHmsTable() throws TException, InterruptedException { try { return metaClients.run(client -> client.getTable(database, tableName)); } catch (NoSuchObjectException nte) { @@ -291,12 +334,13 @@ private Table loadHmsTable() throws TException, InterruptedException { } } - private Table newHmsTable() { + private Table newHmsTable(TableMetadata metadata) { + Preconditions.checkNotNull(metadata, "'metadata' parameter can't be null"); final long currentTimeMillis = System.currentTimeMillis(); Table newTable = new Table(tableName, database, - System.getProperty("user.name"), + metadata.property(HiveCatalog.HMS_TABLE_OWNER, HiveHadoopUtil.currentUser()), (int) currentTimeMillis / 1000, (int) currentTimeMillis / 1000, Integer.MAX_VALUE, @@ -318,11 +362,15 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM .orElseGet(Maps::newHashMap); // push all Iceberg table properties into HMS - metadata.properties().forEach((key, value) -> { - // translate key names between Iceberg and HMS where needed - String hmsKey = ICEBERG_TO_HMS_TRANSLATION.getOrDefault(key, key); - parameters.put(hmsKey, value); - }); + metadata.properties().entrySet().stream() + .filter(entry -> !entry.getKey().equalsIgnoreCase(HiveCatalog.HMS_TABLE_OWNER)) + .forEach( + entry -> { + String key = entry.getKey(); + // translate key names between Iceberg and HMS where needed + String hmsKey = ICEBERG_TO_HMS_TRANSLATION.getOrDefault(key, key); + parameters.put(hmsKey, entry.getValue()); + }); if (metadata.uuid() != null) { parameters.put(TableProperties.UUID, metadata.uuid()); } @@ -454,13 +502,8 @@ private StorageDescriptor 
storageDescriptor(TableMetadata metadata, boolean hive return storageDescriptor; } - @VisibleForTesting - HiveCommitLock createLock() throws UnknownHostException, TException, InterruptedException { - return new HiveCommitLock(conf, metaClients, catalogName, database, tableName); - } - private void cleanupMetadataAndUnlock(CommitStatus commitStatus, String metadataLocation, - HiveCommitLock lock) { + HiveLock lock) { try { if (commitStatus == CommitStatus.FAILURE) { // If we are sure the commit failed, clean up the uncommitted metadata file @@ -473,11 +516,10 @@ private void cleanupMetadataAndUnlock(CommitStatus commitStatus, String metadata } } - @VisibleForTesting - void doUnlock(HiveCommitLock lock) { + void doUnlock(HiveLock lock) { if (lock != null) { try { - lock.release(); + lock.unlock(); } catch (Exception e) { LOG.warn("Failed to unlock {}.{}", database, tableName, e); } @@ -510,6 +552,43 @@ private static boolean hiveEngineEnabled(TableMetadata metadata, Configuration c return metadata.propertyAsBoolean(TableProperties.ENGINE_HIVE_ENABLED, false); } - return conf.getBoolean(ConfigProperties.ENGINE_HIVE_ENABLED, TableProperties.ENGINE_HIVE_ENABLED_DEFAULT); + return conf.getBoolean( + ConfigProperties.ENGINE_HIVE_ENABLED, TableProperties.ENGINE_HIVE_ENABLED_DEFAULT); + } + + /** + * Returns if the hive locking should be enabled on the table, or not. + * + *

+ * <p>The decision is made like this:
+ *
+ * <ol>
+ *   <li>Table property value {@link TableProperties#HIVE_LOCK_ENABLED}
+ *   <li>If the table property is not set then check the hive-site.xml property value {@link
+ *       ConfigProperties#LOCK_HIVE_ENABLED}
+ *   <li>If none of the above is set then use the default value {@link
+ *       TableProperties#HIVE_LOCK_ENABLED_DEFAULT}
+ * </ol>
+ * + * @param metadata Table metadata to use + * @param conf The hive configuration to use + * @return if the hive engine related values should be enabled or not + */ + private static boolean hiveLockEnabled(TableMetadata metadata, Configuration conf) { + if (metadata.properties().get(TableProperties.HIVE_LOCK_ENABLED) != null) { + // We know that the property is set, so default value will not be used, + return metadata.propertyAsBoolean(TableProperties.HIVE_LOCK_ENABLED, false); + } + + return conf.getBoolean( + ConfigProperties.LOCK_HIVE_ENABLED, TableProperties.HIVE_LOCK_ENABLED_DEFAULT); + } + + @VisibleForTesting + HiveLock lockObject(TableMetadata metadata) { + if (hiveLockEnabled(metadata, conf)) { + return new MetastoreLock(conf, metaClients, catalogName, database, tableName); + } else { + return new NoLock(); + } } } diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java new file mode 100644 index 000000000000..fd8f9006ae9a --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/HiveVersion.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import java.util.List; +import org.apache.hive.common.util.HiveVersionInfo; +import org.apache.iceberg.relocated.com.google.common.base.Splitter; + +public enum HiveVersion { + HIVE_4(4), + HIVE_3(3), + HIVE_2(2), + HIVE_1_2(1), + NOT_SUPPORTED(0); + + private final int order; + private static final HiveVersion current = calculate(); + + HiveVersion(int order) { + this.order = order; + } + + public static HiveVersion current() { + return current; + } + + public static boolean min(HiveVersion other) { + return current.order >= other.order; + } + + private static HiveVersion calculate() { + String version = HiveVersionInfo.getShortVersion(); + List versions = Splitter.on('.').splitToList(version); + switch (versions.get(0)) { + case "4": + return HIVE_4; + case "3": + return HIVE_3; + case "2": + return HIVE_2; + case "1": + if (versions.get(1).equals("2")) { + return HIVE_1_2; + } else { + return NOT_SUPPORTED; + } + default: + return NOT_SUPPORTED; + } + } +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/LockException.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/LockException.java new file mode 100644 index 000000000000..79536c5bab12 --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/LockException.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +import com.google.errorprone.annotations.FormatMethod; + +class LockException extends RuntimeException { + @FormatMethod + LockException(String message, Object... args) { + super(String.format(message, args)); + } + + @FormatMethod + LockException(Throwable cause, String message, Object... args) { + super(String.format(message, args), cause); + } +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java new file mode 100644 index 000000000000..454a3a5f5e1a --- /dev/null +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreLock.java @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hive; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Optional; +import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantLock; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.LockComponent; +import org.apache.hadoop.hive.metastore.api.LockLevel; +import org.apache.hadoop.hive.metastore.api.LockRequest; +import org.apache.hadoop.hive.metastore.api.LockResponse; +import org.apache.hadoop.hive.metastore.api.LockState; +import org.apache.hadoop.hive.metastore.api.LockType; +import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; +import org.apache.iceberg.ClientPool; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.iceberg.util.Tasks; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MetastoreLock implements HiveLock { + + private static final Logger LOG = LoggerFactory.getLogger(MetastoreLock.class); + + private static final String HIVE_ACQUIRE_LOCK_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; + private static final String HIVE_LOCK_CHECK_MIN_WAIT_MS = "iceberg.hive.lock-check-min-wait-ms"; + private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS = "iceberg.hive.lock-check-max-wait-ms"; + private static final String HIVE_LOCK_CREATION_TIMEOUT_MS = "iceberg.hive.lock-creation-timeout-ms"; + private static final String HIVE_LOCK_CREATION_MIN_WAIT_MS = "iceberg.hive.lock-creation-min-wait-ms"; + private static final String HIVE_LOCK_CREATION_MAX_WAIT_MS = "iceberg.hive.lock-creation-max-wait-ms"; + private static final String HIVE_LOCK_HEARTBEAT_INTERVAL_MS = "iceberg.hive.lock-heartbeat-interval-ms"; + private static final String HIVE_TABLE_LEVEL_LOCK_EVICT_MS = "iceberg.hive.table-level-lock-evict-ms"; + private static final long HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes + private static final long HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds + private static final long HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds + private static final long HIVE_LOCK_CREATION_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes + private static final long HIVE_LOCK_CREATION_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds + private static final long HIVE_LOCK_CREATION_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds + private static final long HIVE_LOCK_HEARTBEAT_INTERVAL_MS_DEFAULT = 4 * 60 * 1000; // 4 minutes + private static final long HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT = TimeUnit.MINUTES.toMillis(10); + private static 
volatile Cache commitLockCache; + + private final ClientPool metaClients; + + private final String databaseName; + private final String tableName; + private final String fullName; + + private final long lockAcquireTimeout; + private final long lockCheckMinWaitTime; + private final long lockCheckMaxWaitTime; + private final long lockCreationTimeout; + private final long lockCreationMinWaitTime; + private final long lockCreationMaxWaitTime; + private final long lockHeartbeatIntervalTime; + private final ScheduledExecutorService exitingScheduledExecutorService; + private final String agentInfo; + + private Optional hmsLockId = Optional.empty(); + private ReentrantLock jvmLock = null; + private Heartbeat heartbeat = null; + + public MetastoreLock(Configuration conf, ClientPool metaClients, + String catalogName, String databaseName, String tableName) { + this.metaClients = metaClients; + this.fullName = catalogName + "." + databaseName + "." + tableName; + this.databaseName = databaseName; + this.tableName = tableName; + + this.lockAcquireTimeout = + conf.getLong(HIVE_ACQUIRE_LOCK_TIMEOUT_MS, HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT); + this.lockCheckMinWaitTime = + conf.getLong(HIVE_LOCK_CHECK_MIN_WAIT_MS, HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT); + this.lockCheckMaxWaitTime = + conf.getLong(HIVE_LOCK_CHECK_MAX_WAIT_MS, HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT); + this.lockCreationTimeout = + conf.getLong(HIVE_LOCK_CREATION_TIMEOUT_MS, HIVE_LOCK_CREATION_TIMEOUT_MS_DEFAULT); + this.lockCreationMinWaitTime = + conf.getLong(HIVE_LOCK_CREATION_MIN_WAIT_MS, HIVE_LOCK_CREATION_MIN_WAIT_MS_DEFAULT); + this.lockCreationMaxWaitTime = + conf.getLong(HIVE_LOCK_CREATION_MAX_WAIT_MS, HIVE_LOCK_CREATION_MAX_WAIT_MS_DEFAULT); + this.lockHeartbeatIntervalTime = + conf.getLong(HIVE_LOCK_HEARTBEAT_INTERVAL_MS, HIVE_LOCK_HEARTBEAT_INTERVAL_MS_DEFAULT); + long tableLevelLockCacheEvictionTimeout = + conf.getLong(HIVE_TABLE_LEVEL_LOCK_EVICT_MS, HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT); + + this.agentInfo = "Iceberg-" + UUID.randomUUID(); + + this.exitingScheduledExecutorService = + Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("iceberg-hive-lock-heartbeat-" + fullName + "-%d") + .build()); + + initTableLevelLockCache(tableLevelLockCacheEvictionTimeout); + } + + @Override + public void lock() throws LockException { + // getting a process-level lock per table to avoid concurrent commit attempts to the same table from the same + // JVM process, which would result in unnecessary HMS lock acquisition requests + acquireJvmLock(); + hmsLockId = Optional.of(acquireLock()); + + // Starting heartbeat for the HMS lock + heartbeat = + new Heartbeat(metaClients, hmsLockId.get(), lockHeartbeatIntervalTime); + heartbeat.schedule(exitingScheduledExecutorService); + } + + @Override + public void ensureActive() throws LockException { + if (heartbeat == null) { + throw new LockException("Lock is not active"); + } + + if (heartbeat.encounteredException != null) { + throw new LockException( + heartbeat.encounteredException, + "Failed to heartbeat for hive lock. 
%s", + heartbeat.encounteredException.getMessage()); + } + if (!heartbeat.active()) { + throw new LockException("Hive lock heartbeat thread not active"); + } + } + + @Override + public void unlock() { + if (heartbeat != null) { + heartbeat.cancel(); + exitingScheduledExecutorService.shutdown(); + } + + try { + unlock(hmsLockId); + } finally { + releaseJvmLock(); + } + } + + @SuppressWarnings("checkstyle:CyclomaticComplexity") + private long acquireLock() throws LockException { + if (hmsLockId.isPresent()) { + throw new IllegalArgumentException(String.format("HMS lock ID=%s already acquired for table %s.%s", + hmsLockId.get(), databaseName, tableName)); + } + LockInfo lockInfo = createLock(); + + final long start = System.currentTimeMillis(); + long duration = 0; + boolean timeout = false; + TException thriftError = null; + + try { + if (lockInfo.lockState.equals(LockState.WAITING)) { + // Retry count is the typical "upper bound of retries" for Tasks.run() function. In fact, the maximum number of + // attempts the Tasks.run() would try is `retries + 1`. Here, for checking locks, we use timeout as the + // upper bound of retries. So it is just reasonable to set a large retry count. However, if we set + // Integer.MAX_VALUE, the above logic of `retries + 1` would overflow into Integer.MIN_VALUE. Hence, + // the retry is set conservatively as `Integer.MAX_VALUE - 100` so it doesn't hit any boundary issues. + Tasks.foreach(lockInfo.lockId) + .retry(Integer.MAX_VALUE - 100) + .exponentialBackoff( + lockCheckMinWaitTime, + lockCheckMaxWaitTime, + lockAcquireTimeout, + 1.5) + .throwFailureWhenFinished() + .onlyRetryOn(WaitingForLockException.class) + .run(id -> { + try { + LockResponse response = metaClients.run(client -> client.checkLock(id)); + LockState newState = response.getState(); + lockInfo.lockState = newState; + if (newState.equals(LockState.WAITING)) { + throw new WaitingForLockException(String.format( + "Waiting for lock on table %s.%s", databaseName, tableName)); + } + } catch (InterruptedException e) { + Thread.interrupted(); // Clear the interrupt status flag + LOG.warn( + "Interrupted while waiting for lock on table {}.{}", + databaseName, + tableName, + e); + } + + }, TException.class); + } + } catch (WaitingForLockException e) { + timeout = true; + duration = System.currentTimeMillis() - start; + } catch (TException e) { + thriftError = e; + } finally { + if (!lockInfo.lockState.equals(LockState.ACQUIRED)) { + unlock(Optional.of(lockInfo.lockId)); + } + } + + if (!lockInfo.lockState.equals(LockState.ACQUIRED)) { + // timeout and do not have lock acquired + if (timeout) { + throw new LockException("Timed out after %s ms waiting for lock on %s.%s", + duration, databaseName, tableName); + } + + if (thriftError != null) { + throw new LockException( + thriftError, "Metastore operation failed for %s.%s", databaseName, tableName); + } + + // Just for safety. We should not get here. + throw new LockException( + "Could not acquire the lock on %s.%s, lock request ended in state %s", + databaseName, tableName, lockInfo.lockState); + } else { + return lockInfo.lockId; + } + } + + /** + * Creates a lock, retrying if possible on failure. 
+ * + * @return The {@link LockInfo} object for the successfully created lock + * @throws LockException When we are not able to fill the hostname for lock creation, or there is + * an error during lock creation + */ + @SuppressWarnings("ReverseDnsLookup") + private LockInfo createLock() throws LockException { + LockInfo lockInfo = new LockInfo(); + + String hostName; + try { + hostName = InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException uhe) { + throw new LockException(uhe, "Error generating host name"); + } + + LockComponent lockComponent = + new LockComponent(LockType.EXCL_WRITE, LockLevel.TABLE, databaseName); + lockComponent.setTablename(tableName); + LockRequest lockRequest = + new LockRequest( + Lists.newArrayList(lockComponent), + HiveHadoopUtil.currentUser(), + hostName); + + // Only works in Hive 2 or later. + if (HiveVersion.min(HiveVersion.HIVE_2)) { + lockRequest.setAgentInfo(agentInfo); + } + + AtomicBoolean interrupted = new AtomicBoolean(false); + Tasks.foreach(lockRequest) + .retry(Integer.MAX_VALUE - 100) + .exponentialBackoff( + lockCreationMinWaitTime, lockCreationMaxWaitTime, lockCreationTimeout, 2.0) + .shouldRetryTest(e -> !interrupted.get() && e instanceof LockException && + HiveVersion.min(HiveVersion.HIVE_2)) + .throwFailureWhenFinished() + .run( + request -> { + try { + LockResponse lockResponse = metaClients.run(client -> client.lock(request)); + lockInfo.lockId = lockResponse.getLockid(); + lockInfo.lockState = lockResponse.getState(); + } catch (TException te) { + LOG.warn("Failed to create lock {}", request, te); + try { + // If we can not check for lock, or we do not find it, then rethrow the exception + // Otherwise we are happy as the findLock sets the lockId and the state correctly + if (!HiveVersion.min(HiveVersion.HIVE_2)) { + LockInfo lockFound = findLock(); + if (lockFound != null) { + lockInfo.lockId = lockFound.lockId; + lockInfo.lockState = lockFound.lockState; + LOG.info("Found lock {} by agentInfo {}", lockInfo, agentInfo); + return; + } + } + + throw new LockException("Failed to find lock for table %s.%s", databaseName, tableName); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + interrupted.set(true); + LOG.warn( + "Interrupted while trying to find lock for table {}.{}", databaseName, tableName, e); + throw new LockException( + e, "Interrupted while trying to find lock for table %s.%s", databaseName, tableName); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + interrupted.set(true); + LOG.warn("Interrupted while creating lock on table {}.{}", databaseName, tableName, e); + throw new LockException( + e, "Interrupted while creating lock on table %s.%s", databaseName, tableName); + } + }, + LockException.class); + + // This should be initialized always, or exception should be thrown. + LOG.debug("Lock {} created for table {}.{}", lockInfo, databaseName, tableName); + return lockInfo; + } + + /** + * Search for the locks using HMSClient.showLocks identified by the agentInfo. If the lock is + * there, then a {@link LockInfo} object is returned. If the lock is not found null + * is returned. 
+ * + * @return The {@link LockInfo} for the found lock, or null if nothing found + */ + private LockInfo findLock() throws LockException, InterruptedException { + Preconditions.checkArgument( + HiveVersion.min(HiveVersion.HIVE_2), + "Minimally Hive 2 HMS client is needed to find the Lock using the showLocks API call"); + ShowLocksRequest showLocksRequest = new ShowLocksRequest(); + showLocksRequest.setDbname(databaseName); + showLocksRequest.setTablename(tableName); + ShowLocksResponse response; + try { + response = metaClients.run(client -> client.showLocks(showLocksRequest)); + } catch (TException e) { + throw new LockException(e, "Failed to find lock for table %s.%s", databaseName, tableName); + } + for (ShowLocksResponseElement lock : response.getLocks()) { + if (lock.getAgentInfo().equals(agentInfo)) { + // We found our lock + return new LockInfo(lock.getLockid(), lock.getState()); + } + } + + // Not found anything + return null; + } + + private void unlock(Optional lockId) { + + Long id = null; + try { + if (!lockId.isPresent()) { + // Try to find the lock based on agentInfo. Only works with Hive 2 or later. + if (HiveVersion.min(HiveVersion.HIVE_2)) { + LockInfo lockInfo = findLock(); + if (lockInfo == null) { + // No lock found + LOG.info("No lock found with {} agentInfo", agentInfo); + return; + } + + id = lockInfo.lockId; + } else { + LOG.warn("Could not find lock with HMSClient {}", HiveVersion.current()); + return; + } + } else { + id = lockId.get(); + } + + doUnlock(id); + + } catch (InterruptedException ie) { + if (id != null) { + // Interrupted unlock. We try to unlock one more time if we have a lockId + try { + Thread.interrupted(); // Clear the interrupt status flag for now, so we can retry unlock + LOG.warn("Interrupted unlock we try one more time {}.{}", databaseName, tableName, ie); + doUnlock(id); + } catch (Exception e) { + LOG.warn("Failed to unlock even on 2nd attempt {}.{}", databaseName, tableName, e); + } finally { + Thread.currentThread().interrupt(); // Set back the interrupt status + } + } else { + Thread.currentThread().interrupt(); // Set back the interrupt status + LOG.warn("Interrupted finding locks to unlock {}.{}", databaseName, tableName, ie); + } + } catch (Exception e) { + LOG.warn("Failed to unlock {}.{}", databaseName, tableName, e); + } + } + + @VisibleForTesting + void doUnlock(long lockId) throws TException, InterruptedException { + metaClients.run( + client -> { + client.unlock(lockId); + return null; + }); + } + + + private void acquireJvmLock() { + if (jvmLock != null) { + throw new IllegalStateException(String.format("Cannot call acquireLock twice for %s", fullName)); + } + + jvmLock = commitLockCache.get(fullName, t -> new ReentrantLock(true)); + jvmLock.lock(); + } + + private void releaseJvmLock() { + if (jvmLock != null) { + jvmLock.unlock(); + jvmLock = null; + } + } + + private static void initTableLevelLockCache(long evictionTimeout) { + if (commitLockCache == null) { + synchronized (MetastoreLock.class) { + if (commitLockCache == null) { + commitLockCache = + Caffeine.newBuilder() + .expireAfterAccess(evictionTimeout, TimeUnit.MILLISECONDS) + .build(); + } + } + } + } + + private static class Heartbeat implements Runnable { + private final ClientPool hmsClients; + private final long lockId; + private final long intervalMs; + private ScheduledFuture future; + private volatile Exception encounteredException = null; + + Heartbeat( + ClientPool hmsClients, long lockId, long intervalMs) { + this.hmsClients = hmsClients; + this.lockId = 
lockId; + this.intervalMs = intervalMs; + this.future = null; + } + + @Override + public void run() { + try { + hmsClients.run( + client -> { + client.heartbeat(0, lockId); + return null; + }); + } catch (TException | InterruptedException e) { + this.encounteredException = e; + throw new CommitFailedException(e, "Failed to heartbeat for lock: %d", lockId); + } + } + + public void schedule(ScheduledExecutorService scheduler) { + future = + scheduler.scheduleAtFixedRate(this, intervalMs / 2, intervalMs, TimeUnit.MILLISECONDS); + } + + boolean active() { + return future != null && !future.isCancelled(); + } + + public void cancel() { + if (future != null) { + future.cancel(false); + } + } + } + + + private static class LockInfo { + private long lockId; + private LockState lockState; + + private LockInfo() { + this.lockId = -1; + this.lockState = null; + } + + private LockInfo(long lockId, LockState lockState) { + this.lockId = lockId; + this.lockState = lockState; + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("lockId", lockId) + .add("lockState", lockState) + .toString(); + } + } + + private static class WaitingForLockException extends RuntimeException { + WaitingForLockException(String message) { + super(message); + } + } +} diff --git a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index 76363f138c56..ee62b64ed23e 100644 --- a/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -19,57 +19,61 @@ package org.apache.iceberg.hive; +import java.util.Map; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.iceberg.common.DynMethods; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; public class MetastoreUtil { - - // this class is unique to Hive3 and cannot be found in Hive2, therefore a good proxy to see if - // we are working against Hive3 dependencies - private static final String HIVE3_UNIQUE_CLASS = "org.apache.hadoop.hive.serde2.io.DateWritableV2"; - - private static final DynMethods.UnboundMethod ALTER_TABLE = DynMethods.builder("alter_table") - .impl(IMetaStoreClient.class, "alter_table_with_environmentContext", - String.class, String.class, Table.class, EnvironmentContext.class) - .impl(IMetaStoreClient.class, "alter_table", - String.class, String.class, Table.class, EnvironmentContext.class) - .impl(IMetaStoreClient.class, "alter_table", - String.class, String.class, Table.class) + private static final DynMethods.UnboundMethod ALTER_TABLE = + DynMethods.builder("alter_table") + .impl( + IMetaStoreClient.class, + "alter_table_with_environmentContext", + String.class, + String.class, + Table.class, + EnvironmentContext.class) + .impl( + IMetaStoreClient.class, + "alter_table", + String.class, + String.class, + Table.class, + EnvironmentContext.class) + .impl(IMetaStoreClient.class, "alter_table", String.class, String.class, Table.class) .build(); - private static final boolean HIVE3_PRESENT_ON_CLASSPATH = detectHive3(); - private MetastoreUtil() { } /** - * Returns true if Hive3 dependencies are found on classpath, false 
otherwise. + * Calls alter_table method using the metastore client. If the HMS supports it, environmental + * context will be set in a way that turns off stats updates to avoid recursive file listing. */ - public static boolean hive3PresentOnClasspath() { - return HIVE3_PRESENT_ON_CLASSPATH; + public static void alterTable( + IMetaStoreClient client, String databaseName, String tblName, Table table) { + alterTable(client, databaseName, tblName, table, ImmutableMap.of()); } /** - * Calls alter_table method using the metastore client. If possible, an environmental context will be used that - * turns off stats updates to avoid recursive listing. + * Calls alter_table method using the metastore client. If the HMS supports it, environmental + * context will be set in a way that turns off stats updates to avoid recursive file listing. */ - public static void alterTable(IMetaStoreClient client, String databaseName, String tblName, Table table) { - EnvironmentContext envContext = new EnvironmentContext( - ImmutableMap.of(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE) - ); - ALTER_TABLE.invoke(client, databaseName, tblName, table, envContext); - } + public static void alterTable( + IMetaStoreClient client, + String databaseName, + String tblName, + Table table, + Map extraEnv) { + Map env = Maps.newHashMapWithExpectedSize(extraEnv.size() + 1); + env.putAll(extraEnv); + env.put(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); - private static boolean detectHive3() { - try { - Class.forName(HIVE3_UNIQUE_CLASS); - return true; - } catch (ClassNotFoundException e) { - return false; - } + ALTER_TABLE.invoke(client, databaseName, tblName, table, new EnvironmentContext(env)); } } diff --git a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/NoLock.java similarity index 66% rename from core/src/main/java/org/apache/iceberg/util/LocationUtil.java rename to iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/NoLock.java index 42c26524f28f..bc59f0ae358f 100644 --- a/core/src/main/java/org/apache/iceberg/util/LocationUtil.java +++ b/iceberg/iceberg-catalog/src/main/java/org/apache/iceberg/hive/NoLock.java @@ -17,22 +17,29 @@ * under the License. 
*/ -package org.apache.iceberg.util; +package org.apache.iceberg.hive; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -public class LocationUtil { - private LocationUtil() { +public class NoLock implements HiveLock { + public NoLock() { + Preconditions.checkArgument( + HiveVersion.min(HiveVersion.HIVE_2), + "Minimally Hive 2 HMS client is needed to use HIVE-26882 based locking"); + } + @Override + public void lock() throws LockException { + // no-op } - public static String stripTrailingSlash(String path) { - Preconditions.checkArgument(path != null && path.length() > 0, "path must not be null or empty"); + @Override + public void ensureActive() throws LockException { + // no-op + } - String result = path; - while (result.endsWith("/")) { - result = result.substring(0, result.length() - 1); - } - return result; + @Override + public void unlock() { + // no-op } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java index c6bcc1b1de82..3f8c7fc0cb24 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/AssertHelpers.java @@ -20,7 +20,8 @@ package org.apache.iceberg; import java.util.concurrent.Callable; -import org.junit.Assert; +import org.assertj.core.api.AbstractThrowableAssert; +import org.assertj.core.api.Assertions; public class AssertHelpers { @@ -39,12 +40,11 @@ public static void assertThrows(String message, Class expected, String containedInMessage, Callable callable) { - try { - callable.call(); - Assert.fail("No exception was thrown (" + message + "), expected: " + - expected.getName()); - } catch (Exception actual) { - handleException(message, expected, containedInMessage, actual); + AbstractThrowableAssert check = Assertions.assertThatThrownBy(callable::call) + .withFailMessage(message) + .isInstanceOf(expected); + if (null != containedInMessage) { + check.hasMessageContaining(containedInMessage); } } @@ -60,12 +60,11 @@ public static void assertThrows(String message, Class expected, String containedInMessage, Runnable runnable) { - try { - runnable.run(); - Assert.fail("No exception was thrown (" + message + "), expected: " + - expected.getName()); - } catch (Exception actual) { - handleException(message, expected, containedInMessage, actual); + AbstractThrowableAssert check = Assertions.assertThatThrownBy(runnable::run) + .withFailMessage(message) + .isInstanceOf(expected); + if (null != containedInMessage) { + check.hasMessageContaining(containedInMessage); } } @@ -105,36 +104,10 @@ public static void assertThrowsCause(String message, Class expected, String containedInMessage, Runnable runnable) { - try { - runnable.run(); - Assert.fail("No exception was thrown (" + message + "), expected: " + - expected.getName()); - } catch (Exception actual) { - Throwable cause = actual.getCause(); - if (cause instanceof Exception) { - handleException(message, expected, containedInMessage, (Exception) actual.getCause()); - } else { - Assert.fail("Occur non-exception cause: " + cause); - } - } - } - - private static void handleException(String message, - Class expected, - String containedInMessage, - Exception actual) { - try { - Assert.assertEquals(message, expected, actual.getClass()); - if (containedInMessage != null) { - Assert.assertTrue( - "Expected exception message (" + containedInMessage + ") missing: " + - actual.getMessage(), - 
actual.getMessage().contains(containedInMessage) - ); - } - } catch (AssertionError e) { - e.addSuppressed(actual); - throw e; - } + Assertions.assertThatThrownBy(runnable::run) + .withFailMessage(message) + .getCause() + .isInstanceOf(expected) + .hasMessageContaining(containedInMessage); } } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java similarity index 91% rename from iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java rename to iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java index 9c0059fb21f5..085c95b7ce27 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/TestHelpers.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/TestHelpers.java @@ -102,6 +102,24 @@ public static T roundTripSerialize(T type) throws IOException, ClassNotFound // ); // } + public static void assertSerializedMetadata(Table expected, Table actual) { + Assert.assertEquals("Name must match", expected.name(), actual.name()); + Assert.assertEquals("Location must match", expected.location(), actual.location()); + Assert.assertEquals("Props must match", expected.properties(), actual.properties()); + Assert.assertEquals("Schema must match", expected.schema().asStruct(), actual.schema().asStruct()); + Assert.assertEquals("Spec must match", expected.spec(), actual.spec()); + Assert.assertEquals("Sort order must match", expected.sortOrder(), actual.sortOrder()); + } + + public static void assertSerializedAndLoadedMetadata(Table expected, Table actual) { + assertSerializedMetadata(expected, actual); + Assert.assertEquals("Specs must match", expected.specs(), actual.specs()); + Assert.assertEquals("Sort orders must match", expected.sortOrders(), actual.sortOrders()); + Assert.assertEquals("Current snapshot must match", expected.currentSnapshot(), actual.currentSnapshot()); + Assert.assertEquals("Snapshots must match", expected.snapshots(), actual.snapshots()); + Assert.assertEquals("History must match", expected.history(), actual.history()); + } + private static class CheckReferencesBound extends ExpressionVisitors.ExpressionVisitor { private final String message; diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java index 9aef3d4b128f..4165346a673a 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveCreateReplaceTableTest.java @@ -21,7 +21,6 @@ import java.io.IOException; import org.apache.iceberg.AppendFiles; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.PartitionSpec; @@ -35,6 +34,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -102,11 +102,9 @@ public void testCreateTableTxnTableCreatedConcurrently() { catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC); Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER)); - AssertHelpers.assertThrows( - "Create table txn should fail", - 
AlreadyExistsException.class, - "Table already exists: hivedb.tbl", - txn::commitTransaction); + Assertions.assertThatThrownBy(txn::commitTransaction) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.tbl"); } @Test @@ -139,11 +137,12 @@ public void testCreateTableTxnTableAlreadyExists() { catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC); Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER)); - AssertHelpers.assertThrows( - "Should not be possible to start a new create table txn", - AlreadyExistsException.class, - "Table already exists: hivedb.tbl", - () -> catalog.newCreateTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap())); + Assertions.assertThatThrownBy( + () -> + catalog.newCreateTableTransaction( + TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap())) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.tbl"); } @Test @@ -165,11 +164,10 @@ public void testReplaceTableTxn() { @Test public void testReplaceTableTxnTableNotExists() { - AssertHelpers.assertThrows( - "Should not be possible to start a new replace table txn", - NoSuchTableException.class, - "Table does not exist: hivedb.tbl", - () -> catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, false)); + Assertions.assertThatThrownBy( + () -> catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, false)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: hivedb.tbl"); } @Test @@ -185,11 +183,9 @@ public void testReplaceTableTxnTableDeletedConcurrently() { .set("prop", "value") .commit(); - AssertHelpers.assertThrows( - "Replace table txn should fail", - NoSuchTableException.class, - "No such table: hivedb.tbl", - txn::commitTransaction); + Assertions.assertThatThrownBy(txn::commitTransaction) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("No such table: hivedb.tbl"); } @Test diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java index b1fb891f3054..7f39fb1505ad 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveMetastoreTest.java @@ -19,6 +19,8 @@ package org.apache.iceberg.hive; +import java.util.Collections; +import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; @@ -42,16 +44,33 @@ public abstract class HiveMetastoreTest { @BeforeClass public static void startMetastore() throws Exception { + startMetastore(Collections.emptyMap()); + } + + public static void startMetastore(Map hiveConfOverride) throws Exception { HiveMetastoreTest.metastore = new TestHiveMetastore(); - metastore.start(); + HiveConf hiveConfWithOverrides = new HiveConf(TestHiveMetastore.class); + if (hiveConfOverride != null) { + for (Map.Entry kv : hiveConfOverride.entrySet()) { + hiveConfWithOverrides.set(kv.getKey(), kv.getValue()); + } + } + + metastore.start(hiveConfWithOverrides); HiveMetastoreTest.hiveConf = metastore.hiveConf(); - HiveMetastoreTest.metastoreClient = new HiveMetaStoreClient(hiveConf); + HiveMetastoreTest.metastoreClient = new HiveMetaStoreClient(hiveConfWithOverrides); String dbPath = metastore.getDatabasePath(DB_NAME); Database db = new Database(DB_NAME, 
"description", dbPath, Maps.newHashMap()); metastoreClient.createDatabase(db); - HiveMetastoreTest.catalog = (HiveCatalog) - CatalogUtil.loadCatalog(HiveCatalog.class.getName(), CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, ImmutableMap.of( - CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, String.valueOf(EVICTION_INTERVAL)), hiveConf); + HiveMetastoreTest.catalog = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of( + CatalogProperties.CLIENT_POOL_CACHE_EVICTION_INTERVAL_MS, + String.valueOf(EVICTION_INTERVAL)), + hiveConfWithOverrides); } @AfterClass diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java index 777e7ea590aa..e23d03effed1 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/HiveTableTest.java @@ -21,27 +21,33 @@ import java.io.File; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hive.iceberg.org.apache.avro.generic.GenericData; import org.apache.hive.iceberg.org.apache.avro.generic.GenericRecordBuilder; -import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.BaseTable; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileScanTask; import org.apache.iceberg.Files; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; +import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.TableProperties; import org.apache.iceberg.avro.Avro; import org.apache.iceberg.avro.AvroSchemaUtil; @@ -49,13 +55,16 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.hadoop.ConfigProperties; +import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.io.FileAppender; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -68,6 +77,7 @@ import static org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP; import static org.apache.iceberg.BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP; import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP; +import static 
org.apache.iceberg.TableMetadataParser.getFileExtension; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; @@ -404,6 +414,84 @@ public void testRegisterTable() throws TException { Assert.assertEquals(originalTable.getSd(), newTable.getSd()); } + @Test + public void testRegisterHadoopTableToHiveCatalog() throws IOException, TException { + // create a hadoop catalog + String tableLocation = tempFolder.newFolder().toString(); + HadoopCatalog hadoopCatalog = new HadoopCatalog(new Configuration(), tableLocation); + // create table using hadoop catalog + TableIdentifier identifier = TableIdentifier.of(DB_NAME, "table1"); + Table table = + hadoopCatalog.createTable( + identifier, schema, PartitionSpec.unpartitioned(), Maps.newHashMap()); + // insert some data + String file1Location = appendData(table, "file1"); + List tasks = Lists.newArrayList(table.newScan().planFiles()); + Assert.assertEquals("Should scan 1 file", 1, tasks.size()); + Assert.assertEquals(tasks.get(0).file().path(), file1Location); + + // collect metadata file + List metadataFiles = + Arrays.stream(new File(table.location() + "/metadata").listFiles()) + .map(File::getAbsolutePath) + .filter(f -> f.endsWith(getFileExtension(TableMetadataParser.Codec.NONE))) + .collect(Collectors.toList()); + Assert.assertEquals(2, metadataFiles.size()); + + Assertions.assertThatThrownBy(() -> metastoreClient.getTable(DB_NAME, "table1")) + .isInstanceOf(NoSuchObjectException.class) + .hasMessage("hive.hivedb.table1 table not found"); + Assertions.assertThatThrownBy(() -> catalog.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: hivedb.table1"); + + // register the table to hive catalog using the latest metadata file + String latestMetadataFile = ((BaseTable) table).operations().current().metadataFileLocation(); + catalog.registerTable(identifier, "file:" + latestMetadataFile); + Assert.assertNotNull(metastoreClient.getTable(DB_NAME, "table1")); + + // load the table in hive catalog + table = catalog.loadTable(identifier); + Assert.assertNotNull(table); + + // insert some data + String file2Location = appendData(table, "file2"); + tasks = Lists.newArrayList(table.newScan().planFiles()); + Assert.assertEquals("Should scan 2 files", 2, tasks.size()); + Set files = + tasks.stream().map(task -> task.file().path().toString()).collect(Collectors.toSet()); + Assert.assertTrue(files.contains(file1Location) && files.contains(file2Location)); + } + + private String appendData(Table table, String fileName) throws IOException { + GenericRecordBuilder recordBuilder = + new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test")); + List records = + Lists.newArrayList( + recordBuilder.set("id", 1L).build(), + recordBuilder.set("id", 2L).build(), + recordBuilder.set("id", 3L).build()); + + String fileLocation = table.location().replace("file:", "") + "/data/" + fileName + ".avro"; + try (FileAppender writer = + Avro.write(Files.localOutput(fileLocation)).schema(schema).named("test").build()) { + for (GenericData.Record rec : records) { + writer.add(rec); + } + } + + DataFile file = + DataFiles.builder(table.spec()) + .withRecordCount(3) + .withPath(fileLocation) + .withFileSizeInBytes(Files.localInput(fileLocation).getLength()) + .build(); + + table.newAppend().appendFile(file).commit(); + + return fileLocation; + } + @Test public void testRegisterExistingTable() throws TException { 
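    // Registering the same metadata location again must fail; the assertion below expects AlreadyExistsException.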
org.apache.hadoop.hive.metastore.api.Table originalTable = metastoreClient.getTable(DB_NAME, TABLE_NAME); @@ -417,10 +505,10 @@ public void testRegisterExistingTable() throws TException { Assert.assertEquals(1, metadataVersionFiles.size()); // Try to register an existing table - AssertHelpers.assertThrows( - "Should complain that the table already exists", AlreadyExistsException.class, - "Table already exists", - () -> catalog.registerTable(TABLE_IDENTIFIER, "file:" + metadataVersionFiles.get(0))); + Assertions.assertThatThrownBy( + () -> catalog.registerTable(TABLE_IDENTIFIER, "file:" + metadataVersionFiles.get(0))) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.tbl"); } @Test @@ -496,10 +584,9 @@ public void testMissingMetadataWontCauseHang() { File fakeLocation = new File(metadataLocation(TABLE_NAME) + "_dummy"); Assert.assertTrue(realLocation.renameTo(fakeLocation)); - AssertHelpers.assertThrows( - "HiveTableOperations shouldn't hang indefinitely when a missing metadata file is encountered", - NotFoundException.class, - () -> catalog.loadTable(TABLE_IDENTIFIER)); + Assertions.assertThatThrownBy(() -> catalog.loadTable(TABLE_IDENTIFIER)) + .isInstanceOf(NotFoundException.class) + .hasMessageStartingWith("Failed to open input stream for file"); Assert.assertTrue(fakeLocation.renameTo(realLocation)); } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java index fce4b307a201..c5960170432e 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/ScriptRunner.java @@ -101,9 +101,7 @@ public void runScript(Reader reader) throws IOException, SQLException { } finally { connection.setAutoCommit(originalAutoCommit); } - } catch (IOException e) { - throw e; - } catch (SQLException e) { + } catch (IOException | SQLException e) { throw e; } catch (Exception e) { throw new RuntimeException("Error running script. 
Cause: " + e, e); @@ -199,12 +197,7 @@ private void runScript(Connection conn, Reader reader) throws IOException, SQLEx if (!autoCommit) { conn.commit(); } - } catch (SQLException e) { - e.fillInStackTrace(); - printlnError("Error executing: " + command); - printlnError(e); - throw e; - } catch (IOException e) { + } catch (IOException | SQLException e) { e.fillInStackTrace(); printlnError("Error executing: " + command); printlnError(e); diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java index 7c1f3c4028fb..fd9e5f569e4f 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestCachedClientPool.java @@ -19,25 +19,154 @@ package org.apache.iceberg.hive; +import java.security.PrivilegedAction; import java.util.Collections; +import java.util.Map; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.hive.CachedClientPool.Key; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; +import static org.apache.iceberg.CatalogUtil.ICEBERG_CATALOG_TYPE; +import static org.apache.iceberg.CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE; + public class TestCachedClientPool extends HiveMetastoreTest { @Test public void testClientPoolCleaner() throws InterruptedException { - String metastoreUri = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname, ""); CachedClientPool clientPool = new CachedClientPool(hiveConf, Collections.emptyMap()); HiveClientPool clientPool1 = clientPool.clientPool(); - Assert.assertTrue(CachedClientPool.clientPoolCache().getIfPresent(metastoreUri) == clientPool1); + Assertions.assertThat( + CachedClientPool.clientPoolCache() + .getIfPresent(CachedClientPool.extractKey(null, hiveConf))) + .isSameAs(clientPool1); TimeUnit.MILLISECONDS.sleep(EVICTION_INTERVAL - TimeUnit.SECONDS.toMillis(2)); HiveClientPool clientPool2 = clientPool.clientPool(); - Assert.assertTrue(clientPool1 == clientPool2); + Assert.assertSame(clientPool1, clientPool2); TimeUnit.MILLISECONDS.sleep(EVICTION_INTERVAL + TimeUnit.SECONDS.toMillis(5)); - Assert.assertNull(CachedClientPool.clientPoolCache().getIfPresent(metastoreUri)); + Assert.assertNull( + CachedClientPool.clientPoolCache() + .getIfPresent(CachedClientPool.extractKey(null, hiveConf))); + + // The client has been really closed. 
+ Assert.assertTrue(clientPool1.isClosed()); + Assert.assertTrue(clientPool2.isClosed()); + } + + @Test + public void testCacheKey() throws Exception { + UserGroupInformation current = UserGroupInformation.getCurrentUser(); + UserGroupInformation foo1 = UserGroupInformation.createProxyUser("foo", current); + UserGroupInformation foo2 = UserGroupInformation.createProxyUser("foo", current); + UserGroupInformation bar = UserGroupInformation.createProxyUser("bar", current); + + Key key1 = + foo1.doAs( + (PrivilegedAction) + () -> CachedClientPool.extractKey("user_name,conf:key1", hiveConf)); + Key key2 = + foo2.doAs( + (PrivilegedAction) + () -> CachedClientPool.extractKey("conf:key1,user_name", hiveConf)); + Assert.assertEquals("Key elements order shouldn't matter", key1, key2); + + key1 = foo1.doAs((PrivilegedAction) () -> CachedClientPool.extractKey("ugi", hiveConf)); + key2 = bar.doAs((PrivilegedAction) () -> CachedClientPool.extractKey("ugi", hiveConf)); + Assert.assertNotEquals("Different users are not supposed to be equivalent", key1, key2); + + key2 = foo2.doAs((PrivilegedAction) () -> CachedClientPool.extractKey("ugi", hiveConf)); + Assert.assertNotEquals("Different UGI instances are not supposed to be equivalent", key1, key2); + + key1 = CachedClientPool.extractKey("ugi", hiveConf); + key2 = CachedClientPool.extractKey("ugi,conf:key1", hiveConf); + Assert.assertNotEquals( + "Keys with different number of elements are not supposed to be equivalent", key1, key2); + + Configuration conf1 = new Configuration(hiveConf); + Configuration conf2 = new Configuration(hiveConf); + + conf1.set("key1", "val"); + key1 = CachedClientPool.extractKey("conf:key1", conf1); + key2 = CachedClientPool.extractKey("conf:key1", conf2); + Assert.assertNotEquals( + "Config with different values are not supposed to be equivalent", key1, key2); + + conf2.set("key1", "val"); + conf2.set("key2", "val"); + key2 = CachedClientPool.extractKey("conf:key2", conf2); + Assert.assertNotEquals( + "Config with different keys are not supposed to be equivalent", key1, key2); + + key1 = CachedClientPool.extractKey("conf:key1,ugi", conf1); + key2 = CachedClientPool.extractKey("ugi,conf:key1", conf2); + Assert.assertEquals("Config with same key/value should be equivalent", key1, key2); + + conf1.set("key2", "val"); + key1 = CachedClientPool.extractKey("conf:key2 ,conf:key1", conf1); + key2 = CachedClientPool.extractKey("conf:key2,conf:key1", conf2); + Assert.assertEquals("Config with same key/value should be equivalent", key1, key2); + + Assertions.assertThatThrownBy( + () -> CachedClientPool.extractKey("ugi,ugi", hiveConf), + "Duplicate key elements should result in an error") + .isInstanceOf(ValidationException.class) + .hasMessageContaining("UGI key element already specified"); + + Assertions.assertThatThrownBy( + () -> CachedClientPool.extractKey("conf:k1,conf:k2,CONF:k1", hiveConf), + "Duplicate conf key elements should result in an error") + .isInstanceOf(ValidationException.class) + .hasMessageContaining("Conf key element k1 already specified"); } + @Test + public void testHmsCatalog() { + Map properties = + ImmutableMap.of( + String.valueOf(EVICTION_INTERVAL), + String.valueOf(Integer.MAX_VALUE), + ICEBERG_CATALOG_TYPE, + ICEBERG_CATALOG_TYPE_HIVE); + + Configuration conf1 = new Configuration(); + conf1.set(HiveCatalog.HIVE_CONF_CATALOG, "foo"); + + Configuration conf2 = new Configuration(); + conf2.set(HiveCatalog.HIVE_CONF_CATALOG, "foo"); + + Configuration conf3 = new Configuration(); + 
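    // conf1 and conf2 share the same catalog name, so they should resolve to one cached client pool; conf3 and the default configuration should each get their own.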
conf3.set(HiveCatalog.HIVE_CONF_CATALOG, "bar"); + + HiveCatalog catalog1 = (HiveCatalog) CatalogUtil.buildIcebergCatalog("1", properties, conf1); + HiveCatalog catalog2 = (HiveCatalog) CatalogUtil.buildIcebergCatalog("2", properties, conf2); + HiveCatalog catalog3 = (HiveCatalog) CatalogUtil.buildIcebergCatalog("3", properties, conf3); + HiveCatalog catalog4 = + (HiveCatalog) CatalogUtil.buildIcebergCatalog("4", properties, new Configuration()); + + HiveClientPool pool1 = ((CachedClientPool) catalog1.clientPool()).clientPool(); + HiveClientPool pool2 = ((CachedClientPool) catalog2.clientPool()).clientPool(); + HiveClientPool pool3 = ((CachedClientPool) catalog3.clientPool()).clientPool(); + HiveClientPool pool4 = ((CachedClientPool) catalog4.clientPool()).clientPool(); + + Assert.assertSame(pool1, pool2); + Assert.assertNotSame(pool3, pool1); + Assert.assertNotSame(pool3, pool2); + Assert.assertNotSame(pool3, pool4); + Assert.assertNotSame(pool4, pool1); + Assert.assertNotSame(pool4, pool2); + + Assert.assertEquals("foo", pool1.hiveConf().get(HiveCatalog.HIVE_CONF_CATALOG)); + Assert.assertEquals("bar", pool3.hiveConf().get(HiveCatalog.HIVE_CONF_CATALOG)); + Assert.assertNull(pool4.hiveConf().get(HiveCatalog.HIVE_CONF_CATALOG)); + + pool1.close(); + pool3.close(); + pool4.close(); + } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 00d7468cf4ac..03a1bcf74103 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -19,19 +19,23 @@ package org.apache.iceberg.hive; +import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.iceberg.AssertHelpers; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.iceberg.CachingCatalog; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.DataFile; import org.apache.iceberg.DataFiles; import org.apache.iceberg.FileFormat; +import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; @@ -43,6 +47,7 @@ import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TestHelpers; import org.apache.iceberg.Transaction; import org.apache.iceberg.UpdateSchema; import org.apache.iceberg.catalog.Catalog; @@ -76,6 +81,8 @@ import static org.apache.iceberg.TableProperties.DEFAULT_SORT_ORDER; import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -88,15 +95,16 @@ public class TestHiveCatalog extends HiveMetastoreTest { @Rule public TemporaryFolder temp = new TemporaryFolder(); + private Schema getTestSchema() { + return new Schema( + required(1, "id", 
Types.IntegerType.get(), "unique ID"), + required(2, "data", Types.StringType.get())); + } + @Test public void testCreateTableBuilder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -120,13 +128,8 @@ public void testCreateTableBuilder() throws Exception { @Test public void testCreateTableWithCaching() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); ImmutableMap properties = ImmutableMap.of("key1", "value1", "key2", "value2"); @@ -175,10 +178,7 @@ public void testInitializeCatalogWithProperties() { @Test public void testCreateTableTxnBuilder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -199,13 +199,8 @@ public void testCreateTableTxnBuilder() throws Exception { @Test public void testReplaceTxnBuilder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -245,15 +240,42 @@ public void testReplaceTxnBuilder() throws Exception { } } + @Test + public void testCreateTableWithOwner() throws Exception { + createTableAndVerifyOwner( + DB_NAME, + "tbl_specified_owner", + ImmutableMap.of(HiveCatalog.HMS_TABLE_OWNER, "some_owner"), + "some_owner"); + createTableAndVerifyOwner( + DB_NAME, + "tbl_default_owner", + ImmutableMap.of(), + UserGroupInformation.getCurrentUser().getShortUserName()); + } + + private void createTableAndVerifyOwner( + String db, String tbl, Map properties, String owner) + throws IOException, TException { + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); + TableIdentifier tableIdent = TableIdentifier.of(db, tbl); + String location = temp.newFolder(tbl).toString(); + try { + Table table = catalog.createTable(tableIdent, schema, spec, location, properties); + org.apache.hadoop.hive.metastore.api.Table hmsTable = metastoreClient.getTable(db, tbl); + Assert.assertEquals(owner, hmsTable.getOwner()); + Map hmsTableParams = hmsTable.getParameters(); + Assert.assertFalse(hmsTableParams.containsKey(HiveCatalog.HMS_TABLE_OWNER)); + } finally { + 
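      // Drop the table even if the ownership assertions above fail.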
catalog.dropTable(tableIdent); + } + } + @Test public void testCreateTableDefaultSortOrder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -270,16 +292,9 @@ public void testCreateTableDefaultSortOrder() throws Exception { @Test public void testCreateTableCustomSortOrder() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); - SortOrder order = SortOrder.builderFor(schema) - .asc("id", NULLS_FIRST) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); + SortOrder order = SortOrder.builderFor(schema).asc("id", NULLS_FIRST).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -313,10 +328,9 @@ public void testCreateNamespace() throws Exception { Assert.assertEquals("There no same location for db and namespace", database1.getLocationUri(), defaultUri(namespace1)); - AssertHelpers.assertThrows("Should fail to create when namespace already exist " + namespace1, - AlreadyExistsException.class, "Namespace '" + namespace1 + "' already exists!", () -> { - catalog.createNamespace(namespace1); - }); + assertThatThrownBy(() -> catalog.createNamespace(namespace1)) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Namespace '" + namespace1 + "' already exists!"); String hiveLocalDir = temp.newFolder().toURI().toString(); // remove the trailing slash of the URI hiveLocalDir = hiveLocalDir.substring(0, hiveLocalDir.length() - 1); @@ -332,6 +346,86 @@ public void testCreateNamespace() throws Exception { database2.getLocationUri(), hiveLocalDir); } + @Test + public void testCreateNamespaceWithOwnership() throws Exception { + createNamespaceAndVerifyOwnership( + "default_ownership_1", + ImmutableMap.of(), + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER); + + createNamespaceAndVerifyOwnership( + "default_ownership_2", + ImmutableMap.of( + "non_owner_prop1", "value1", + "non_owner_prop2", "value2"), + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER); + + createNamespaceAndVerifyOwnership( + "individual_ownership_1", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "apache", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + "apache", + PrincipalType.USER); + + createNamespaceAndVerifyOwnership( + "individual_ownership_2", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "someone"), + "someone", + PrincipalType.USER); + + createNamespaceAndVerifyOwnership( + "group_ownership", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "iceberg", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + "iceberg", + PrincipalType.GROUP); + + assertThatThrownBy( + () -> + createNamespaceAndVerifyOwnership( + "create_with_owner_type_alone", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER_TYPE, PrincipalType.USER.name()), + "no_post_create_expectation_due_to_exception_thrown", + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + 
String.format( + "Create namespace setting %s without setting %s is not allowed", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> + createNamespaceAndVerifyOwnership( + "create_with_invalid_owner_type", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, "iceberg", + HiveCatalog.HMS_DB_OWNER_TYPE, "invalidOwnerType"), + "no_post_create_expectation_due_to_exception_thrown", + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("No enum constant " + PrincipalType.class.getCanonicalName()); + } + + private void createNamespaceAndVerifyOwnership( + String name, Map prop, String expectedOwner, PrincipalType expectedOwnerType) + throws TException { + Namespace namespace = Namespace.of(name); + + catalog.createNamespace(namespace, prop); + Database db = metastoreClient.getDatabase(namespace.toString()); + + Assert.assertEquals(expectedOwner, db.getOwnerName()); + Assert.assertEquals(expectedOwnerType, db.getOwnerType()); + } + @Test public void testListNamespace() throws TException { List namespaces; @@ -389,10 +483,191 @@ public void testSetNamespaceProperties() throws TException { Assert.assertEquals(database.getParameters().get("owner"), "alter_apache"); Assert.assertEquals(database.getParameters().get("test"), "test"); Assert.assertEquals(database.getParameters().get("group"), "iceberg"); - AssertHelpers.assertThrows("Should fail to namespace not exist" + namespace, - NoSuchNamespaceException.class, "Namespace does not exist: ", () -> { - catalog.setProperties(Namespace.of("db2", "db2", "ns2"), meta); - }); + assertThatThrownBy( + () -> catalog.setProperties(Namespace.of("db2", "db2", "ns2"), ImmutableMap.of())) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessage("Namespace does not exist: db2.db2.ns2"); + } + + @Test + public void testSetNamespaceOwnership() throws TException { + setNamespaceOwnershipAndVerify( + "set_individual_ownership_on_default_owner", + ImmutableMap.of(), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + System.getProperty("user.name"), + PrincipalType.USER, + "some_individual_owner", + PrincipalType.USER); + + setNamespaceOwnershipAndVerify( + "set_group_ownership_on_default_owner", + ImmutableMap.of(), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + System.getProperty("user.name"), + PrincipalType.USER, + "some_group_owner", + PrincipalType.GROUP); + + setNamespaceOwnershipAndVerify( + "change_individual_to_group_ownership", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + "some_owner", + PrincipalType.USER, + "some_group_owner", + PrincipalType.GROUP); + + setNamespaceOwnershipAndVerify( + "change_group_to_individual_ownership", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + "some_group_owner", + PrincipalType.GROUP, + "some_individual_owner", + PrincipalType.USER); + + assertThatThrownBy( + () -> + setNamespaceOwnershipAndVerify( + "set_owner_without_setting_owner_type", + ImmutableMap.of(), + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, 
"some_individual_owner"), + System.getProperty("user.name"), + PrincipalType.USER, + "no_post_setting_expectation_due_to_exception_thrown", + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Setting %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> + setNamespaceOwnershipAndVerify( + "set_owner_type_without_setting_owner", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER_TYPE, PrincipalType.GROUP.name()), + "some_owner", + PrincipalType.USER, + "no_post_setting_expectation_due_to_exception_thrown", + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Setting %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> + setNamespaceOwnershipAndVerify( + "set_invalid_owner_type", + ImmutableMap.of(), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, "iceberg", + HiveCatalog.HMS_DB_OWNER_TYPE, "invalidOwnerType"), + System.getProperty("user.name"), + PrincipalType.USER, + "no_post_setting_expectation_due_to_exception_thrown", + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "No enum constant org.apache.hadoop.hive.metastore.api.PrincipalType.invalidOwnerType"); + } + + @Test + public void testSetNamespaceOwnershipNoop() throws TException, IOException { + setNamespaceOwnershipAndVerify( + "set_ownership_noop_1", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_individual_owner"), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + "some_individual_owner", + PrincipalType.USER, + "some_individual_owner", + PrincipalType.USER); + + setNamespaceOwnershipAndVerify( + "set_ownership_noop_2", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + "some_group_owner", + PrincipalType.GROUP, + "some_group_owner", + PrincipalType.GROUP); + + setNamespaceOwnershipAndVerify( + "set_ownership_noop_3", + ImmutableMap.of(), + ImmutableMap.of(), + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER, + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER); + + setNamespaceOwnershipAndVerify( + "set_ownership_noop_4", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableMap.of("unrelated_prop_1", "value_1", "unrelated_prop_2", "value_2"), + "some_group_owner", + PrincipalType.GROUP, + "some_group_owner", + PrincipalType.GROUP); + } + + private void setNamespaceOwnershipAndVerify( + String name, + Map propToCreate, + Map propToSet, + String expectedOwnerPostCreate, + PrincipalType expectedOwnerTypePostCreate, + String expectedOwnerPostSet, + PrincipalType expectedOwnerTypePostSet) + throws TException { + createNamespaceAndVerifyOwnership( + name, propToCreate, expectedOwnerPostCreate, expectedOwnerTypePostCreate); + + catalog.setProperties(Namespace.of(name), propToSet); + Database database = metastoreClient.getDatabase(name); + + Assert.assertEquals(expectedOwnerPostSet, database.getOwnerName()); + 
Assert.assertEquals(expectedOwnerTypePostSet, database.getOwnerType()); } @Test @@ -407,18 +682,145 @@ public void testRemoveNamespaceProperties() throws TException { Assert.assertEquals(database.getParameters().get("owner"), null); Assert.assertEquals(database.getParameters().get("group"), "iceberg"); - AssertHelpers.assertThrows("Should fail to namespace not exist" + namespace, - NoSuchNamespaceException.class, "Namespace does not exist: ", () -> { - catalog.removeProperties(Namespace.of("db2", "db2", "ns2"), ImmutableSet.of("comment", "owner")); - }); + + assertThatThrownBy( + () -> + catalog.removeProperties( + Namespace.of("db2", "db2", "ns2"), ImmutableSet.of("comment", "owner"))) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessage("Namespace does not exist: db2.db2.ns2"); + } + + @Test + public void testRemoveNamespaceOwnership() throws TException, IOException { + removeNamespaceOwnershipAndVerify( + "remove_individual_ownership", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), + "some_owner", + PrincipalType.USER, + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_group_ownership", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), + "some_group_owner", + PrincipalType.GROUP, + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_on_default_noop_1", + ImmutableMap.of(), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER, HiveCatalog.HMS_DB_OWNER_TYPE), + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER, + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_on_default_noop_2", + ImmutableMap.of(), + ImmutableSet.of(), + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER, + UserGroupInformation.getCurrentUser().getShortUserName(), + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_noop_1", + ImmutableMap.of(HiveCatalog.HMS_DB_OWNER, "some_owner"), + ImmutableSet.of(), + "some_owner", + PrincipalType.USER, + "some_owner", + PrincipalType.USER); + + removeNamespaceOwnershipAndVerify( + "remove_ownership_noop_2", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableSet.of(), + "some_group_owner", + PrincipalType.GROUP, + "some_group_owner", + PrincipalType.GROUP); + + assertThatThrownBy( + () -> + removeNamespaceOwnershipAndVerify( + "remove_owner_without_removing_owner_type", + ImmutableMap.of( + HiveCatalog.HMS_DB_OWNER, + "some_individual_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.USER.name()), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER), + "some_individual_owner", + PrincipalType.USER, + "no_post_remove_expectation_due_to_exception_thrown", + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Removing %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + + assertThatThrownBy( + () -> + removeNamespaceOwnershipAndVerify( + "remove_owner_type_without_removing_owner", + ImmutableMap.of( + 
HiveCatalog.HMS_DB_OWNER, + "some_group_owner", + HiveCatalog.HMS_DB_OWNER_TYPE, + PrincipalType.GROUP.name()), + ImmutableSet.of(HiveCatalog.HMS_DB_OWNER_TYPE), + "some_group_owner", + PrincipalType.GROUP, + "no_post_remove_expectation_due_to_exception_thrown", + null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + String.format( + "Removing %s and %s has to be performed together or not at all", + HiveCatalog.HMS_DB_OWNER_TYPE, HiveCatalog.HMS_DB_OWNER)); + } + + private void removeNamespaceOwnershipAndVerify( + String name, + Map propToCreate, + Set propToRemove, + String expectedOwnerPostCreate, + PrincipalType expectedOwnerTypePostCreate, + String expectedOwnerPostRemove, + PrincipalType expectedOwnerTypePostRemove) + throws TException { + createNamespaceAndVerifyOwnership( + name, propToCreate, expectedOwnerPostCreate, expectedOwnerTypePostCreate); + + catalog.removeProperties(Namespace.of(name), propToRemove); + + Database database = metastoreClient.getDatabase(name); + + Assert.assertEquals(expectedOwnerPostRemove, database.getOwnerName()); + Assert.assertEquals(expectedOwnerTypePostRemove, database.getOwnerType()); } @Test public void testDropNamespace() throws TException { Namespace namespace = Namespace.of("dbname_drop"); TableIdentifier identifier = TableIdentifier.of(namespace, "table"); - Schema schema = new Schema(Types.StructType.of( - required(1, "id", Types.LongType.get())).fields()); + Schema schema = getTestSchema(); catalog.createNamespace(namespace, meta); catalog.createTable(identifier, schema); @@ -426,27 +828,23 @@ public void testDropNamespace() throws TException { Assert.assertTrue(nameMata.get("owner").equals("apache")); Assert.assertTrue(nameMata.get("group").equals("iceberg")); - AssertHelpers.assertThrows("Should fail to drop namespace is not empty" + namespace, - NamespaceNotEmptyException.class, - "Namespace dbname_drop is not empty. One or more tables exist.", () -> { - catalog.dropNamespace(namespace); - }); + assertThatThrownBy(() -> catalog.dropNamespace(namespace)) + .isInstanceOf(NamespaceNotEmptyException.class) + .hasMessage("Namespace dbname_drop is not empty. 
One or more tables exist."); Assert.assertTrue(catalog.dropTable(identifier, true)); Assert.assertTrue("Should fail to drop namespace if it is not empty", catalog.dropNamespace(namespace)); Assert.assertFalse("Should fail to drop when namespace doesn't exist", catalog.dropNamespace(Namespace.of("db.ns1"))); - AssertHelpers.assertThrows("Should fail to drop namespace exist" + namespace, - NoSuchNamespaceException.class, "Namespace does not exist: ", () -> { - catalog.loadNamespaceMetadata(namespace); - }); + assertThatThrownBy(() -> catalog.loadNamespaceMetadata(namespace)) + .isInstanceOf(NoSuchNamespaceException.class) + .hasMessage("Namespace does not exist: dbname_drop"); } @Test public void testDropTableWithoutMetadataFile() { TableIdentifier identifier = TableIdentifier.of(DB_NAME, "tbl"); - Schema tableSchema = - new Schema(Types.StructType.of(required(1, "id", Types.LongType.get())).fields()); + Schema tableSchema = getTestSchema(); catalog.createTable(identifier, tableSchema); String metadataFileLocation = catalog.newTableOps(identifier).current().metadataFileLocation(); TableOperations ops = catalog.newTableOps(identifier); @@ -460,13 +858,8 @@ public void testDropTableWithoutMetadataFile() { @Test public void testTableName() { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); + Schema schema = getTestSchema(); + PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -492,10 +885,7 @@ private String defaultUri(Namespace namespace) throws TException { @Test public void testUUIDinTableProperties() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdentifier = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -512,10 +902,7 @@ public void testUUIDinTableProperties() throws Exception { @Test public void testSnapshotStatsTableProperties() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdentifier = TableIdentifier.of(DB_NAME, "tbl"); String location = temp.newFolder("tbl").toString(); @@ -620,10 +1007,7 @@ public void testNotExposeTableProperties() { @Test public void testSetDefaultPartitionSpec() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -641,10 +1025,7 @@ public void testSetDefaultPartitionSpec() throws Exception { @Test public void testSetCurrentSchema() throws Exception { - Schema schema = new Schema( - required(1, "id", Types.IntegerType.get(), "unique ID"), - required(2, "data", Types.StringType.get()) - ); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { @@ -686,7 +1067,7 @@ public void testConstructorWarehousePathWithEndSlash() { @Test public void testTablePropsDefinedAtCatalogLevel() { - Schema schema = new Schema(required(1, "id", Types.IntegerType.get(), 
"unique ID")); + Schema schema = getTestSchema(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); ImmutableMap catalogProps = @@ -738,4 +1119,50 @@ public void testTablePropsDefinedAtCatalogLevel() { hiveCatalog.dropTable(tableIdent); } } + + @Test + public void testDatabaseLocationWithSlashInWarehouseDir() { + Configuration conf = new Configuration(); + // With a trailing slash + conf.set("hive.metastore.warehouse.dir", "s3://bucket/"); + conf.set("hive.metastore.warehouse.external.dir", "s3://bucket/"); + + HiveCatalog catalog = new HiveCatalog(); + catalog.setConf(conf); + + Database database = catalog.convertToDatabase(Namespace.of("database"), ImmutableMap.of()); + + Assert.assertEquals("s3://bucket/database.db", database.getLocationUri()); + } + + @Test + public void testRegisterTable() { + TableIdentifier identifier = TableIdentifier.of(DB_NAME, "t1"); + catalog.createTable(identifier, getTestSchema()); + Table registeringTable = catalog.loadTable(identifier); + catalog.dropTable(identifier, false); + TableOperations ops = ((HasTableOperations) registeringTable).operations(); + String metadataLocation = ((HiveTableOperations) ops).currentMetadataLocation(); + Table registeredTable = catalog.registerTable(identifier, metadataLocation); + assertThat(registeredTable).isNotNull(); + TestHelpers.assertSerializedAndLoadedMetadata(registeringTable, registeredTable); + String expectedMetadataLocation = + ((HasTableOperations) registeredTable).operations().current().metadataFileLocation(); + assertThat(metadataLocation).isEqualTo(expectedMetadataLocation); + assertThat(catalog.loadTable(identifier)).isNotNull(); + assertThat(catalog.dropTable(identifier)).isTrue(); + } + + @Test + public void testRegisterExistingTable() { + TableIdentifier identifier = TableIdentifier.of(DB_NAME, "t1"); + catalog.createTable(identifier, getTestSchema()); + Table registeringTable = catalog.loadTable(identifier); + TableOperations ops = ((HasTableOperations) registeringTable).operations(); + String metadataLocation = ((HiveTableOperations) ops).currentMetadataLocation(); + assertThatThrownBy(() -> catalog.registerTable(identifier, metadataLocation)) + .isInstanceOf(AlreadyExistsException.class) + .hasMessage("Table already exists: hivedb.t1"); + assertThat(catalog.dropTable(identifier, true)).isTrue(); + } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java index 36996e33e3c6..208e95e0e432 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java @@ -32,9 +32,9 @@ import org.apache.hadoop.hive.metastore.api.GetAllFunctionsResponse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.PrincipalType; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.thrift.transport.TTransportException; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -97,8 +97,9 @@ private HiveConf createHiveConf() { @Test public void testNewClientFailure() { Mockito.doThrow(new RuntimeException("Connection exception")).when(clients).newClient(); - AssertHelpers.assertThrows("Should throw exception", RuntimeException.class, - "Connection exception", () -> 
clients.run(Object::toString)); + Assertions.assertThatThrownBy(() -> clients.run(Object::toString)) + .isInstanceOf(RuntimeException.class) + .hasMessage("Connection exception"); } @Test @@ -106,9 +107,11 @@ public void testGetTablesFailsForNonReconnectableException() throws Exception { HiveMetaStoreClient hmsClient = Mockito.mock(HiveMetaStoreClient.class); Mockito.doReturn(hmsClient).when(clients).newClient(); Mockito.doThrow(new MetaException("Another meta exception")) - .when(hmsClient).getTables(Mockito.anyString(), Mockito.anyString()); - AssertHelpers.assertThrows("Should throw exception", MetaException.class, - "Another meta exception", () -> clients.run(client -> client.getTables("default", "t"))); + .when(hmsClient) + .getTables(Mockito.anyString(), Mockito.anyString()); + Assertions.assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t"))) + .isInstanceOf(MetaException.class) + .hasMessage("Another meta exception"); } @Test diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java index 293dd5010cd1..f4f9a861034b 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommitLocks.java @@ -20,34 +20,48 @@ package org.apache.iceberg.hive; import java.util.Collections; +import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.LockRequest; import org.apache.hadoop.hive.metastore.api.LockResponse; import org.apache.hadoop.hive.metastore.api.LockState; -import org.apache.iceberg.AssertHelpers; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; +import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.hadoop.ConfigProperties; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.AdditionalAnswers; +import org.mockito.ArgumentCaptor; +import org.mockito.invocation.InvocationOnMock; -import static org.mockito.Matchers.any; -import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.eq; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; import 
static org.mockito.Mockito.spy; @@ -59,7 +73,7 @@ public class TestHiveCommitLocks extends HiveTableBaseTest { private static HiveTableOperations spyOps = null; private static HiveClientPool spyClientPool = null; private static CachedClientPool spyCachedClientPool = null; - private static Configuration overriddenHiveConf = new Configuration(hiveConf); + private static Configuration overriddenHiveConf; private static AtomicReference spyClientRef = new AtomicReference<>(); private static IMetaStoreClient spyClient = null; HiveTableOperations ops = null; @@ -70,12 +84,19 @@ public class TestHiveCommitLocks extends HiveTableBaseTest { LockResponse waitLockResponse = new LockResponse(dummyLockId, LockState.WAITING); LockResponse acquiredLockResponse = new LockResponse(dummyLockId, LockState.ACQUIRED); LockResponse notAcquiredLockResponse = new LockResponse(dummyLockId, LockState.NOT_ACQUIRED); + ShowLocksResponse emptyLocks = new ShowLocksResponse(Lists.newArrayList()); @BeforeClass - public static void initializeSpies() throws Exception { + public static void startMetastore() throws Exception { + HiveMetastoreTest.startMetastore( + ImmutableMap.of(HiveConf.ConfVars.HIVE_TXN_TIMEOUT.varname, "1s")); + + // start spies + overriddenHiveConf = new Configuration(hiveConf); overriddenHiveConf.setLong("iceberg.hive.lock-timeout-ms", 6 * 1000); overriddenHiveConf.setLong("iceberg.hive.lock-check-min-wait-ms", 50); overriddenHiveConf.setLong("iceberg.hive.lock-check-max-wait-ms", 5 * 1000); + overriddenHiveConf.setLong("iceberg.hive.lock-heartbeat-interval-ms", 100); // Set up the spy clients as static variables instead of before every test. // The spy clients are reused between methods and closed at the end of all tests in this class. @@ -116,8 +137,16 @@ public void before() throws Exception { Assert.assertEquals(2, ops.current().schema().columns().size()); - spyOps = spy(new HiveTableOperations(overriddenHiveConf, spyCachedClientPool, ops.io(), catalog.name(), - dbName, tableName)); + spyOps = + spy( + new HiveTableOperations( + overriddenHiveConf, + spyCachedClientPool, + ops.io(), + catalog.name(), + dbName, + tableName)); + reset(spyClient); } @AfterClass @@ -133,6 +162,7 @@ public static void cleanup() { public void testLockAcquisitionAtFirstTime() throws TException, InterruptedException { doReturn(acquiredLockResponse).when(spyClient).lock(any()); doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); spyOps.doCommit(metadataV2, metadataV1); @@ -151,20 +181,196 @@ public void testLockAcquisitionAfterRetries() throws TException, InterruptedExce .when(spyClient) .checkLock(eq(dummyLockId)); doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + Assert.assertEquals(1, spyOps.current().schema().columns().size()); // should be 1 again + } + + @Test + public void testLockAcquisitionAfterFailedNotFoundLock() throws TException, InterruptedException { + doReturn(emptyLocks).when(spyClient).showLocks(any()); + doThrow(new TException("Failed to connect to HMS")) + .doReturn(waitLockResponse) + .when(spyClient) + .lock(any()); + doReturn(waitLockResponse) + .doReturn(acquiredLockResponse) + .when(spyClient) + .checkLock(eq(dummyLockId)); + doNothing().when(spyOps).doUnlock(any()); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + Assert.assertEquals(1, 
spyOps.current().schema().columns().size()); // should be 1 again + } + + @Test + public void testLockAcquisitionAfterFailedAndFoundLock() throws TException, InterruptedException { + ArgumentCaptor lockRequestCaptor = ArgumentCaptor.forClass(LockRequest.class); + doReturn(emptyLocks).when(spyClient).showLocks(any()); + doThrow(new TException("Failed to connect to HMS")) + .doReturn(waitLockResponse) + .when(spyClient) + .lock(lockRequestCaptor.capture()); + + // Capture the lockRequest, and generate a response simulating that we have a lock + ShowLocksResponse showLocksResponse = new ShowLocksResponse(Lists.newArrayList()); + ShowLocksResponseElement showLocksElement = + new ShowLocksResponseElementWrapper(lockRequestCaptor); + showLocksResponse.getLocks().add(showLocksElement); + + doReturn(showLocksResponse).when(spyClient).showLocks(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doNothing().when(spyOps).doUnlock(any()); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); spyOps.doCommit(metadataV2, metadataV1); Assert.assertEquals(1, spyOps.current().schema().columns().size()); // should be 1 again } + @Test + public void testUnLock() throws TException { + doReturn(waitLockResponse).when(spyClient).lock(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + verify(spyClient, times(1)).unlock(eq(dummyLockId)); + } + + @Test + public void testUnLockInterruptedUnLock() throws TException { + doReturn(waitLockResponse).when(spyClient).lock(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .doNothing() + .when(spyClient) + .unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + verify(spyClient, times(2)).unlock(eq(dummyLockId)); + } + + @Test + public void testUnLockAfterInterruptedLock() throws TException { + ArgumentCaptor lockRequestCaptor = ArgumentCaptor.forClass(LockRequest.class); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .when(spyClient) + .lock(lockRequestCaptor.capture()); + + // Capture the lockRequest, and generate a response simulating that we have a lock + ShowLocksResponse showLocksResponse = new ShowLocksResponse(Lists.newArrayList()); + ShowLocksResponseElement showLocksElement = + new ShowLocksResponseElementWrapper(lockRequestCaptor); + showLocksResponse.getLocks().add(showLocksElement); + + doReturn(showLocksResponse).when(spyClient).showLocks(any()); + doReturn(acquiredLockResponse).when(spyClient).checkLock(eq(dummyLockId)); + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Interrupted while creating lock on table hivedb.tbl"); + + verify(spyClient, times(1)).unlock(eq(dummyLockId)); + // Make sure that we exit the lock loop on InterruptedException + verify(spyClient, times(1)).lock(any()); + } + + @Test + public void testUnLockAfterInterruptedLockCheck() throws TException { + 
doReturn(waitLockResponse).when(spyClient).lock(any()); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .when(spyClient) + .checkLock(eq(dummyLockId)); + + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state WAITING"); + + verify(spyClient, times(1)).unlock(eq(dummyLockId)); + // Make sure that we exit the checkLock loop on InterruptedException + verify(spyClient, times(1)).checkLock(eq(dummyLockId)); + } + + @Test + public void testUnLockAfterInterruptedGetTable() throws TException { + doReturn(acquiredLockResponse).when(spyClient).lock(any()); + doAnswer( + invocation -> { + throw new InterruptedException("Interrupt test"); + }) + .when(spyClient) + .getTable(any(), any()); + + doNothing().when(spyClient).unlock(eq(dummyLockId)); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage("Interrupted during commit"); + + verify(spyClient, times(1)).unlock(eq(dummyLockId)); + } + + /** Wraps an ArgumentCaptor to provide data based on the request */ + private class ShowLocksResponseElementWrapper extends ShowLocksResponseElement { + private ArgumentCaptor wrapped; + + private ShowLocksResponseElementWrapper(ArgumentCaptor wrapped) { + this.wrapped = wrapped; + } + + @Override + public String getAgentInfo() { + return wrapped.getValue().getAgentInfo(); + } + + @Override + public LockState getState() { + return LockState.WAITING; + } + + @Override + public long getLockid() { + return dummyLockId; + } + } + @Test public void testLockFailureAtFirstTime() throws TException { doReturn(notAcquiredLockResponse).when(spyClient).lock(any()); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Could not acquire the lock on", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state NOT_ACQUIRED"); } @Test @@ -178,10 +384,11 @@ public void testLockFailureAfterRetries() throws TException { .when(spyClient) .checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Could not acquire the lock on", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state NOT_ACQUIRED"); } @Test @@ -189,10 +396,11 @@ public void testLockTimeoutAfterRetries() throws TException { doReturn(waitLockResponse).when(spyClient).lock(any()); doReturn(waitLockResponse).when(spyClient).checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Timed out after", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + 
.isInstanceOf(CommitFailedException.class) + .hasMessageStartingWith("org.apache.iceberg.hive.LockException") + .hasMessageContaining("Timed out after") + .hasMessageEndingWith("waiting for lock on hivedb.tbl"); } @Test @@ -201,10 +409,10 @@ public void testPassThroughThriftExceptions() throws TException { doReturn(waitLockResponse).doThrow(new TException("Test Thrift Exception")) .when(spyClient).checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - RuntimeException.class, - "Metastore operation failed for", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(RuntimeException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: Metastore operation failed for hivedb.tbl"); } @Test @@ -216,10 +424,11 @@ public void testPassThroughInterruptions() throws TException { return waitLockResponse; }).when(spyClient).checkLock(eq(dummyLockId)); - AssertHelpers.assertThrows("Expected an exception", - CommitFailedException.class, - "Could not acquire the lock on", - () -> spyOps.doCommit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Could not acquire the lock on hivedb.tbl, lock request ended in state WAITING"); } @Test @@ -248,4 +457,72 @@ public void testTableLevelProcessLockBlocksConcurrentHMSRequestsForSameTable() t // all threads eventually got their turn verify(spyClient, times(numConcurrentCommits)).lock(any(LockRequest.class)); } + + @Test + public void testLockHeartbeat() throws TException { + doReturn(acquiredLockResponse).when(spyClient).lock(any()); + doAnswer(AdditionalAnswers.answersWithDelay(2000, InvocationOnMock::callRealMethod)) + .when(spyClient) + .getTable(any(), any()); + doNothing().when(spyClient).heartbeat(eq(0L), eq(dummyLockId)); + + spyOps.doCommit(metadataV2, metadataV1); + + verify(spyClient, atLeastOnce()).heartbeat(eq(0L), eq(dummyLockId)); + } + + @Test + public void testLockHeartbeatFailureDuringCommit() throws TException, InterruptedException { + doReturn(acquiredLockResponse).when(spyClient).lock(any()); + doAnswer(AdditionalAnswers.answersWithDelay(2000, InvocationOnMock::callRealMethod)) + .when(spyOps) + .loadHmsTable(); + doThrow(new TException("Failed to heart beat.")) + .when(spyClient) + .heartbeat(eq(0L), eq(dummyLockId)); + + Assertions.assertThatThrownBy(() -> spyOps.doCommit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage( + "org.apache.iceberg.hive.LockException: " + + "Failed to heartbeat for hive lock. 
Failed to heart beat."); + } + + @Test + public void testNoLockCallsWithNoLock() throws TException { + Configuration confWithLock = new Configuration(overriddenHiveConf); + confWithLock.setBoolean(ConfigProperties.LOCK_HIVE_ENABLED, false); + + HiveTableOperations noLockSpyOps = + spy( + new HiveTableOperations( + confWithLock, + spyCachedClientPool, + ops.io(), + catalog.name(), + TABLE_IDENTIFIER.namespace().level(0), + TABLE_IDENTIFIER.name())); + + ArgumentCaptor contextCaptor = + ArgumentCaptor.forClass(EnvironmentContext.class); + + doNothing() + .when(spyClient) + .alter_table_with_environmentContext(any(), any(), any(), contextCaptor.capture()); + + noLockSpyOps.doCommit(metadataV2, metadataV1); + + // Make sure that the locking is not used + verify(spyClient, never()).lock(any(LockRequest.class)); + verify(spyClient, never()).checkLock(any(Long.class)); + verify(spyClient, never()).heartbeat(any(Long.class), any(Long.class)); + verify(spyClient, never()).unlock(any(Long.class)); + + // Make sure that the expected parameter context values are set + Map context = contextCaptor.getValue().getProperties(); + Assert.assertEquals(3, context.size()); + Assert.assertEquals( + context.get("expected_parameter_key"), HiveTableOperations.METADATA_LOCATION_PROP); + Assert.assertEquals(context.get("expected_parameter_value"), metadataV2.metadataFileLocation()); + } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java index 1afe98d81b7e..60d4fa2f1bdf 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveCommits.java @@ -20,25 +20,26 @@ package org.apache.iceberg.hive; import java.io.File; -import java.net.UnknownHostException; import java.util.concurrent.atomic.AtomicReference; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.types.Types; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyBoolean; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; @@ -46,7 +47,7 @@ public class TestHiveCommits extends HiveTableBaseTest { @Test - public void testSuppressUnlockExceptions() throws TException, InterruptedException, UnknownHostException { + public void testSuppressUnlockExceptions() { Table table = catalog.loadTable(TABLE_IDENTIFIER); HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations(); @@ -64,21 +65,22 @@ public void testSuppressUnlockExceptions() throws TException, InterruptedExcepti HiveTableOperations spyOps = spy(ops); - AtomicReference lockRef = new AtomicReference<>(); + AtomicReference lockRef = new AtomicReference<>(); - 
when(spyOps.createLock()).thenAnswer(i -> { - HiveCommitLock lock = (HiveCommitLock) i.callRealMethod(); - lockRef.set(lock); - return lock; - } - ); + when(spyOps.lockObject(metadataV1)) + .thenAnswer( + i -> { + HiveLock lock = (HiveLock) i.callRealMethod(); + lockRef.set(lock); + return lock; + }); try { spyOps.commit(metadataV2, metadataV1); - HiveCommitLock spyLock = spy(lockRef.get()); - doThrow(new RuntimeException()).when(spyLock).release(); + HiveLock spyLock = spy(lockRef.get()); + doThrow(new RuntimeException()).when(spyLock).unlock(); } finally { - ops.doUnlock(lockRef.get()); + lockRef.get().unlock(); } ops.refresh(); @@ -112,9 +114,9 @@ public void testThriftExceptionUnknownStateIfNotInHistoryFailureOnCommit() throw failCommitAndThrowException(spyOps); - AssertHelpers.assertThrows("We should assume commit state is unknown if the " + - "new location is not found in history in commit state check", CommitStateUnknownException.class, - "Datacenter on fire", () -> spyOps.commit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitStateUnknownException.class) + .hasMessageStartingWith("Datacenter on fire"); ops.refresh(); Assert.assertEquals("Current metadata should not have changed", metadataV2, ops.current()); @@ -184,9 +186,9 @@ public void testThriftExceptionUnknownFailedCommit() throws TException, Interrup failCommitAndThrowException(spyOps); breakFallbackCatalogCommitCheck(spyOps); - AssertHelpers.assertThrows("Should throw CommitStateUnknownException since the catalog check was blocked", - CommitStateUnknownException.class, "Datacenter on fire", - () -> spyOps.commit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitStateUnknownException.class) + .hasMessageStartingWith("Datacenter on fire"); ops.refresh(); @@ -222,9 +224,9 @@ public void testThriftExceptionsUnknownSuccessCommit() throws TException, Interr commitAndThrowException(ops, spyOps); breakFallbackCatalogCommitCheck(spyOps); - AssertHelpers.assertThrows("Should throw CommitStateUnknownException since the catalog check was blocked", - CommitStateUnknownException.class, "Datacenter on fire", - () -> spyOps.commit(metadataV2, metadataV1)); + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitStateUnknownException.class) + .hasMessageStartingWith("Datacenter on fire"); ops.refresh(); @@ -237,17 +239,21 @@ public void testThriftExceptionsUnknownSuccessCommit() throws TException, Interr * and a second committer placed a commit on top of ours before the first committer was able to check * if their commit succeeded or not * - * Timeline: - * Client 1 commits which throws an exception but suceeded - * Client 1's lock expires while waiting to do the recheck for commit success - * Client 2 acquires a lock, commits successfully on top of client 1's commit and release lock - * Client 1 check's to see if their commit was successful + *

+ * <p>Timeline:
+ *
+ * <ul>
+ *   <li>Client 1 commits which throws an exception but succeeded
+ *   <li>Client 1's lock expires while waiting to do the recheck for commit success
+ *   <li>Client 2 acquires a lock, commits successfully on top of client 1's commit and release
+ *       lock
+ *   <li>Client 1 checks to see if their commit was successful
+ * </ul>
* * This tests to make sure a disconnected client 1 doesn't think their commit failed just because it isn't the * current one during the recheck phase. */ @Test - public void testThriftExceptionConcurrentCommit() throws TException, InterruptedException, UnknownHostException { + public void testThriftExceptionConcurrentCommit() throws TException, InterruptedException { Table table = catalog.loadTable(TABLE_IDENTIFIER); HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations(); @@ -265,11 +271,14 @@ public void testThriftExceptionConcurrentCommit() throws TException, Interrupted HiveTableOperations spyOps = spy(ops); - AtomicReference lock = new AtomicReference<>(); - doAnswer(l -> { - lock.set(ops.createLock()); - return lock.get(); - }).when(spyOps).createLock(); + AtomicReference lock = new AtomicReference<>(); + doAnswer( + l -> { + lock.set(ops.lockObject(metadataV1)); + return lock.get(); + }) + .when(spyOps) + .lockObject(metadataV1); concurrentCommitAndThrowException(ops, spyOps, table, lock); @@ -301,37 +310,87 @@ public void testAlreadyExistsException() { () -> catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned())); } - private void commitAndThrowException(HiveTableOperations realOperations, HiveTableOperations spyOperations) + /** Uses NoLock and pretends we throw an error because of a concurrent commit */ + @Test + public void testNoLockThriftExceptionConcurrentCommit() throws TException, InterruptedException { + Table table = catalog.loadTable(TABLE_IDENTIFIER); + HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations(); + + TableMetadata metadataV1 = ops.current(); + + table.updateSchema().addColumn("n", Types.IntegerType.get()).commit(); + + ops.refresh(); + + TableMetadata metadataV2 = ops.current(); + + Assert.assertEquals(2, ops.current().schema().columns().size()); + + HiveTableOperations spyOps = spy(ops); + + // Sets NoLock + doReturn(new NoLock()).when(spyOps).lockObject(any()); + + // Simulate a concurrent table modification error + doThrow( + new RuntimeException( + "MetaException(message:The table has been modified. 
" + + "The parameter value for key 'metadata_location' is")) + .when(spyOps) + .persistTable(any(), anyBoolean(), any()); + + // Should throw a CommitFailedException so the commit could be retried + Assertions.assertThatThrownBy(() -> spyOps.commit(metadataV2, metadataV1)) + .isInstanceOf(CommitFailedException.class) + .hasMessage("The table hivedb.tbl has been modified concurrently"); + + ops.refresh(); + Assert.assertEquals("Current metadata should not have changed", metadataV2, ops.current()); + Assert.assertTrue("Current metadata should still exist", metadataFileExists(metadataV2)); + Assert.assertEquals("New metadata files should not exist", 2, metadataFileCount(ops.current())); + } + + private void commitAndThrowException( + HiveTableOperations realOperations, HiveTableOperations spyOperations) throws TException, InterruptedException { // Simulate a communication error after a successful commit doAnswer(i -> { org.apache.hadoop.hive.metastore.api.Table tbl = i.getArgument(0, org.apache.hadoop.hive.metastore.api.Table.class); - realOperations.persistTable(tbl, true); + String location = i.getArgument(2, String.class); + realOperations.persistTable(tbl, true, location); throw new TException("Datacenter on fire"); - }).when(spyOperations).persistTable(any(), anyBoolean()); + }) + .when(spyOperations) + .persistTable(any(), anyBoolean(), any()); } - private void concurrentCommitAndThrowException(HiveTableOperations realOperations, HiveTableOperations spyOperations, - Table table, AtomicReference lockId) + private void concurrentCommitAndThrowException( + HiveTableOperations realOperations, + HiveTableOperations spyOperations, + Table table, + AtomicReference lock) throws TException, InterruptedException { // Simulate a communication error after a successful commit doAnswer(i -> { org.apache.hadoop.hive.metastore.api.Table tbl = i.getArgument(0, org.apache.hadoop.hive.metastore.api.Table.class); - realOperations.persistTable(tbl, true); + String location = i.getArgument(2, String.class); + realOperations.persistTable(tbl, true, location); // Simulate lock expiration or removal - realOperations.doUnlock(lockId.get()); + lock.get().unlock(); table.refresh(); table.updateSchema().addColumn("newCol", Types.IntegerType.get()).commit(); throw new TException("Datacenter on fire"); - }).when(spyOperations).persistTable(any(), anyBoolean()); + }) + .when(spyOperations) + .persistTable(any(), anyBoolean(), any()); } private void failCommitAndThrowException(HiveTableOperations spyOperations) throws TException, InterruptedException { doThrow(new TException("Datacenter on fire")) .when(spyOperations) - .persistTable(any(), anyBoolean()); + .persistTable(any(), anyBoolean(), any()); } private void breakFallbackCatalogCommitCheck(HiveTableOperations spyOperations) { diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java index a23e1f30ac12..8c0ef6b5adbb 100644 --- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveSchemaUtil.java @@ -26,12 +26,12 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.Schema; import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; @@ -118,11 +118,10 @@ public void testSchemaConvertToIcebergSchemaForEveryPrimitiveType() { @Test public void testNotSupportedTypes() { for (FieldSchema notSupportedField : getNotSupportedFieldSchemas()) { - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Unsupported Hive type", () -> { - HiveSchemaUtil.convert(Lists.newArrayList(Arrays.asList(notSupportedField))); - } - ); + Assertions.assertThatThrownBy( + () -> HiveSchemaUtil.convert(Lists.newArrayList(Arrays.asList(notSupportedField)))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Unsupported Hive type"); } } diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestLoadHiveCatalog.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestLoadHiveCatalog.java new file mode 100644 index 000000000000..7311432a54fc --- /dev/null +++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestLoadHiveCatalog.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.hive; + +import java.util.Collections; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestLoadHiveCatalog { + + private static TestHiveMetastore metastore; + + @BeforeClass + public static void startMetastore() throws Exception { + HiveConf hiveConf = new HiveConf(TestLoadHiveCatalog.class); + metastore = new TestHiveMetastore(); + metastore.start(hiveConf); + } + + @AfterClass + public static void stopMetastore() throws Exception { + if (metastore != null) { + metastore.stop(); + metastore = null; + } + } + + @Test + public void testCustomCacheKeys() throws Exception { + HiveCatalog hiveCatalog1 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + Collections.emptyMap(), + metastore.hiveConf()); + HiveCatalog hiveCatalog2 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + Collections.emptyMap(), + metastore.hiveConf()); + + CachedClientPool clientPool1 = (CachedClientPool) hiveCatalog1.clientPool(); + CachedClientPool clientPool2 = (CachedClientPool) hiveCatalog2.clientPool(); + Assert.assertSame(clientPool1.clientPool(), clientPool2.clientPool()); + + Configuration conf1 = new Configuration(metastore.hiveConf()); + Configuration conf2 = new Configuration(metastore.hiveConf()); + conf1.set("any.key", "any.value"); + conf2.set("any.key", "any.value"); + hiveCatalog1 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of(CatalogProperties.CLIENT_POOL_CACHE_KEYS, "conf:any.key"), + conf1); + hiveCatalog2 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of(CatalogProperties.CLIENT_POOL_CACHE_KEYS, "conf:any.key"), + conf2); + clientPool1 = (CachedClientPool) hiveCatalog1.clientPool(); + clientPool2 = (CachedClientPool) hiveCatalog2.clientPool(); + Assert.assertSame(clientPool1.clientPool(), clientPool2.clientPool()); + + conf2.set("any.key", "any.value2"); + hiveCatalog2 = + (HiveCatalog) + CatalogUtil.loadCatalog( + HiveCatalog.class.getName(), + CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE, + ImmutableMap.of(CatalogProperties.CLIENT_POOL_CACHE_KEYS, "conf:any.key"), + conf2); + clientPool2 = (CachedClientPool) hiveCatalog2.clientPool(); + Assert.assertNotSame(clientPool1.clientPool(), clientPool2.clientPool()); + } +} diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java index d3bc13baedc3..d422885becda 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -40,27 +40,24 @@ /** * Class for catalog resolution and accessing the common functions for {@link Catalog} API. - *

- * If the catalog name is provided, get the catalog type from iceberg.catalog.<code>catalogName</code>.type config.
- * <p>
- * In case the catalog name is {@link #ICEBERG_HADOOP_TABLE_NAME location_based_table},
- * type is ignored and tables will be loaded using {@link HadoopTables}.
- * <p>
- * In case the value of catalog type is null, iceberg.catalog.<code>catalogName</code>.catalog-impl config
- * is used to determine the catalog implementation class.
- * <p>
- * If catalog name is null, get the catalog type from {@link InputFormatConfig#CATALOG iceberg.mr.catalog} config:
+ *
+ * <p>If the catalog name is provided, get the catalog type from iceberg.catalog.<code>catalogName
+ * </code>.type config.
+ *
+ * <p>In case the catalog name is {@link #ICEBERG_HADOOP_TABLE_NAME location_based_table}, type is
+ * ignored and tables will be loaded using {@link HadoopTables}.
+ *
+ * <p>In case the value of catalog type is null, iceberg.catalog.<code>catalogName
+ * </code>.catalog-impl config is used to determine the catalog implementation class.
+ *
+ * <p>If catalog name is null, get the catalog type from {@link CatalogUtil#ICEBERG_CATALOG_TYPE
+ * catalog type} config:
+ *
  * <ul>
  *   <li>hive: HiveCatalog</li>
  *   <li>location: HadoopTables</li>
  *   <li>hadoop: HadoopCatalog</li>
  * </ul>
- * <p>
- * In case the value of catalog type is null,
- * {@link InputFormatConfig#CATALOG_LOADER_CLASS iceberg.mr.catalog.loader.class} is used to determine
- * the catalog implementation class.
- * <p>
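As a rough illustration of the catalog resolution rules described in this Javadoc (a minimal sketch, not part of the patch): the catalog names "my_hive" and "my_hadoop" and the warehouse path are assumed values, and the property keys follow the iceberg.catalog.<catalogName>.* scheme produced by InputFormatConfig.catalogPropertyConfigKey.

    import org.apache.hadoop.conf.Configuration;

    Configuration conf = new Configuration();
    // named catalog resolved via its type: iceberg.catalog.<catalogName>.type
    conf.set("iceberg.catalog.my_hive.type", "hive");
    // named catalog resolved via an implementation class when no type is given
    conf.set("iceberg.catalog.my_hadoop.catalog-impl", "org.apache.iceberg.hadoop.HadoopCatalog");
    conf.set("iceberg.catalog.my_hadoop.warehouse", "file:///tmp/iceberg/warehouse");
    // Catalogs.loadTable(conf, props) then picks whichever catalog is named in InputFormatConfig.CATALOG_NAME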

- * Note: null catalog name mode is only supported for backwards compatibility. Using this mode is NOT RECOMMENDED. */ public final class Catalogs { @@ -254,47 +251,22 @@ static Optional loadCatalog(Configuration conf, String catalogName) { * @param catalogType type of the catalog * @return complete map of catalog properties */ - private static Map getCatalogProperties(Configuration conf, String catalogName, String catalogType) { + private static Map getCatalogProperties( + Configuration conf, String catalogName, String catalogType) { Map catalogProperties = Maps.newHashMap(); + String keyPrefix = InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName; conf.forEach(config -> { if (config.getKey().startsWith(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX)) { catalogProperties.putIfAbsent( - config.getKey().substring(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX.length()), - config.getValue()); - } else if (config.getKey().startsWith(InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName)) { + config.getKey().substring(InputFormatConfig.CATALOG_DEFAULT_CONFIG_PREFIX.length()), + config.getValue()); + } else if (config.getKey().startsWith(keyPrefix)) { catalogProperties.put( - config.getKey().substring((InputFormatConfig.CATALOG_CONFIG_PREFIX + catalogName).length() + 1), - config.getValue()); + config.getKey().substring(keyPrefix.length() + 1), + config.getValue()); } }); - return addCatalogPropertiesIfMissing(conf, catalogType, catalogProperties); - } - - /** - * This method is used for backward-compatible catalog configuration. - * Collect all the catalog specific configuration from the global hive configuration. - * Note: this should be removed when the old catalog configuration is depracated. - * @param conf global hive configuration - * @param catalogType type of the catalog - * @param catalogProperties pre-populated catalog properties - * @return complete map of catalog properties - */ - private static Map addCatalogPropertiesIfMissing(Configuration conf, String catalogType, - Map catalogProperties) { - if (catalogType != null) { - catalogProperties.putIfAbsent(CatalogUtil.ICEBERG_CATALOG_TYPE, catalogType); - } - - String legacyCatalogImpl = conf.get(InputFormatConfig.CATALOG_LOADER_CLASS); - if (legacyCatalogImpl != null) { - catalogProperties.putIfAbsent(CatalogProperties.CATALOG_IMPL, legacyCatalogImpl); - } - - String legacyWarehouseLocation = conf.get(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION); - if (legacyWarehouseLocation != null) { - catalogProperties.putIfAbsent(CatalogProperties.WAREHOUSE_LOCATION, legacyWarehouseLocation); - } return catalogProperties; } @@ -317,7 +289,7 @@ private static String getCatalogType(Configuration conf, String catalogName) { return catalogType; } } else { - String catalogType = conf.get(InputFormatConfig.CATALOG); + String catalogType = conf.get(CatalogUtil.ICEBERG_CATALOG_TYPE); if (catalogType != null && catalogType.equals(LOCATION)) { return NO_CATALOG_TYPE; } else { diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java index d1bfde2f7f8b..cf3450840a83 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java @@ -53,29 +53,6 @@ private InputFormatConfig() { public static final String TABLE_CATALOG_PREFIX = "iceberg.mr.table.catalog."; public static final String LOCALITY = 
"iceberg.mr.locality"; - /** - * @deprecated please use {@link #catalogPropertyConfigKey(String, String)} - * with config key {@link org.apache.iceberg.CatalogUtil#ICEBERG_CATALOG_TYPE} to specify the type of a catalog. - */ - @Deprecated - public static final String CATALOG = "iceberg.mr.catalog"; - - /** - * @deprecated please use {@link #catalogPropertyConfigKey(String, String)} - * with config key {@link org.apache.iceberg.CatalogProperties#WAREHOUSE_LOCATION} - * to specify the warehouse location of a catalog. - */ - @Deprecated - public static final String HADOOP_CATALOG_WAREHOUSE_LOCATION = "iceberg.mr.catalog.hadoop.warehouse.location"; - - /** - * @deprecated please use {@link #catalogPropertyConfigKey(String, String)} - * with config key {@link org.apache.iceberg.CatalogProperties#CATALOG_IMPL} - * to specify the implementation of a catalog. - */ - @Deprecated - public static final String CATALOG_LOADER_CLASS = "iceberg.mr.catalog.loader.class"; - public static final String CTAS_TABLE_NAME = "iceberg.mr.ctas.table.name"; public static final String SELECTED_COLUMNS = "iceberg.mr.selected.columns"; public static final String FETCH_VIRTUAL_COLUMNS = "iceberg.mr.fetch.virtual.columns"; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java index fa92b638c660..66883b02e5cb 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergInputFormat.java @@ -50,7 +50,7 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.ResidualEvaluator; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.InputFormatConfig; import org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader; import org.apache.iceberg.mr.mapred.Container; @@ -74,7 +74,7 @@ public class HiveIcebergInputFormat extends MapredIcebergInputFormat public static final String ICEBERG_DISABLE_VECTORIZATION_PREFIX = "iceberg.disable.vectorization."; static { - if (MetastoreUtil.hive3PresentOnClasspath()) { + if (HiveVersion.min(HiveVersion.HIVE_3)) { HIVE_VECTORIZED_RECORDREADER_CTOR = DynConstructors.builder(AbstractMapredIcebergRecordReader.class) .impl(HIVE_VECTORIZED_RECORDREADER_CLASS, IcebergInputFormat.class, @@ -159,7 +159,7 @@ public RecordReader> getRecordReader(InputSplit split, J job.getBoolean(ColumnProjectionUtils.FETCH_VIRTUAL_COLUMNS_CONF_STR, false)); if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && Utilities.getIsVectorized(job)) { - Preconditions.checkArgument(MetastoreUtil.hive3PresentOnClasspath(), "Vectorization only supported for Hive 3+"); + Preconditions.checkArgument(HiveVersion.min(HiveVersion.HIVE_3), "Vectorization only supported for Hive 3+"); job.setEnum(InputFormatConfig.IN_MEMORY_DATA_MODEL, InputFormatConfig.InMemoryDataModel.HIVE); job.setBoolean(InputFormatConfig.SKIP_RESIDUAL_FILTERING, true); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java index 694c54cf13a6..d946531d58f4 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java +++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java @@ -83,9 +83,9 @@ import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.hive.CachedClientPool; -import org.apache.iceberg.hive.HiveCommitLock; import org.apache.iceberg.hive.HiveSchemaUtil; import org.apache.iceberg.hive.HiveTableOperations; +import org.apache.iceberg.hive.MetastoreLock; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.mapping.NameMapping; @@ -147,7 +147,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook { private Transaction transaction; private AlterTableType currentAlterTableOp; private boolean createHMSTableInHook = false; - private HiveCommitLock commitLock; + private MetastoreLock commitLock; private enum FileFormat { ORC("orc"), PARQUET("parquet"), AVRO("avro"); @@ -324,15 +324,15 @@ public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, E context.getProperties().get(OLD_TABLE_NAME)).toString()); } if (commitLock == null) { - commitLock = new HiveCommitLock(conf, new CachedClientPool(conf, Maps.fromProperties(catalogProperties)), + commitLock = new MetastoreLock(conf, new CachedClientPool(conf, Maps.fromProperties(catalogProperties)), catalogProperties.getProperty(Catalogs.NAME), hmsTable.getDbName(), hmsTable.getTableName()); } try { - commitLock.acquire(); + commitLock.lock(); doPreAlterTable(hmsTable, context); } catch (Exception e) { - commitLock.release(); + commitLock.unlock(); throw new MetaException(StringUtils.stringifyException(e)); } } @@ -526,7 +526,7 @@ public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable if (commitLock == null) { throw new IllegalStateException("Hive commit lock should already be set"); } - commitLock.release(); + commitLock.unlock(); if (isTableMigration) { catalogProperties = getCatalogProperties(hmsTable); catalogProperties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(preAlterTableProperties.schema)); @@ -566,7 +566,7 @@ public void rollbackAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTab if (commitLock == null) { throw new IllegalStateException("Hive commit lock should already be set"); } - commitLock.release(); + commitLock.unlock(); if (Boolean.parseBoolean(context.getProperties().getOrDefault(MIGRATE_HIVE_TO_ICEBERG, "false"))) { LOG.debug("Initiating rollback for table {} at location {}", hmsTable.getTableName(), hmsTable.getSd().getLocation()); diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 7ec03084b53a..18204a90c48e 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -168,7 +168,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H private static final Splitter TABLE_NAME_SPLITTER = Splitter.on(".."); private static final String TABLE_NAME_SEPARATOR = ".."; // Column index for partition metadata table - private static final int SPEC_IDX = 3; + private static final int SPEC_IDX = 1; private static final int PART_IDX = 0; public static final String COPY_ON_WRITE = "copy-on-write"; public static final String MERGE_ON_READ = "merge-on-read"; diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java index 039950213f92..625f8f65d296 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.iceberg.Schema; import org.apache.iceberg.common.DynMethods; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; @@ -36,23 +36,27 @@ public final class IcebergObjectInspector extends TypeUtil.SchemaVisitor { // get the correct inspectors depending on whether we're working with Hive2 or Hive3 dependencies - // we need to do this because there is a breaking API change in Date/TimestampObjectInspector between Hive2 and Hive3 - private static final String DATE_INSPECTOR_CLASS = MetastoreUtil.hive3PresentOnClasspath() ? - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3" : - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspector"; + // we need to do this because there is a breaking API change in Date/TimestampObjectInspector + // between Hive2 and Hive3 + private static final String DATE_INSPECTOR_CLASS = + HiveVersion.min(HiveVersion.HIVE_3) ? + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3" : + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspector"; public static final ObjectInspector DATE_INSPECTOR = DynMethods.builder("get") .impl(DATE_INSPECTOR_CLASS) .buildStatic() .invoke(); - private static final String TIMESTAMP_INSPECTOR_CLASS = MetastoreUtil.hive3PresentOnClasspath() ? - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3" : - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspector"; + private static final String TIMESTAMP_INSPECTOR_CLASS = + HiveVersion.min(HiveVersion.HIVE_3) ? + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3" : + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspector"; - private static final String TIMESTAMPTZ_INSPECTOR_CLASS = MetastoreUtil.hive3PresentOnClasspath() ? - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspectorHive3" : - "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspector"; + private static final String TIMESTAMPTZ_INSPECTOR_CLASS = + HiveVersion.min(HiveVersion.HIVE_3) ? 
+ "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspectorHive3" : + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampWithZoneObjectInspector"; public static final ObjectInspector TIMESTAMP_INSPECTOR = DynMethods.builder("get") .impl(TIMESTAMP_INSPECTOR_CLASS) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java index 46c1c23dc07e..af62c0514e2d 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java @@ -76,7 +76,7 @@ import org.apache.iceberg.expressions.Evaluator; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.CloseableIterator; import org.apache.iceberg.io.InputFile; @@ -261,7 +261,7 @@ private static final class IcebergRecordReader extends RecordReader private static final DynMethods.StaticMethod HIVE_VECTORIZED_READER_BUILDER; static { - if (MetastoreUtil.hive3PresentOnClasspath()) { + if (HiveVersion.min(HiveVersion.HIVE_3)) { HIVE_VECTORIZED_READER_BUILDER = DynMethods.builder("reader") .impl(HIVE_VECTORIZED_READER_CLASS, Table.class, @@ -363,7 +363,7 @@ private CloseableIterable openVectorized(FileScanTask task, Schema readSchema Preconditions.checkArgument(!task.file().format().equals(FileFormat.AVRO), "Vectorized execution is not yet supported for Iceberg avro tables. " + "Please turn off vectorization and retry the query."); - Preconditions.checkArgument(MetastoreUtil.hive3PresentOnClasspath(), + Preconditions.checkArgument(HiveVersion.min(HiveVersion.HIVE_3), "Vectorized read is unsupported for Hive 2 integration."); Path path = new Path(task.file().path().toString()); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index e9f0bd24de4f..2bd1f70c9809 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -24,7 +24,6 @@ import java.util.Optional; import java.util.Properties; import org.apache.hadoop.conf.Configuration; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.PartitionSpec; @@ -65,10 +64,11 @@ public void before() { @Test public void testLoadTableFromLocation() throws IOException { - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); - AssertHelpers.assertThrows( - "Should complain about table location not set", IllegalArgumentException.class, - "location not set", () -> Catalogs.loadTable(conf)); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); + + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Table location not set"); HadoopTables tables = new HadoopTables(); Table hadoopTable = tables.create(SCHEMA, temp.newFolder("hadoop_tables").toString()); @@ -84,9 +84,9 @@ public void testLoadTableFromCatalog() throws IOException { String warehouseLocation = 
temp.newFolder("hadoop", "warehouse").toString(); setCustomCatalogProperties(defaultCatalogName, warehouseLocation); - AssertHelpers.assertThrows( - "Should complain about table identifier not set", IllegalArgumentException.class, - "identifier not set", () -> Catalogs.loadTable(conf)); + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Table identifier not set"); HadoopCatalog catalog = new CustomHadoopCatalog(conf, warehouseLocation); Table hadoopCatalogTable = catalog.createTable(TableIdentifier.of("table"), SCHEMA); @@ -100,16 +100,18 @@ public void testLoadTableFromCatalog() throws IOException { public void testCreateDropTableToLocation() throws IOException { Properties missingSchema = new Properties(); missingSchema.put("location", temp.newFolder("hadoop_tables").toString()); - AssertHelpers.assertThrows( - "Should complain about table schema not set", NullPointerException.class, - "schema not set", () -> Catalogs.createTable(conf, missingSchema)); - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + Assertions.assertThatThrownBy(() -> Catalogs.createTable(conf, missingSchema)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table schema not set"); + + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); Properties missingLocation = new Properties(); missingLocation.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); - AssertHelpers.assertThrows( - "Should complain about table location not set", NullPointerException.class, - "location not set", () -> Catalogs.createTable(conf, missingLocation)); + + Assertions.assertThatThrownBy(() -> Catalogs.createTable(conf, missingLocation)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table location not set"); Properties properties = new Properties(); properties.put("location", temp.getRoot() + "/hadoop_tables"); @@ -127,17 +129,17 @@ public void testCreateDropTableToLocation() throws IOException { Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); - AssertHelpers.assertThrows( - "Should complain about table location not set", NullPointerException.class, - "location not set", () -> Catalogs.dropTable(conf, new Properties())); + Assertions.assertThatThrownBy(() -> Catalogs.dropTable(conf, new Properties())) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table location not set"); Properties dropProperties = new Properties(); dropProperties.put("location", temp.getRoot() + "/hadoop_tables"); Catalogs.dropTable(conf, dropProperties); - AssertHelpers.assertThrows( - "Should complain about table not found", NoSuchTableException.class, - "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties)); + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf, dropProperties)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist at location: " + properties.getProperty("location")); } @Test @@ -151,16 +153,17 @@ public void testCreateDropTableToCatalog() throws IOException { Properties missingSchema = new Properties(); missingSchema.put("name", identifier.toString()); missingSchema.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName); - AssertHelpers.assertThrows( - "Should complain about table schema not set", NullPointerException.class, - "schema not set", () -> Catalogs.createTable(conf, missingSchema)); + + Assertions.assertThatThrownBy(() 
-> Catalogs.createTable(conf, missingSchema)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table schema not set"); Properties missingIdentifier = new Properties(); missingIdentifier.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); missingIdentifier.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName); - AssertHelpers.assertThrows( - "Should complain about table identifier not set", NullPointerException.class, - "identifier not set", () -> Catalogs.createTable(conf, missingIdentifier)); + Assertions.assertThatThrownBy(() -> Catalogs.createTable(conf, missingIdentifier)) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table identifier not set"); Properties properties = new Properties(); properties.put("name", identifier.toString()); @@ -178,69 +181,18 @@ public void testCreateDropTableToCatalog() throws IOException { Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); - AssertHelpers.assertThrows( - "Should complain about table identifier not set", NullPointerException.class, - "identifier not set", () -> Catalogs.dropTable(conf, new Properties())); + Assertions.assertThatThrownBy(() -> Catalogs.dropTable(conf, new Properties())) + .isInstanceOf(NullPointerException.class) + .hasMessage("Table identifier not set"); Properties dropProperties = new Properties(); dropProperties.put("name", identifier.toString()); dropProperties.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName); Catalogs.dropTable(conf, dropProperties); - AssertHelpers.assertThrows( - "Should complain about table not found", NoSuchTableException.class, - "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties)); - } - - @Test - public void testLegacyLoadCatalogDefault() { - Optional defaultCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(defaultCatalog.isPresent()); - Assertions.assertThat(defaultCatalog.get()).isInstanceOf(HiveCatalog.class); - Assert.assertTrue(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogHive() { - conf.set(InputFormatConfig.CATALOG, CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE); - Optional hiveCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(hiveCatalog.isPresent()); - Assertions.assertThat(hiveCatalog.get()).isInstanceOf(HiveCatalog.class); - Assert.assertTrue(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogHadoop() { - conf.set(InputFormatConfig.CATALOG, CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - Optional hadoopCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(hadoopCatalog.isPresent()); - Assertions.assertThat(hadoopCatalog.get()).isInstanceOf(HadoopCatalog.class); - Assert.assertFalse(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogCustom() { - conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName()); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - Optional customHadoopCatalog = Catalogs.loadCatalog(conf, null); - Assert.assertTrue(customHadoopCatalog.isPresent()); - Assertions.assertThat(customHadoopCatalog.get()).isInstanceOf(CustomHadoopCatalog.class); - Assert.assertFalse(Catalogs.hiveCatalog(conf, new Properties())); - } - - @Test - public void testLegacyLoadCatalogLocation() { - 
conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); - Assert.assertFalse(Catalogs.loadCatalog(conf, null).isPresent()); - } - - @Test - public void testLegacyLoadCatalogUnknown() { - conf.set(InputFormatConfig.CATALOG, "fooType"); - AssertHelpers.assertThrows( - "should complain about catalog not supported", UnsupportedOperationException.class, - "Unknown catalog type", () -> Catalogs.loadCatalog(conf, null)); + Assertions.assertThatThrownBy(() -> Catalogs.loadTable(conf, dropProperties)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: test.table"); } @Test @@ -267,17 +219,6 @@ public void testLoadCatalogHive() { Assert.assertTrue(Catalogs.hiveCatalog(conf, properties)); } - @Test - public void testLegacyLoadCustomCatalogWithHiveCatalogTypeSet() { - String catalogName = "barCatalog"; - conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), - CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE); - conf.set(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalog.class.getName()); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - AssertHelpers.assertThrows("Should complain about both configs being set", IllegalArgumentException.class, - "both type and catalog-impl are set", () -> Catalogs.loadCatalog(conf, catalogName)); - } - @Test public void testLoadCatalogHadoop() { String catalogName = "barCatalog"; @@ -294,21 +235,6 @@ public void testLoadCatalogHadoop() { Assert.assertFalse(Catalogs.hiveCatalog(conf, properties)); } - @Test - public void testLoadCatalogHadoopWithLegacyWarehouseLocation() { - String catalogName = "barCatalog"; - conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), - CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP); - conf.set(InputFormatConfig.HADOOP_CATALOG_WAREHOUSE_LOCATION, "/tmp/mylocation"); - Optional hadoopCatalog = Catalogs.loadCatalog(conf, catalogName); - Assert.assertTrue(hadoopCatalog.isPresent()); - Assertions.assertThat(hadoopCatalog.get()).isInstanceOf(HadoopCatalog.class); - Assert.assertEquals("HadoopCatalog{name=barCatalog, location=/tmp/mylocation}", hadoopCatalog.get().toString()); - Properties properties = new Properties(); - properties.put(InputFormatConfig.CATALOG_NAME, catalogName); - Assert.assertFalse(Catalogs.hiveCatalog(conf, properties)); - } - @Test public void testLoadCatalogCustom() { String catalogName = "barCatalog"; @@ -333,9 +259,10 @@ public void testLoadCatalogLocation() { public void testLoadCatalogUnknown() { String catalogName = "barCatalog"; conf.set(InputFormatConfig.catalogPropertyConfigKey(catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE), "fooType"); - AssertHelpers.assertThrows( - "should complain about catalog not supported", UnsupportedOperationException.class, - "Unknown catalog type:", () -> Catalogs.loadCatalog(conf, catalogName)); + + Assertions.assertThatThrownBy(() -> Catalogs.loadCatalog(conf, catalogName)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unknown catalog type: fooType"); } @Test @@ -347,16 +274,18 @@ public void testDefaultCatalogProperties() { HiveCatalog defaultCatalog = (HiveCatalog) Catalogs.loadCatalog(conf, null).get(); Assert.assertEquals("true", defaultCatalog.properties().get(catalogProperty)); Assert.assertEquals("true", - defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")).io().properties().get(catalogProperty)); + defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")) + 
.io().properties().get(catalogProperty)); // set property at catalog level, and that should take precedence over the global property. conf.setBoolean( - String.format("%s%s.%s", InputFormatConfig.CATALOG_CONFIG_PREFIX, Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, - catalogProperty), false); + String.format("%s%s.%s", InputFormatConfig.CATALOG_CONFIG_PREFIX, Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, + catalogProperty), false); defaultCatalog = (HiveCatalog) Catalogs.loadCatalog(conf, null).get(); Assert.assertEquals("false", defaultCatalog.properties().get(catalogProperty)); Assert.assertEquals("false", - defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")).io().properties().get(catalogProperty)); + defaultCatalog.newTableOps(TableIdentifier.of("default", "iceberg")) + .io().properties().get(catalogProperty)); } public static class CustomHadoopCatalog extends HadoopCatalog { diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java index 0f749f59b249..dfdb27637fca 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java @@ -64,8 +64,11 @@ import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; +import org.assertj.core.api.Assertions; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; +import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -110,7 +113,7 @@ public class TestIcebergInputFormats { @Before public void before() throws IOException { conf = new Configuration(); - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); HadoopTables tables = new HadoopTables(conf); File location = temp.newFolder(testInputFormat.name(), fileFormat.name()); @@ -202,6 +205,42 @@ public void testResiduals() throws Exception { testInputFormat.create(builder.conf()).validate(writeRecords); } + @Test + @Ignore + // This test is ignored because the vectorized IcebergInputFormat.IcebergRecordReader doesn't support AVRO, + // and for ORC and PARQUET the IcebergInputFormat ignores residuals + // ('... scan.filter(filter).ignoreResiduals()'), which is not compatible with this test + public void testFailedResidualFiltering() throws Exception { + Assume.assumeTrue("Vectorization is not yet supported for AVRO", this.fileFormat != FileFormat.AVRO); + + helper.createTable(); + + List expectedRecords = helper.generateRandomRecords(2, 0L); + expectedRecords.get(0).set(2, "2020-03-20"); + expectedRecords.get(1).set(2, "2020-03-20"); + + helper.appendToTable(Row.of("2020-03-20", 0), expectedRecords); + + builder + .useHiveRows() + .filter( + Expressions.and(Expressions.equal("date", "2020-03-20"), Expressions.equal("id", 0))); + + Assertions.assertThatThrownBy(() -> testInputFormat.create(builder.conf())) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage( + "Filter expression ref(name=\"id\") == 0 is not completely satisfied.
Additional rows can be returned " + + "not satisfied by the filter expression"); + + builder.usePigTuples(); + + Assertions.assertThatThrownBy(() -> testInputFormat.create(builder.conf())) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage( + "Filter expression ref(name=\"id\") == 0 is not completely satisfied. Additional rows can be returned " + + "not satisfied by the filter expression"); + } + @Test public void testProjection() throws Exception { helper.createTable(); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java index 2ba4e50e8aa1..5edc299320f6 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/TestInputFormatReaderDeletes.java @@ -25,6 +25,7 @@ import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.BaseTable; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; @@ -65,7 +66,7 @@ public static Object[][] parameters() { @Before @Override public void writeTestDataFile() throws IOException { - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); super.writeTestDataFile(); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java index 6de80dfd32e9..8653c0db02fc 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java @@ -32,7 +32,7 @@ import org.apache.iceberg.SnapshotSummary; import org.apache.iceberg.Table; import org.apache.iceberg.data.Record; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -112,7 +112,7 @@ public static Collection parameters() { if (javaVersion.equals("1.8")) { testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false}); // test for vectorization=ON in case of ORC and PARQUET format with Tez engine - if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) { + if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && HiveVersion.min(HiveVersion.HIVE_3)) { testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, true}); } } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java index adad32aa48a0..e976483b81a1 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestDeserializer.java @@ -31,7 +31,7 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.data.GenericRecord; import 
org.apache.iceberg.data.Record; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.hive.serde.objectinspector.IcebergObjectInspector; import org.apache.iceberg.types.Types; import org.junit.Assert; @@ -161,7 +161,8 @@ public void testListDeserialize() { @Test public void testDeserializeEverySupportedType() { - Assume.assumeFalse("No test yet for Hive3 (Date/Timestamp creation)", MetastoreUtil.hive3PresentOnClasspath()); + Assume.assumeFalse( + "No test yet for Hive3 (Date/Timestamp creation)", HiveVersion.min(HiveVersion.HIVE_3)); Deserializer deserializer = new Deserializer.Builder() .schema(HiveIcebergTestUtils.FULL_SCHEMA) diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java index af1a30405b66..1614d937c37f 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergFilterFactory.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.expressions.And; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.expressions.Literal; @@ -40,6 +39,7 @@ import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.DateTimeUtil; +import org.assertj.core.api.Assertions; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -148,12 +148,9 @@ public void testUnsupportedBetweenOperandEmptyLeaves() { .between("salary", PredicateLeaf.Type.LONG, 9000L, 15000L) .end() .build()); - - AssertHelpers.assertThrows( - "must throw if leaves are empty in between operator", - UnsupportedOperationException.class, - "Missing leaf literals", - () -> HiveIcebergFilterFactory.generateFilterExpression(arg)); + Assertions.assertThatThrownBy(() -> HiveIcebergFilterFactory.generateFilterExpression(arg)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Missing leaf literals: Leaf[empty]"); } @Test diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index c60c3183eb07..aedcfdff5ccc 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -48,7 +48,6 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.BaseTable; import org.apache.iceberg.FileFormat; @@ -67,7 +66,7 @@ import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.Util; import org.apache.iceberg.hive.HiveSchemaUtil; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; 
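A minimal sketch of the version gate that replaces MetastoreUtil.hive3PresentOnClasspath() throughout this patch; HiveVersion.min(HiveVersion.HIVE_3) is true when the Hive runtime on the classpath is at least Hive 3 (class names below are taken from the IcebergObjectInspector change above, the surrounding assignment is illustrative only):

    // pick the Hive3-specific inspector only on Hive 3 or newer
    String dateInspectorClass = HiveVersion.min(HiveVersion.HIVE_3)
        ? "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3"
        : "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspector";
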
import org.apache.iceberg.mr.Catalogs; import org.apache.iceberg.mr.InputFormatConfig; import org.apache.iceberg.mr.TestHelper; @@ -81,6 +80,7 @@ import org.apache.iceberg.types.Types; import org.apache.parquet.hadoop.ParquetOutputFormat; import org.apache.thrift.TException; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -451,11 +451,9 @@ public void testCreateDropTable() throws TException, IOException, InterruptedExc shell.executeStatement("DROP TABLE customers"); // Check if the table was really dropped even from the Catalog - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessageStartingWith("Table does not exist"); } else { Path hmsTableLocation = new Path(hmsTable.getSd().getLocation()); @@ -463,11 +461,9 @@ public void testCreateDropTable() throws TException, IOException, InterruptedExc shell.executeStatement("DROP TABLE customers"); // Check if we drop an exception when trying to load the table - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: default.customers"); // Check if the files are removed FileSystem fs = Util.getFs(hmsTableLocation, shell.getHiveConf()); @@ -500,11 +496,9 @@ public void testCreateDropTableNonDefaultCatalog() { shell.executeStatement("DROP TABLE default.customers"); // Check if the table was really dropped even from the Catalog - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessageStartingWith("Table does not exist"); } @Test @@ -599,11 +593,9 @@ public void testDeleteBackingTable() throws TException, IOException, Interrupted shell.executeStatement("DROP TABLE customers"); // Check if we drop an exception when trying to drop the table - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: default.customers"); // Check if the files are kept FileSystem fs = Util.getFs(hmsTableLocation, shell.getHiveConf()); @@ -633,11 +625,9 @@ public void testDropTableWithCorruptedMetadata() throws TException, IOException, // check if HMS table is nonetheless still droppable shell.executeStatement(String.format("DROP TABLE %s", identifier)); - AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, - "Table does not exist", () -> { - testTables.loadTable(identifier); - } - ); + Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier)) + .isInstanceOf(NoSuchTableException.class) + .hasMessage("Table does not exist: default.customers"); } @Test @@ -645,37 +635,55 @@ public void testCreateTableError() { TableIdentifier identifier = TableIdentifier.of("default", "withShell2"); // Wrong schema 
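The assertion rewrites in this and the other test files follow one pattern: AssertHelpers.assertThrows(message, expectedExceptionClass, messageFragment, runnable) becomes an AssertJ assertThatThrownBy chain. A representative sketch, reusing the loadTable case from testCreateDropTable in this file:

    // old: AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class,
    //          "Table does not exist", () -> testTables.loadTable(identifier));
    Assertions.assertThatThrownBy(() -> testTables.loadTable(identifier))
        .isInstanceOf(NoSuchTableException.class)
        .hasMessageStartingWith("Table does not exist");
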
- AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Unrecognized token 'WrongSchema'", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE withShell2 " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(identifier) + - "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='WrongSchema'" + - ",'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')"); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE withShell2 " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL(identifier) + + "TBLPROPERTIES ('" + + InputFormatConfig.TABLE_SCHEMA + + "'='WrongSchema'" + + ",'" + + InputFormatConfig.CATALOG_NAME + + "'='" + + testTables.catalogName() + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("Unrecognized token 'WrongSchema'"); // Missing schema, we try to get the schema from the table and fail - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Please provide ", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE withShell2 " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(identifier) + - testTables.propertiesForCreateTableSQL(ImmutableMap.of())); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE withShell2 " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL(identifier) + + testTables.propertiesForCreateTableSQL(ImmutableMap.of()))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("Please provide an existing table or a valid schema"); if (!testTables.locationForCreateTableSQL(identifier).isEmpty()) { // Only test this if the location is required - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Table location not set", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE withShell2 " + - "STORED BY ICEBERG " + - "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + - SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "','" + - InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')"); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE withShell2 " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + "TBLPROPERTIES ('" + + InputFormatConfig.TABLE_SCHEMA + + "'='" + + SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + + "','" + + InputFormatConfig.CATALOG_NAME + + "'='" + + testTables.catalogName() + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageEndingWith("Table location not set"); } } @@ -687,15 +695,23 @@ public void testCreateTableAboveExistingTable() throws IOException { if (testTableType == TestTables.TestTableType.HIVE_CATALOG) { // In HiveCatalog we just expect an exception since the table is already exists - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "customers already exists", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE customers " + - "STORED BY ICEBERG " + - "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + - 
SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "',' " + - InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')"); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE customers " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + "TBLPROPERTIES ('" + + InputFormatConfig.TABLE_SCHEMA + + "'='" + + SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + + "',' " + + InputFormatConfig.CATALOG_NAME + + "'='" + + testTables.catalogName() + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("customers already exists"); } else { // With other catalogs, table creation should succeed shell.executeStatement("CREATE EXTERNAL TABLE customers " + @@ -727,16 +743,24 @@ public void testCreatePartitionedTableWithPropertiesAndWithColumnSpecification() PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name").build(); - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Provide only one of the following", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE customers (customer_id BIGINT) " + - "PARTITIONED BY (first_name STRING) " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(TableIdentifier.of("default", "customers")) + - testTables.propertiesForCreateTableSQL( - ImmutableMap.of(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec)))); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE customers (customer_id BIGINT) " + + "PARTITIONED BY (first_name STRING) " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL( + TableIdentifier.of("default", "customers")) + + " TBLPROPERTIES ('" + + InputFormatConfig.PARTITION_SPEC + + "'='" + + PartitionSpecParser.toJson(spec) + + "')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageEndingWith( + "Provide only one of the following: Hive partition transform specification, " + + "or the iceberg.mr.table.partition.spec property"); } @Test @@ -799,15 +823,19 @@ public void testCreateTableWithNotSupportedTypes() { "CHAR(1)", Types.StringType.get()); for (String notSupportedType : notSupportedTypes.keySet()) { - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Unsupported Hive type", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE not_supported_types " + - "(not_supported " + notSupportedType + ") " + - "STORED BY ICEBERG " + - testTables.locationForCreateTableSQL(identifier) + - testTables.propertiesForCreateTableSQL(ImmutableMap.of())); - } - ); + Assertions.assertThatThrownBy( + () -> + shell.executeStatement( + "CREATE EXTERNAL TABLE not_supported_types " + + "(not_supported " + + notSupportedType + + ") " + + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + + testTables.locationForCreateTableSQL(identifier) + + testTables.propertiesForCreateTableSQL(ImmutableMap.of()))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Failed to execute Hive query") + .hasMessageContaining("Unsupported Hive type"); } } @@ -974,7 +1002,7 @@ public void testIcebergAndHmsTableProperties() throws Exception { if (Catalogs.hiveCatalog(shell.getHiveConf(), 
tableProperties)) { expectedIcebergProperties.put(TableProperties.ENGINE_HIVE_ENABLED, "true"); } - if (MetastoreUtil.hive3PresentOnClasspath()) { + if (HiveVersion.min(HiveVersion.HIVE_3)) { expectedIcebergProperties.put("bucketing_version", "2"); } Assert.assertEquals(expectedIcebergProperties, icebergTable.properties()); @@ -1344,15 +1372,17 @@ public void testAlterTableReplaceColumnsFailsWhenNotOnlyDropping() { }; for (String command : commands) { - AssertHelpers.assertThrows("", IllegalArgumentException.class, - "Unsupported operation to use REPLACE COLUMNS", () -> shell.executeStatement(command)); + Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Unsupported operation to use REPLACE COLUMNS"); } // check no-op case too String command = "ALTER TABLE default.customers REPLACE COLUMNS (customer_id int, first_name string COMMENT 'This" + " is first name', last_name string COMMENT 'This is last name', address struct)"; - AssertHelpers.assertThrows("", IllegalArgumentException.class, - "No schema change detected", () -> shell.executeStatement(command)); + Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("No schema change detected"); } @Test @@ -1445,9 +1475,9 @@ public void testCommandsWithPartitionClauseThrow() { }; for (String command : commands) { - AssertHelpers.assertThrows("Should throw unsupported operation exception for queries with partition spec", - IllegalArgumentException.class, "Using partition spec in query is unsupported", - () -> shell.executeStatement(command)); + Assertions.assertThatThrownBy(() -> shell.executeStatement(command)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Using partition spec in query is unsupported"); } } @@ -1597,12 +1627,12 @@ public void testAlterTableWithMetadataLocationFromAnotherTable() throws IOExcept TableIdentifier targetIdentifier = TableIdentifier.of("default", "target"); testTables.createTable(shell, targetIdentifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, PartitionSpec.unpartitioned(), FileFormat.PARQUET, Collections.emptyList(), 1, Collections.emptyMap()); - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Cannot change iceberg table", - () -> { - shell.executeStatement("ALTER TABLE " + targetIdentifier.name() + " SET TBLPROPERTIES('metadata_location'='" + + Assertions.assertThatThrownBy(() -> { + shell.executeStatement("ALTER TABLE " + targetIdentifier.name() + " SET TBLPROPERTIES('metadata_location'='" + metadataLocation + "')"); - }); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot change iceberg table"); } @Test @@ -1612,12 +1642,11 @@ public void testAlterTableToIcebergAndMetadataLocation() throws IOException { testTables.locationForCreateTableSQL(TableIdentifier.of("default", tableName)) + testTables.propertiesForCreateTableSQL(ImmutableMap.of()); shell.executeStatement(createQuery); - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Cannot perform table migration to Iceberg and setting the snapshot location in one step.", - () -> { - shell.executeStatement("ALTER TABLE " + tableName + " SET TBLPROPERTIES(" + - "'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler','metadata_location'='asdf')"); - }); + Assertions.assertThatThrownBy(() -> shell.executeStatement("ALTER 
TABLE " + tableName + " SET TBLPROPERTIES(" + + "'storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler','metadata_location'='asdf')")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot perform table migration to Iceberg " + + "and setting the snapshot location in one step."); } @Test @@ -1658,12 +1687,13 @@ public void testCTLTHiveCatalogValidation() throws TException, InterruptedExcept shell.executeStatement("insert into source values(1)"); // Run a CTLT query. - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - " CTLT target table must be a HiveCatalog table", () -> { - shell.executeStatement(String.format("CREATE TABLE dest LIKE source STORED BY ICEBERG %s %s", + Assertions.assertThatThrownBy(() -> { + shell.executeStatement(String.format("CREATE TABLE dest LIKE source STORED BY ICEBERG %s %s", testTables.locationForCreateTableSQL(TableIdentifier.of("default", "dest")), testTables.propertiesForCreateTableSQL(ImmutableMap.of()))); - }); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining(" CTLT target table must be a HiveCatalog table"); } @Test @@ -1779,14 +1809,15 @@ public void testCreateTableWithMetadataLocationWithoutSchema() throws IOExceptio testTables.propertiesForCreateTableSQL(Collections.singletonMap("metadata_location", metadataLocation)); // Try the query with columns also specified, it should throw exception. - AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class, - "Column names can not be provided along with metadata location.", () -> { - shell.executeStatement("CREATE EXTERNAL TABLE target (id int) STORED BY ICEBERG " + + Assertions.assertThatThrownBy(() -> { + shell.executeStatement("CREATE EXTERNAL TABLE target (id int) STORED BY ICEBERG " + testTables.locationForCreateTableSQL(targetIdentifier) + tblProps); - }); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Column names can not be provided along with metadata location."); shell.executeStatement( - "CREATE EXTERNAL TABLE target STORED BY ICEBERG " + testTables.locationForCreateTableSQL(targetIdentifier) + - tblProps); + "CREATE EXTERNAL TABLE target STORED BY ICEBERG " + testTables.locationForCreateTableSQL(targetIdentifier) + + tblProps); // Check the partition and the schema are preserved. Table targetIcebergTable = diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java index c988fa88fd26..656f16b4a1cd 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestTables.java @@ -49,7 +49,7 @@ import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.hadoop.HadoopTables; import org.apache.iceberg.hive.HiveCatalog; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.mr.Catalogs; import org.apache.iceberg.mr.InputFormatConfig; import org.apache.iceberg.mr.TestCatalogs; @@ -506,7 +506,7 @@ static class CustomCatalogTestTables extends TestTables { private final String warehouseLocation; CustomCatalogTestTables(Configuration conf, TemporaryFolder temp, String catalogName) throws IOException { - this(conf, temp, (MetastoreUtil.hive3PresentOnClasspath() ? "file:" : "") + + this(conf, temp, (HiveVersion.min(HiveVersion.HIVE_3) ? 
"file:" : "") + temp.newFolder("custom", "warehouse").toString(), catalogName); } @@ -537,7 +537,7 @@ static class HadoopCatalogTestTables extends TestTables { private final String warehouseLocation; HadoopCatalogTestTables(Configuration conf, TemporaryFolder temp, String catalogName) throws IOException { - this(conf, temp, (MetastoreUtil.hive3PresentOnClasspath() ? "file:" : "") + + this(conf, temp, (HiveVersion.min(HiveVersion.HIVE_3) ? "file:" : "") + temp.newFolder("hadoop", "warehouse").toString(), catalogName); } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java index eb589b2495e6..b6577a3dd259 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.iceberg.Schema; +import org.apache.iceberg.hive.HiveVersion; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Types; import org.junit.Assert; @@ -90,7 +91,15 @@ public void testIcebergObjectInspector() { Assert.assertEquals(3, dateField.getFieldID()); Assert.assertEquals("date_field", dateField.getFieldName()); Assert.assertEquals("date comment", dateField.getFieldComment()); - Assert.assertEquals(IcebergDateObjectInspectorHive3.get(), dateField.getFieldObjectInspector()); + if (HiveVersion.min(HiveVersion.HIVE_3)) { + Assert.assertEquals( + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3", + dateField.getFieldObjectInspector().getClass().getName()); + } else { + Assert.assertEquals( + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspector", + dateField.getFieldObjectInspector().getClass().getName()); + } // decimal StructField decimalField = soi.getStructFieldRef("decimal_field"); diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java index 9decd05ad7e8..fae2e207ba2a 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveVectorizedReader.java @@ -29,6 +29,7 @@ import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.FileFormat; import org.apache.iceberg.Schema; import org.apache.iceberg.data.Record; @@ -76,7 +77,7 @@ public void before() throws IOException, HiveException { Assert.assertTrue(location.delete()); Configuration conf = prepareMockJob(SCHEMA, new Path(location.toString())); - conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION); + conf.set(CatalogUtil.ICEBERG_CATALOG_TYPE, Catalogs.LOCATION); HadoopTables tables = new HadoopTables(conf); helper = new TestHelper(conf, tables, location.toString(), SCHEMA, null, fileFormat, temp); diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q b/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q index 0d1ebef44ba2..b424fac02806 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/show_partitions_test.q @@ -14,16 +14,19 @@ insert into ice1 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, --compare hive table with iceberg table show partitions hiveT1; -show partitions ice1 ; +describe default.ice1.partitions; select * from default.ice1.partitions order by `partition`; +show partitions ice1 ; + explain show partitions hiveT1; explain show partitions ice1; explain select * from default.ice1.partitions; --- Partition evolution -create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc TBLPROPERTIES("format-version"='2') ; -insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2, 10, 5); +---- Partition evolution +create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc +TBLPROPERTIES("format-version"='2') ; +insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2,10, 5); select * from default.ice2.partitions order by `partition`; show partitions ice2; @@ -32,7 +35,8 @@ ALTER TABLE ice2 SET PARTITION SPEC (c) ; select * from default.ice2.partitions order by `partition`; show partitions ice2; -insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, 10, 5); +insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, +10, 5); select * from default.ice2.partitions order by `partition`; show partitions ice2; diff --git a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out index 8014b443e18f..dccc778cd214 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_metadata_tables.q.out @@ -67,6 +67,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe default.ice_meta_desc.history PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -113,8 +114,12 @@ PREHOOK: Input: default@ice_meta_desc POSTHOOK: query: describe default.ice_meta_desc.partitions POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_meta_desc -record_count bigint -file_count int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: describe default.ice_meta_desc.all_manifests PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -167,6 
+172,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe default.ice_meta_desc.data_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -313,6 +319,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.history PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -363,8 +370,12 @@ POSTHOOK: query: describe formatted default.ice_meta_desc.partitions POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_meta_desc # col_name data_type comment -record_count bigint -file_count int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: describe formatted default.ice_meta_desc.all_manifests PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -420,6 +431,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.data_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -570,6 +582,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe extended default.ice_meta_desc.history PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -616,8 +629,12 @@ PREHOOK: Input: default@ice_meta_desc POSTHOOK: query: describe extended default.ice_meta_desc.partitions POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice_meta_desc -record_count bigint -file_count int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: describe extended default.ice_meta_desc.all_manifests PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc @@ -670,6 +687,7 @@ snapshot_id bigint sequence_number bigint file_sequence_number bigint data_file 
struct,value_counts:map,null_value_counts:map,nan_value_counts:map,lower_bounds:map,upper_bounds:map,key_metadata:binary,split_offsets:array,equality_ids:array,sort_order_id:int> +readable_metrics struct,value:struct> Column metrics in readable form PREHOOK: query: describe formatted default.ice_meta_desc.data_files PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice_meta_desc diff --git a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out index b1e7fcfe4532..037d1b439f73 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out @@ -328,19 +328,19 @@ POSTHOOK: query: select * from default.tbl_target_mixed.partitions order by `par POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_target_mixed POSTHOOK: Output: hdfs://### HDFS PATH ### -{"ccy":"CZK","c_bucket":1} 1 1 0 -{"ccy":"CZK","c_bucket":2} 1 1 0 -{"ccy":"EUR","c_bucket":0} 1 1 0 -{"ccy":"EUR","c_bucket":1} 2 1 0 -{"ccy":"EUR","c_bucket":2} 3 1 0 -{"ccy":"HUF","c_bucket":1} 2 1 0 -{"ccy":"PLN","c_bucket":0} 2 1 0 -{"ccy":"PLN","c_bucket":1} 1 1 0 -{"ccy":"PLN","c_bucket":2} 1 1 0 -{"ccy":"USD","c_bucket":0} 2 1 0 -{"ccy":"USD","c_bucket":1} 3 1 0 -{"ccy":"USD","c_bucket":2} 1 1 0 -{"ccy":null,"c_bucket":null} 2 1 0 +{"ccy":"CZK","c_bucket":1} 0 1 1 0 0 0 0 +{"ccy":"CZK","c_bucket":2} 0 1 1 0 0 0 0 +{"ccy":"EUR","c_bucket":0} 0 1 1 0 0 0 0 +{"ccy":"EUR","c_bucket":1} 0 2 1 0 0 0 0 +{"ccy":"EUR","c_bucket":2} 0 3 1 0 0 0 0 +{"ccy":"HUF","c_bucket":1} 0 2 1 0 0 0 0 +{"ccy":"PLN","c_bucket":0} 0 2 1 0 0 0 0 +{"ccy":"PLN","c_bucket":1} 0 1 1 0 0 0 0 +{"ccy":"PLN","c_bucket":2} 0 1 1 0 0 0 0 +{"ccy":"USD","c_bucket":0} 0 2 1 0 0 0 0 +{"ccy":"USD","c_bucket":1} 0 3 1 0 0 0 0 +{"ccy":"USD","c_bucket":2} 0 1 1 0 0 0 0 +{"ccy":null,"c_bucket":null} 0 2 1 0 0 0 0 PREHOOK: query: select * from default.tbl_target_mixed.files PREHOOK: type: QUERY PREHOOK: Input: default@tbl_target_mixed diff --git a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out index c663ca5688e0..efc7a74a9373 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/query_iceberg_metadata_of_partitioned_table.q.out @@ -296,8 +296,8 @@ POSTHOOK: query: select * from default.ice_meta_2.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four"} 1 1 0 -{"b":"three"} 3 1 0 +{"b":"four"} 0 1 1 0 0 0 0 +{"b":"three"} 0 3 1 0 0 0 0 PREHOOK: query: select * from default.ice_meta_3.partitions PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -306,13 +306,13 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four","c":"Saturday"} 3 1 0 -{"b":"four","c":"Sunday"} 1 1 0 -{"b":"four","c":"Thursday"} 1 1 0 -{"b":"one","c":"Monday"} 3 1 0 -{"b":"three","c":"Wednesday"} 3 1 0 -{"b":"two","c":"Friday"} 2 1 0 -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"four","c":"Saturday"} 0 3 1 0 0 0 0 +{"b":"four","c":"Sunday"} 0 1 1 0 0 0 0 +{"b":"four","c":"Thursday"} 0 1 1 0 0 0 0 +{"b":"one","c":"Monday"} 0 3 1 0 0 0 0 
+{"b":"three","c":"Wednesday"} 0 3 1 0 0 0 0 +{"b":"two","c":"Friday"} 0 2 1 0 0 0 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select `partition` from default.ice_meta_2.partitions where `partition`.b='four' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_2 @@ -330,7 +330,7 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions where `partition`.b POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select partition_summaries from default.ice_meta_3.manifests where partition_summaries[1].upper_bound='Wednesday' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -592,8 +592,8 @@ POSTHOOK: query: select * from default.ice_meta_2.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four"} 1 1 0 -{"b":"three"} 3 1 0 +{"b":"four"} 0 1 1 0 0 0 0 +{"b":"three"} 0 3 1 0 0 0 0 PREHOOK: query: select * from default.ice_meta_3.partitions PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -602,13 +602,13 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"four","c":"Saturday"} 3 1 0 -{"b":"four","c":"Sunday"} 1 1 0 -{"b":"four","c":"Thursday"} 1 1 0 -{"b":"one","c":"Monday"} 3 1 0 -{"b":"three","c":"Wednesday"} 3 1 0 -{"b":"two","c":"Friday"} 2 1 0 -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"four","c":"Saturday"} 0 3 1 0 0 0 0 +{"b":"four","c":"Sunday"} 0 1 1 0 0 0 0 +{"b":"four","c":"Thursday"} 0 1 1 0 0 0 0 +{"b":"one","c":"Monday"} 0 3 1 0 0 0 0 +{"b":"three","c":"Wednesday"} 0 3 1 0 0 0 0 +{"b":"two","c":"Friday"} 0 2 1 0 0 0 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select `partition` from default.ice_meta_2.partitions where `partition`.b='four' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_2 @@ -626,7 +626,7 @@ POSTHOOK: query: select * from default.ice_meta_3.partitions where `partition`.b POSTHOOK: type: QUERY POSTHOOK: Input: default@ice_meta_3 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"b":"two","c":"Tuesday"} 2 1 0 +{"b":"two","c":"Tuesday"} 0 2 1 0 0 0 0 PREHOOK: query: select partition_summaries from default.ice_meta_3.manifests where partition_summaries[1].upper_bound='Wednesday' PREHOOK: type: QUERY PREHOOK: Input: default@ice_meta_3 @@ -833,6 +833,6 @@ POSTHOOK: query: select * from default.partevv.partitions POSTHOOK: type: QUERY POSTHOOK: Input: default@partevv POSTHOOK: Output: hdfs://### HDFS PATH ### -{"id":1,"ts_day":null} 1 1 1 -{"id":2,"ts_day":null} 1 1 1 -{"id":null,"ts_day":"2022-04-29"} 1 1 2 +{"id":1,"ts_day":null} 1 1 1 0 0 0 0 +{"id":2,"ts_day":null} 1 1 1 0 0 0 0 +{"id":null,"ts_day":"2022-04-29"} 2 1 1 0 0 0 0 diff --git a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out index 3d6fcd4ba676..ee3eb58f1850 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/show_partitions_test.q.out @@ -51,15 +51,20 @@ POSTHOOK: Input: default@hivet1 d_part=10/e_part=5 d_part=2/e_part=5 d_part=3/e_part=4 -PREHOOK: query: show partitions ice1 -PREHOOK: type: SHOWPARTITIONS +PREHOOK: query: describe default.ice1.partitions +PREHOOK: type: DESCTABLE PREHOOK: Input: default@ice1 -POSTHOOK: query: show 
partitions ice1 -POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: query: describe default.ice1.partitions +POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@ice1 -current-spec-id=0/d_part=10/e_part=5 -current-spec-id=0/d_part=2/e_part=5 -current-spec-id=0/d_part=3/e_part=4 +partition struct +spec_id int +record_count bigint Count of records in data files +file_count int Count of data files +position_delete_record_count bigint Count of records in position delete files +position_delete_file_count int Count of position delete files +equality_delete_record_count bigint Count of records in equality delete files +equality_delete_file_count int Count of equality delete files PREHOOK: query: select * from default.ice1.partitions order by `partition` PREHOOK: type: QUERY PREHOOK: Input: default@ice1 @@ -68,9 +73,18 @@ POSTHOOK: query: select * from default.ice1.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice1 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5} 2 1 0 -{"d_part":2,"e_part":5} 1 1 0 -{"d_part":3,"e_part":4} 2 1 0 +{"d_part":10,"e_part":5} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4} 0 2 1 0 0 0 0 +PREHOOK: query: show partitions ice1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@ice1 +POSTHOOK: query: show partitions ice1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@ice1 +current-spec-id=0/d_part=10/e_part=5 +current-spec-id=0/d_part=2/e_part=5 +current-spec-id=0/d_part=3/e_part=4 PREHOOK: query: explain show partitions hiveT1 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@hivet1 @@ -109,23 +123,25 @@ Stage-0 Fetch Operator limit:-1 Select Operator [SEL_1] - Output:["_col0","_col1","_col2","_col3"] + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] TableScan [TS_0] - Output:["partition","record_count","file_count","spec_id"] + Output:["partition","spec_id","record_count","file_count","position_delete_record_count","position_delete_file_count","equality_delete_record_count","equality_delete_file_count"] -PREHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc TBLPROPERTIES("format-version"='2') +PREHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc +TBLPROPERTIES("format-version"='2') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@ice2 -POSTHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc TBLPROPERTIES("format-version"='2') +POSTHOOK: query: create table ice2 (a string, b int, c int) PARTITIONED BY (d_part int, e_part int) stored by iceberg stored as orc +TBLPROPERTIES("format-version"='2') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@ice2 -PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2, 10, 5) +PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2,10, 5) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@ice2 -POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 5), ('aa', 1, 2, 10, 5), ('aa', 1, 2, 10, 5) +POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 2, 2, 
5), ('aa', 1, 2, 10, 5), ('aa', 1, 2,10, 5) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@ice2 @@ -137,9 +153,9 @@ POSTHOOK: query: select * from default.ice2.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5} 2 1 0 -{"d_part":2,"e_part":5} 1 1 0 -{"d_part":3,"e_part":4} 2 1 0 +{"d_part":10,"e_part":5} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4} 0 2 1 0 0 0 0 PREHOOK: query: show partitions ice2 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@ice2 @@ -164,9 +180,9 @@ POSTHOOK: query: select * from default.ice2.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5,"c":null} 2 1 0 -{"d_part":2,"e_part":5,"c":null} 1 1 0 -{"d_part":3,"e_part":4,"c":null} 2 1 0 +{"d_part":10,"e_part":5,"c":null} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5,"c":null} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4,"c":null} 0 2 1 0 0 0 0 PREHOOK: query: show partitions ice2 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@ice2 @@ -176,11 +192,13 @@ POSTHOOK: Input: default@ice2 spec-id=0/d_part=10/e_part=5 spec-id=0/d_part=2/e_part=5 spec-id=0/d_part=3/e_part=4 -PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, 10, 5) +PREHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, +10, 5) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@ice2 -POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, 10, 5) +POSTHOOK: query: insert into ice2 values ('aa', 1, 2, 3, 4), ('aa', 1, 2, 3, 4), ('aa', 1, 3, 2, 5), ('aa', 1, 4, 10, 5), ('aa', 1, 5, +10, 5) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@ice2 @@ -192,13 +210,13 @@ POSTHOOK: query: select * from default.ice2.partitions order by `partition` POSTHOOK: type: QUERY POSTHOOK: Input: default@ice2 POSTHOOK: Output: hdfs://### HDFS PATH ### -{"d_part":10,"e_part":5,"c":null} 2 1 0 -{"d_part":2,"e_part":5,"c":null} 1 1 0 -{"d_part":3,"e_part":4,"c":null} 2 1 0 -{"d_part":null,"e_part":null,"c":2} 2 1 1 -{"d_part":null,"e_part":null,"c":3} 1 1 1 -{"d_part":null,"e_part":null,"c":4} 1 1 1 -{"d_part":null,"e_part":null,"c":5} 1 1 1 +{"d_part":10,"e_part":5,"c":null} 0 2 1 0 0 0 0 +{"d_part":2,"e_part":5,"c":null} 0 1 1 0 0 0 0 +{"d_part":3,"e_part":4,"c":null} 0 2 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":2} 1 2 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":3} 1 1 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":4} 1 1 1 0 0 0 0 +{"d_part":null,"e_part":null,"c":5} 1 1 1 0 0 0 0 PREHOOK: query: show partitions ice2 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@ice2 diff --git a/iceberg/pom.xml b/iceberg/pom.xml index e59b01379c08..3a4658b9d349 100644 --- a/iceberg/pom.xml +++ b/iceberg/pom.xml @@ -25,8 +25,8 @@ .. . 
- 1.2.1
- 4.0.2
+ 1.3.0
+ 4.0.3
3.4.4
1.11.1
5.2.0
@@ -36,6 +36,7 @@
2.5.1
3.19.0
5.7.2
+ 2.9.2
false
@@ -208,6 +209,11 @@
mockito-inline
${iceberg.mockito-core.version}
+
+ org.immutables
+ value
+ ${immutables.value.version}
+
@@ -301,6 +307,11 @@
error_prone_core
${google.errorprone.version}
+
+ org.immutables
+ value
+ ${immutables.value.version}
+
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java
index b78076b606ff..94a01ca3f3d5 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreInitListener.java
@@ -48,7 +48,7 @@ public void setUp() throws Exception {
@Test
public void testMetaStoreInitListener() throws Exception {
- // DummyMataStoreInitListener's onInit will be called at HMSHandler
+ // DummyMetaStoreInitListener's onInit will be called at HMSHandler
// initialization, and set this to true
Assert.assertTrue(DummyMetaStoreInitListener.wasCalled);
}
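
For reference, the golden files above reflect the widened Iceberg partitions metadata table: each partition row now reports spec_id together with separate data-file and delete-file metrics. A minimal HiveQL sketch of how the new columns surface to users; the table name ice_demo, its layout, and the sample rows are illustrative assumptions, not part of this patch:

-- ice_demo is a hypothetical v2 Iceberg table so that delete files can exist
create table ice_demo (a string, b int) partitioned by (p int)
  stored by iceberg stored as orc tblproperties ('format-version'='2');
insert into ice_demo values ('x', 1, 10), ('y', 2, 10), ('z', 3, 20);
-- a row-level delete on a v2 table typically produces position delete files
delete from ice_demo where b = 2;
-- every partition row now carries eight columns, in the order shown in the describe output above
select `partition`, spec_id, record_count, file_count,
       position_delete_record_count, position_delete_file_count,
       equality_delete_record_count, equality_delete_file_count
from default.ice_demo.partitions
order by `partition`;

On a format-version 1 table the four delete metrics stay 0, since delete files only exist in the v2 spec; the position_delete_* columns become non-zero once row-level deletes have been written.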