-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Hive: add table level JVM lock on HiveCatalog commit operation #2547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,8 @@ | |
|
|
||
| package org.apache.iceberg.hive; | ||
|
|
||
| import com.github.benmanes.caffeine.cache.Cache; | ||
| import com.github.benmanes.caffeine.cache.Caffeine; | ||
| import java.net.InetAddress; | ||
| import java.net.UnknownHostException; | ||
| import java.util.Collections; | ||
|
|
@@ -28,7 +30,9 @@ | |
| import java.util.Objects; | ||
| import java.util.Optional; | ||
| import java.util.Set; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.atomic.AtomicReference; | ||
| import java.util.concurrent.locks.ReentrantLock; | ||
| import java.util.stream.Collectors; | ||
| import org.apache.hadoop.conf.Configuration; | ||
| import org.apache.hadoop.hive.common.StatsSetupConst; | ||
|
|
@@ -82,9 +86,11 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { | |
| private static final String HIVE_ACQUIRE_LOCK_TIMEOUT_MS = "iceberg.hive.lock-timeout-ms"; | ||
| private static final String HIVE_LOCK_CHECK_MIN_WAIT_MS = "iceberg.hive.lock-check-min-wait-ms"; | ||
| private static final String HIVE_LOCK_CHECK_MAX_WAIT_MS = "iceberg.hive.lock-check-max-wait-ms"; | ||
| private static final String HIVE_TABLE_LEVEL_LOCK_EVICT_MS = "iceberg.hive.table-level-lock-evict-ms"; | ||
| private static final long HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes | ||
| private static final long HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT = 50; // 50 milliseconds | ||
| private static final long HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT = 5 * 1000; // 5 seconds | ||
| private static final long HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT = TimeUnit.MINUTES.toMillis(10); | ||
| private static final DynMethods.UnboundMethod ALTER_TABLE = DynMethods.builder("alter_table") | ||
| .impl(HiveMetaStoreClient.class, "alter_table_with_environmentContext", | ||
| String.class, String.class, Table.class, EnvironmentContext.class) | ||
|
|
@@ -96,6 +102,15 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { | |
| GC_ENABLED, "external.table.purge" | ||
| ); | ||
|
|
||
| private static Cache<String, ReentrantLock> commitLockCache; | ||
|
|
||
| private static synchronized void initTableLevelLockCache(long evictionTimeout) { | ||
| if (commitLockCache == null) { | ||
| commitLockCache = Caffeine.newBuilder() | ||
| .expireAfterAccess(evictionTimeout, TimeUnit.MILLISECONDS) | ||
| .build(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Provides key translation where necessary between Iceberg and HMS props. This translation is needed because some | ||
|
|
@@ -144,6 +159,9 @@ protected HiveTableOperations(Configuration conf, ClientPool metaClients, FileIO | |
| conf.getLong(HIVE_LOCK_CHECK_MIN_WAIT_MS, HIVE_LOCK_CHECK_MIN_WAIT_MS_DEFAULT); | ||
| this.lockCheckMaxWaitTime = | ||
| conf.getLong(HIVE_LOCK_CHECK_MAX_WAIT_MS, HIVE_LOCK_CHECK_MAX_WAIT_MS_DEFAULT); | ||
| long tableLevelLockCacheEvictionTimeout = | ||
| conf.getLong(HIVE_TABLE_LEVEL_LOCK_EVICT_MS, HIVE_TABLE_LEVEL_LOCK_EVICT_MS_DEFAULT); | ||
| initTableLevelLockCache(tableLevelLockCacheEvictionTimeout); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -191,6 +209,10 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { | |
| CommitStatus commitStatus = CommitStatus.FAILURE; | ||
| boolean updateHiveTable = false; | ||
| Optional<Long> lockId = Optional.empty(); | ||
| // getting a process-level lock per table to avoid concurrent commit attempts to the same table from the same | ||
| // JVM process, which would result in unnecessary and costly HMS lock acquisition requests | ||
| ReentrantLock tableLevelMutex = commitLockCache.get(fullName, t -> new ReentrantLock(true)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems safe even though one application can interact with multiple metastores, because the full name includes the catalog name. |
||
| tableLevelMutex.lock(); | ||
| try { | ||
| lockId = Optional.of(acquireLock()); | ||
| // TODO add lock heart beating for cases where default lock timeout is too low. | ||
|
|
@@ -267,6 +289,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { | |
|
|
||
| } finally { | ||
| cleanupMetadataAndUnlock(commitStatus, newMetadataLocation, lockId); | ||
| tableLevelMutex.unlock(); | ||
| } | ||
| } | ||
|
|
||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How does `expireAfterAccess` work? I am worried about this sequence:
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`expireAfterAccess` is based on the amount of time that has elapsed since the last read/write operation on the cache entry. So the scenario that you mentioned could only happen if the C1 commit takes longer than the entire eviction timeout period (10 minutes by default), which is very unlikely. Even if it does happen (e.g. due to some extreme lock starvation) and C2 starts the commit operation, it shouldn't have much of an impact, because the HMS locking mechanism will still ensure that there are no write conflicts between threads (i.e. we would basically be back to the status quo, without the table-level locks).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I like that the worst-case scenario, if the lock doesn't work, is that we just fall back to the old behavior.