-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-3029] Transaction manager: avoid deadlock when doing begin and end transactions #4363
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2b57642
b4ada1b
2dc5451
b021781
a10ec8a
7e5ad01
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,49 +35,64 @@ | |
| public class TransactionManager implements Serializable { | ||
|
|
||
| private static final Logger LOG = LogManager.getLogger(TransactionManager.class); | ||
|
|
||
| private final LockManager lockManager; | ||
| private Option<HoodieInstant> currentTxnOwnerInstant; | ||
| private Option<HoodieInstant> lastCompletedTxnOwnerInstant; | ||
| private boolean supportsOptimisticConcurrency; | ||
| private final boolean isOptimisticConcurrencyControlEnabled; | ||
| private Option<HoodieInstant> currentTxnOwnerInstant = Option.empty(); | ||
| private Option<HoodieInstant> lastCompletedTxnOwnerInstant = Option.empty(); | ||
|
|
||
| public TransactionManager(HoodieWriteConfig config, FileSystem fs) { | ||
| this.lockManager = new LockManager(config, fs); | ||
| this.supportsOptimisticConcurrency = config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl(); | ||
| this.isOptimisticConcurrencyControlEnabled = config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl(); | ||
| } | ||
|
|
||
| public synchronized void beginTransaction() { | ||
| if (supportsOptimisticConcurrency) { | ||
| public void beginTransaction() { | ||
| if (isOptimisticConcurrencyControlEnabled) { | ||
| LOG.info("Transaction starting without a transaction owner"); | ||
manojpec marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| lockManager.lock(); | ||
| LOG.info("Transaction started"); | ||
| LOG.info("Transaction started without a transaction owner"); | ||
| } | ||
| } | ||
|
|
||
| public synchronized void beginTransaction(Option<HoodieInstant> currentTxnOwnerInstant, Option<HoodieInstant> lastCompletedTxnOwnerInstant) { | ||
| if (supportsOptimisticConcurrency) { | ||
| this.lastCompletedTxnOwnerInstant = lastCompletedTxnOwnerInstant; | ||
| lockManager.setLatestCompletedWriteInstant(lastCompletedTxnOwnerInstant); | ||
| LOG.info("Latest completed transaction instant " + lastCompletedTxnOwnerInstant); | ||
| this.currentTxnOwnerInstant = currentTxnOwnerInstant; | ||
| LOG.info("Transaction starting with transaction owner " + currentTxnOwnerInstant); | ||
| public void beginTransaction(Option<HoodieInstant> newTxnOwnerInstant, | ||
| Option<HoodieInstant> lastCompletedTxnOwnerInstant) { | ||
| if (isOptimisticConcurrencyControlEnabled) { | ||
| LOG.info("Transaction starting for " + newTxnOwnerInstant | ||
| + " with latest completed transaction instant " + lastCompletedTxnOwnerInstant); | ||
| lockManager.lock(); | ||
| LOG.info("Transaction started"); | ||
| reset(currentTxnOwnerInstant, newTxnOwnerInstant, lastCompletedTxnOwnerInstant); | ||
| LOG.info("Transaction started for " + newTxnOwnerInstant | ||
| + " with latest completed transaction instant " + lastCompletedTxnOwnerInstant); | ||
| } | ||
| } | ||
|
|
||
| public void endTransaction() { | ||
| if (isOptimisticConcurrencyControlEnabled) { | ||
manojpec marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| LOG.info("Transaction ending without a transaction owner"); | ||
| lockManager.unlock(); | ||
| LOG.info("Transaction ended without a transaction owner"); | ||
| } | ||
| } | ||
|
|
||
| public synchronized void endTransaction() { | ||
| if (supportsOptimisticConcurrency) { | ||
| public void endTransaction(Option<HoodieInstant> currentTxnOwnerInstant) { | ||
| if (isOptimisticConcurrencyControlEnabled) { | ||
| LOG.info("Transaction ending with transaction owner " + currentTxnOwnerInstant); | ||
| reset(currentTxnOwnerInstant, Option.empty(), Option.empty()); | ||
| lockManager.unlock(); | ||
manojpec marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| LOG.info("Transaction ended"); | ||
| this.lastCompletedTxnOwnerInstant = Option.empty(); | ||
| lockManager.resetLatestCompletedWriteInstant(); | ||
| LOG.info("Transaction ended with transaction owner " + currentTxnOwnerInstant); | ||
| } | ||
| } | ||
|
|
||
| private synchronized void reset(Option<HoodieInstant> callerInstant, | ||
| Option<HoodieInstant> newTxnOwnerInstant, | ||
| Option<HoodieInstant> lastCompletedTxnOwnerInstant) { | ||
| if (!this.currentTxnOwnerInstant.isPresent() || this.currentTxnOwnerInstant == callerInstant) { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @nsivabalan We need to fix this as well. Usually callers pass in the same instant object and so not an issue. but, for callers who are passing in different object with the same instant also we need to reset.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @nsivabalan Filed https://issues.apache.org/jira/browse/HUDI-3064 to track the CI failing test TestHoodieClientMultiWriter flakiness . The test is buggy and FileSystemBasedLockProviderTestClass doesn't do the right tryLock. Even with the above TransactionManager fix, the test can fail.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @nsivabalan Posted https://github.com/apache/hudi/pull/4373/files to address the object comparison issue |
||
| this.currentTxnOwnerInstant = newTxnOwnerInstant; | ||
| this.lastCompletedTxnOwnerInstant = lastCompletedTxnOwnerInstant; | ||
| } | ||
| } | ||
|
|
||
| public void close() { | ||
| if (supportsOptimisticConcurrency) { | ||
| if (isOptimisticConcurrencyControlEnabled) { | ||
| lockManager.close(); | ||
| LOG.info("Transaction manager closed"); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -217,7 +217,7 @@ private HoodieCleanMetadata runClean(HoodieTable<T, I, K, O> table, HoodieInstan | |
| throw new HoodieIOException("Failed to clean up after commit", e); | ||
| } finally { | ||
| if (!skipLocking) { | ||
| this.txnManager.endTransaction(); | ||
| this.txnManager.endTransaction(Option.empty()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we try to set current trxn at L209. last trxn can be empty. |
||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -112,7 +112,7 @@ private void writeToMetadata(HoodieRestoreMetadata restoreMetadata) { | |
| this.txnManager.beginTransaction(Option.empty(), Option.empty()); | ||
| writeTableMetadata(restoreMetadata); | ||
| } finally { | ||
| this.txnManager.endTransaction(); | ||
| this.txnManager.endTransaction(Option.empty()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we try to see if we can set atleast current owner at L 112. |
||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -266,7 +266,7 @@ protected void finishRollback(HoodieInstant inflightInstant, HoodieRollbackMetad | |
| throw new HoodieIOException("Error executing rollback at instant " + instantTime, e); | ||
| } finally { | ||
| if (!skipLocking) { | ||
| this.txnManager.endTransaction(); | ||
| this.txnManager.endTransaction(Option.empty()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Guess we can fix L259 to send atleast the current trxn owner. may be last can be empty. |
||
| } | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
write actions seem to be calling this and the table services seem to be calling the one below. can we check to see if we need to streamline into one API?