-
Couldn't load subscription status.
- Fork 3.4k
Improve Delta lake caching of metadata #20437
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,7 +62,8 @@ public class TableSnapshot | |
| private final boolean checkpointRowStatisticsWritingEnabled; | ||
| private final int domainCompactionThreshold; | ||
|
|
||
| private Optional<MetadataEntry> cachedMetadata = Optional.empty(); | ||
| private Optional<MetadataEntry> cachedMetadata; | ||
| private Optional<ProtocolEntry> cachedProtocol; | ||
|
|
||
| private TableSnapshot( | ||
| SchemaTableName table, | ||
|
|
@@ -71,7 +72,9 @@ private TableSnapshot( | |
| String tableLocation, | ||
| ParquetReaderOptions parquetReaderOptions, | ||
| boolean checkpointRowStatisticsWritingEnabled, | ||
| int domainCompactionThreshold) | ||
| int domainCompactionThreshold, | ||
| Optional<MetadataEntry> cachedMetadata, | ||
| Optional<ProtocolEntry> cachedProtocol) | ||
|
Comment on lines
+76
to
+77
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. From the code changes I see that the metadata & protocol are always obtained from the `transactionLogTail` […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We try to get them from the log tail every time, but most of the time they'll be empty, so we need the cached version somehow. |
||
| { | ||
| this.table = requireNonNull(table, "table is null"); | ||
| this.lastCheckpoint = requireNonNull(lastCheckpoint, "lastCheckpoint is null"); | ||
|
|
@@ -80,6 +83,8 @@ private TableSnapshot( | |
| this.parquetReaderOptions = requireNonNull(parquetReaderOptions, "parquetReaderOptions is null"); | ||
| this.checkpointRowStatisticsWritingEnabled = checkpointRowStatisticsWritingEnabled; | ||
| this.domainCompactionThreshold = domainCompactionThreshold; | ||
| this.cachedMetadata = cachedMetadata; | ||
| this.cachedProtocol = cachedProtocol; | ||
| } | ||
|
|
||
| public static TableSnapshot load( | ||
|
|
@@ -102,7 +107,9 @@ public static TableSnapshot load( | |
| tableLocation, | ||
| parquetReaderOptions, | ||
| checkpointRowStatisticsWritingEnabled, | ||
| domainCompactionThreshold); | ||
| domainCompactionThreshold, | ||
| transactionLogTail.getMetadataEntry(), | ||
| transactionLogTail.getProtocolEntry()); | ||
| } | ||
|
|
||
| public Optional<TableSnapshot> getUpdatedSnapshot(TrinoFileSystem fileSystem, Optional<Long> toVersion) | ||
|
|
@@ -136,7 +143,9 @@ public Optional<TableSnapshot> getUpdatedSnapshot(TrinoFileSystem fileSystem, Op | |
| tableLocation, | ||
| parquetReaderOptions, | ||
| checkpointRowStatisticsWritingEnabled, | ||
| domainCompactionThreshold)); | ||
| domainCompactionThreshold, | ||
| transactionLogTail.getMetadataEntry().or(() -> cachedMetadata), | ||
| transactionLogTail.getProtocolEntry().or(() -> cachedProtocol))); | ||
|
Comment on lines
+147
to
+148
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't quite get what is happening here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The log tail only contains metadata or protocol entries if needed; most of the time this will be an empty optional. The current Trino implementation is to read the transaction log back until we get the latest version; instead this PR brings in caching so that we do not have to re-read the transaction log, yet still get the updated versions when a new one appears in the log tail 👍 cc @jkylling to double-check if I misrepresented something 😄 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To arrive at a snapshot of a Delta table we do one of:
[List items lost in extraction; judging from `TableSnapshot.load` and `getUpdatedSnapshot` in the diff, presumably: 1. load a snapshot from scratch, reading the transaction log tail; 2. take an existing snapshot and load only the newer transaction log tail entries.]
As we don't know what the snapshot will be used for, we don't eagerly load the data which is part of a snapshot, like the metadata entry, protocol entry, or add actions. However, almost every query will need the protocol entry and metadata entry. Almost all the time these entries are in the checkpoint, so we should remember these entries to avoid reading the checkpoint all the time. To get the metadata entry for a snapshot we can do one of:
[List items lost in extraction; judging from the discussion above and the highlighted code, presumably: 1. re-read the transaction log (and checkpoint) until the latest metadata entry is found; 2. use the metadata entry from the new log tail if it contains one, otherwise reuse the entry cached on the previous snapshot.]
Currently Trino does 1., while this PR does 2. The highlighted snippet does step 2: |
||
| } | ||
|
|
||
| public long getVersion() | ||
|
|
@@ -154,6 +163,11 @@ public Optional<MetadataEntry> getCachedMetadata() | |
| return cachedMetadata; | ||
| } | ||
|
|
||
| public Optional<ProtocolEntry> getCachedProtocol() | ||
| { | ||
| return cachedProtocol; | ||
| } | ||
|
|
||
| public String getTableLocation() | ||
| { | ||
| return tableLocation; | ||
|
|
@@ -164,6 +178,11 @@ public void setCachedMetadata(Optional<MetadataEntry> cachedMetadata) | |
| this.cachedMetadata = cachedMetadata; | ||
| } | ||
|
|
||
| public void setCachedProtocol(Optional<ProtocolEntry> cachedProtocol) | ||
| { | ||
| this.cachedProtocol = cachedProtocol; | ||
| } | ||
|
|
||
| public List<DeltaLakeTransactionLogEntry> getJsonTransactionLogEntries() | ||
| { | ||
| return logTail.getFileEntries(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,9 @@ | |
| import io.trino.filesystem.TrinoFileSystem; | ||
| import io.trino.filesystem.TrinoInputFile; | ||
| import io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry; | ||
| import io.trino.plugin.deltalake.transactionlog.MetadataEntry; | ||
| import io.trino.plugin.deltalake.transactionlog.MissingTransactionLogException; | ||
| import io.trino.plugin.deltalake.transactionlog.ProtocolEntry; | ||
| import io.trino.plugin.deltalake.transactionlog.Transaction; | ||
|
|
||
| import java.io.BufferedReader; | ||
|
|
@@ -27,6 +29,7 @@ | |
| import java.io.InputStreamReader; | ||
| import java.util.Collection; | ||
| import java.util.List; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkArgument; | ||
|
|
@@ -44,10 +47,15 @@ public class TransactionLogTail | |
| private final List<Transaction> entries; | ||
| private final long version; | ||
|
|
||
| private TransactionLogTail(List<Transaction> entries, long version) | ||
| private final Optional<MetadataEntry> metadataEntry; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why should this class know about `MetadataEntry` and `ProtocolEntry`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It does not need to. The […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @Pluies should we change this to compute metadataEntry and protocolEntry on the fly? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sounds good! On it 👍 |
||
| private final Optional<ProtocolEntry> protocolEntry; | ||
|
|
||
| private TransactionLogTail(List<Transaction> entries, long version, Optional<MetadataEntry> metadataEntry, Optional<ProtocolEntry> protocolEntry) | ||
| { | ||
| this.entries = ImmutableList.copyOf(requireNonNull(entries, "entries is null")); | ||
| this.version = version; | ||
| this.metadataEntry = metadataEntry; | ||
| this.protocolEntry = protocolEntry; | ||
| } | ||
|
|
||
| public static TransactionLogTail loadNewTail( | ||
|
|
@@ -75,12 +83,19 @@ public static TransactionLogTail loadNewTail( | |
|
|
||
| String transactionLogDir = getTransactionLogDir(tableLocation); | ||
| Optional<List<DeltaLakeTransactionLogEntry>> results; | ||
| MetadataEntry metadataEntry = null; | ||
| ProtocolEntry protocolEntry = null; | ||
|
|
||
| boolean endOfTail = false; | ||
| while (!endOfTail) { | ||
| results = getEntriesFromJson(entryNumber, transactionLogDir, fileSystem); | ||
| if (results.isPresent()) { | ||
| entriesBuilder.add(new Transaction(entryNumber, results.get())); | ||
| // There is at most one metadata or protocol entry per file https://github.com/delta-io/delta/blob/d74cc6897730f4effb5d7272c21bd2554bdfacdb/PROTOCOL.md#delta-log-entries-1 | ||
| metadataEntry = results.get().stream().map(DeltaLakeTransactionLogEntry::getMetaData) | ||
| .filter(Objects::nonNull).findAny().orElse(metadataEntry); | ||
| protocolEntry = results.get().stream().map(DeltaLakeTransactionLogEntry::getProtocol) | ||
| .filter(Objects::nonNull).findAny().orElse(protocolEntry); | ||
| version = entryNumber; | ||
| entryNumber++; | ||
| } | ||
|
|
@@ -96,7 +111,7 @@ public static TransactionLogTail loadNewTail( | |
| } | ||
| } | ||
|
|
||
| return new TransactionLogTail(entriesBuilder.build(), version); | ||
| return new TransactionLogTail(entriesBuilder.build(), version, Optional.ofNullable(metadataEntry), Optional.ofNullable(protocolEntry)); | ||
| } | ||
|
|
||
| public Optional<TransactionLogTail> getUpdatedTail(TrinoFileSystem fileSystem, String tableLocation, Optional<Long> endVersion) | ||
|
|
@@ -112,7 +127,9 @@ public Optional<TransactionLogTail> getUpdatedTail(TrinoFileSystem fileSystem, S | |
| .addAll(entries) | ||
| .addAll(newTail.entries) | ||
| .build(), | ||
| newTail.version)); | ||
| newTail.version, | ||
| newTail.getMetadataEntry().or(() -> metadataEntry), | ||
| newTail.getProtocolEntry().or(() -> protocolEntry))); | ||
| } | ||
|
|
||
| public static Optional<List<DeltaLakeTransactionLogEntry>> getEntriesFromJson(long entryNumber, String transactionLogDir, TrinoFileSystem fileSystem) | ||
|
|
@@ -152,6 +169,16 @@ public List<Transaction> getTransactions() | |
| return entries; | ||
| } | ||
|
|
||
| public Optional<MetadataEntry> getMetadataEntry() | ||
| { | ||
| return metadataEntry; | ||
| } | ||
|
|
||
| public Optional<ProtocolEntry> getProtocolEntry() | ||
| { | ||
| return protocolEntry; | ||
| } | ||
|
|
||
| public long getVersion() | ||
| { | ||
| return version; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where are these checks being done now?