[HUDI-9336] Extract common logic of getting reader for secondary index #13212
@@ -176,6 +176,26 @@ private static Map<String, String> getRecordKeyToSecondaryKey(HoodieTableMetaClient
       Option<StoragePath> dataFilePath,
       HoodieIndexDefinition indexDefinition,
       String instantTime) throws Exception {
+    Map<String, String> recordKeyToSecondaryKey = new HashMap<>();
+    try (HoodieFileSliceReader fileSliceReader =
+             getFileSliceReader(metaClient, engineType, logFilePaths, tableSchema, partition, dataFilePath, instantTime)) {
+      // Collect the records from the iterator in a map by record key to secondary key
+      while (fileSliceReader.hasNext()) {
+        HoodieRecord record = (HoodieRecord) fileSliceReader.next();
+        String secondaryKey = getSecondaryKey(record, tableSchema, indexDefinition);
+        if (secondaryKey != null) {
+          // no delete records here
+          recordKeyToSecondaryKey.put(record.getRecordKey(tableSchema, HoodieRecord.RECORD_KEY_METADATA_FIELD), secondaryKey);
+        }
+      }
+    }
+    return recordKeyToSecondaryKey;
+  }
+
+  private static HoodieFileSliceReader getFileSliceReader(
+      HoodieTableMetaClient metaClient, EngineType engineType,
+      List<String> logFilePaths, Schema tableSchema, String partition,
+      Option<StoragePath> dataFilePath, String instantTime) throws IOException {
+    final String basePath = metaClient.getBasePath().toString();
+    final StorageConfiguration<?> storageConf = metaClient.getStorageConf();
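Aside: the shape of the change above, in isolation. The PR extracts one construction point (`getFileSliceReader`) and lets the caller close the reader with try-with-resources. Below is a minimal, self-contained sketch of that pattern; all names in it (`SliceReader`, `openSliceReader`) are illustrative stand-ins, not Hudi APIs. The only assumption carried over from the diff is that the reader is closeable, which is what makes the `try` block above work.

```java
import java.util.Iterator;
import java.util.List;

public class ReaderFactorySketch {
  // Stand-in for a closeable record reader such as HoodieFileSliceReader.
  static class SliceReader implements Iterator<String>, AutoCloseable {
    private final Iterator<String> it;
    SliceReader(List<String> records) { this.it = records.iterator(); }
    @Override public boolean hasNext() { return it.hasNext(); }
    @Override public String next() { return it.next(); }
    @Override public void close() { System.out.println("reader closed"); }
  }

  // One construction point shared by all callers, mirroring the extracted common logic.
  static SliceReader openSliceReader() {
    return new SliceReader(List.of("r1", "r2"));
  }

  public static void main(String[] args) {
    // try-with-resources guarantees close() runs even if the loop body throws.
    try (SliceReader reader = openSliceReader()) {
      while (reader.hasNext()) {
        System.out.println(reader.next());
      }
    }
  }
}
```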
@@ -207,19 +227,8 @@ private static Map<String, String> getRecordKeyToSecondaryKey(HoodieTableMetaClient
     if (dataFilePath.isPresent()) {
       baseFileReader = Option.of(HoodieIOFactory.getIOFactory(metaClient.getStorage()).getReaderFactory(recordMerger.getRecordType()).getFileReader(getReaderConfigs(storageConf), dataFilePath.get()));
Contributor: This reader is never getting closed and it will lead to leaks.

Author: Yes. This is fixed now.
-    HoodieFileSliceReader fileSliceReader = new HoodieFileSliceReader(baseFileReader, mergedLogRecordScanner, tableSchema, metaClient.getTableConfig().getPreCombineField(), recordMerger,
+    return new HoodieFileSliceReader(baseFileReader, mergedLogRecordScanner, tableSchema, metaClient.getTableConfig().getPreCombineField(), recordMerger,
         metaClient.getTableConfig().getProps(), Option.empty(), Option.empty());
-    // Collect the records from the iterator in a map by record key to secondary key
-    Map<String, String> recordKeyToSecondaryKey = new HashMap<>();
-    while (fileSliceReader.hasNext()) {
-      HoodieRecord record = (HoodieRecord) fileSliceReader.next();
-      String secondaryKey = getSecondaryKey(record, tableSchema, indexDefinition);
-      if (secondaryKey != null) {
-        // no delete records here
-        recordKeyToSecondaryKey.put(record.getRecordKey(tableSchema, HoodieRecord.RECORD_KEY_METADATA_FIELD), secondaryKey);
-      }
-    }
-    return recordKeyToSecondaryKey;
   }

   private static String getSecondaryKey(HoodieRecord record, Schema tableSchema, HoodieIndexDefinition indexDefinition) {
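For context, the collect-to-map loop that now lives in `getRecordKeyToSecondaryKey` reduces to the following shape, sketched here with plain strings in place of `HoodieRecord` (names are illustrative only): records whose secondary key resolves to null are skipped, everything else is indexed by record key.

```java
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class SecondaryKeyCollector {
  // Simplified stand-in for a merged record: a record key plus an optional secondary key.
  record Rec(String recordKey, String secondaryKey) {}

  static Map<String, String> collect(Iterator<Rec> reader) {
    Map<String, String> recordKeyToSecondaryKey = new HashMap<>();
    while (reader.hasNext()) {
      Rec r = reader.next();
      if (r.secondaryKey() != null) { // records without a secondary key are skipped
        recordKeyToSecondaryKey.put(r.recordKey(), r.secondaryKey());
      }
    }
    return recordKeyToSecondaryKey;
  }

  public static void main(String[] args) {
    Iterator<Rec> reader = List.of(
        new Rec("rk1", "sk1"), new Rec("rk2", null), new Rec("rk3", "sk3")).iterator();
    System.out.println(collect(reader)); // e.g. {rk1=sk1, rk3=sk3}
  }
}
```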
@@ -277,47 +286,14 @@ private static ClosableIterator<HoodieRecord> createSecondaryIndexGenerator(HoodieTableMetaClient
       Option<StoragePath> dataFilePath,
       HoodieIndexDefinition indexDefinition,
       String instantTime) throws Exception {
-    final String basePath = metaClient.getBasePath().toString();
-    final StorageConfiguration<?> storageConf = metaClient.getStorageConf();
-
-    HoodieRecordMerger recordMerger = HoodieRecordUtils.createRecordMerger(
-        basePath,
-        engineType,
-        Collections.emptyList(),
-        metaClient.getTableConfig().getRecordMergeStrategyId());
-
-    HoodieMergedLogRecordScanner mergedLogRecordScanner = HoodieMergedLogRecordScanner.newBuilder()
-        .withStorage(metaClient.getStorage())
-        .withBasePath(metaClient.getBasePath())
-        .withLogFilePaths(logFilePaths)
-        .withReaderSchema(tableSchema)
-        .withLatestInstantTime(instantTime)
-        .withReverseReader(false)
-        .withMaxMemorySizeInBytes(storageConf.getLong(MAX_MEMORY_FOR_COMPACTION.key(), DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES))
-        .withBufferSize(HoodieMetadataConfig.MAX_READER_BUFFER_SIZE_PROP.defaultValue())
-        .withSpillableMapBasePath(FileIOUtils.getDefaultSpillableMapBasePath())
-        .withPartition(partition)
-        .withOptimizedLogBlocksScan(storageConf.getBoolean("hoodie" + HoodieMetadataConfig.OPTIMIZED_LOG_BLOCKS_SCAN, false))
-        .withDiskMapType(storageConf.getEnum(SPILLABLE_DISK_MAP_TYPE.key(), SPILLABLE_DISK_MAP_TYPE.defaultValue()))
-        .withBitCaskDiskMapCompressionEnabled(storageConf.getBoolean(DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(), DISK_MAP_BITCASK_COMPRESSION_ENABLED.defaultValue()))
-        .withRecordMerger(recordMerger)
-        .withTableMetaClient(metaClient)
-        .build();
-
-    Option<HoodieFileReader> baseFileReader = Option.empty();
-    if (dataFilePath.isPresent()) {
-      baseFileReader = Option.of(HoodieIOFactory.getIOFactory(metaClient.getStorage()).getReaderFactory(recordMerger.getRecordType()).getFileReader(getReaderConfigs(storageConf), dataFilePath.get()));
-    }
-    HoodieFileSliceReader fileSliceReader = new HoodieFileSliceReader(baseFileReader, mergedLogRecordScanner, tableSchema, metaClient.getTableConfig().getPreCombineField(), recordMerger,
-        metaClient.getTableConfig().getProps(),
-        Option.empty(), Option.empty());
-    ClosableIterator<HoodieRecord> fileSliceIterator = ClosableIterator.wrap(fileSliceReader);
     return new ClosableIterator<HoodieRecord>() {
Author: This closable iterator and the file slice reader are going to be closed properly after #13178 is landed.
+      private final HoodieFileSliceReader<HoodieRecord> fileSliceReader = getFileSliceReader(
+          metaClient, engineType, logFilePaths, tableSchema, partition, dataFilePath, instantTime);
       private HoodieRecord nextValidRecord;

       @Override
       public void close() {
-        fileSliceIterator.close();
+        fileSliceReader.close();
       }

       @Override
@@ -334,8 +310,8 @@ public boolean hasNext() {
         // NOTE: Delete record should not happen when initializing the secondary index i.e. when called from readSecondaryKeysFromFileSlices,
         // because from that call, we get the merged records as of some committed instant. So, delete records must have been filtered out.
         // Loop to find the next valid record or exhaust the iterator.
-        while (fileSliceIterator.hasNext()) {
-          HoodieRecord record = fileSliceIterator.next();
+        while (fileSliceReader.hasNext()) {
+          HoodieRecord record = fileSliceReader.next();
           String secondaryKey = getSecondaryKey(record);
           if (secondaryKey != null) {
             nextValidRecord = createSecondaryIndexRecord(
Author: This class is going to be removed and all the usage will be replaced by HoodieFileGroupReader. So fixing the close behavior in a simple way in this PR.
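Taken together, the generator now follows a filter-and-delegate-close iterator pattern: the anonymous iterator owns the reader it created, skips records that yield no secondary key, and releases exactly one resource in `close()`. A standalone sketch of that shape follows; the interface and all names are simplified stand-ins, and Hudi's actual `ClosableIterator` differs.

```java
import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;

public class FilteringIteratorSketch {
  // Simplified stand-in for Hudi's ClosableIterator: close() without a checked exception.
  interface CloseableIterator<T> extends Iterator<T>, AutoCloseable {
    @Override void close();
  }

  // Wraps a source, skips nulls (like records without a secondary key),
  // and forwards close() to the source so there is a single owner of the resource.
  static <T> CloseableIterator<T> skippingNulls(CloseableIterator<T> source) {
    return new CloseableIterator<T>() {
      private T nextValid;

      @Override public void close() { source.close(); }

      @Override public boolean hasNext() {
        // Loop to find the next valid element or exhaust the source.
        while (nextValid == null && source.hasNext()) {
          nextValid = source.next();
        }
        return nextValid != null;
      }

      @Override public T next() {
        if (!hasNext()) throw new NoSuchElementException();
        T result = nextValid;
        nextValid = null;
        return result;
      }
    };
  }

  public static void main(String[] args) {
    CloseableIterator<String> source = new CloseableIterator<String>() {
      private final Iterator<String> it = Arrays.asList("a", null, "b").iterator();
      @Override public boolean hasNext() { return it.hasNext(); }
      @Override public String next() { return it.next(); }
      @Override public void close() { System.out.println("source closed"); }
    };
    try (CloseableIterator<String> filtered = skippingNulls(source)) {
      while (filtered.hasNext()) {
        System.out.println(filtered.next()); // prints a then b, then closes source
      }
    }
  }
}
```

This mirrors the diff above: after the change, the anonymous `ClosableIterator` holds the `fileSliceReader` it constructed, so its `close()` has exactly one resource to release instead of a reader plus a wrapping iterator.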