From a8c6f41856b493cfa87ce14acb5feb8d35a94ac2 Mon Sep 17 00:00:00 2001 From: Janardhan Hungund Date: Wed, 15 May 2024 21:17:04 +0530 Subject: [PATCH 1/4] HBASE-28467: Add time-based priority caching checks for cacheOnRead code paths. Whenever the blocks of the file are read from the file system, they are cached into the block-cache if cache-on-read configuration is enabled. This configuration is enabled by default. However, with the time-based priority caching, we may not want to cache the blocks of the cold file. In this PR, we add checks to avoid the caching of cold data files and cache the blocks of hot data files. Change-Id: Ia07d791a841640e468c3ba33acebba81dbecbbc2 --- .../hbase/io/hfile/HFileReaderImpl.java | 11 ++++-- .../regionserver/TestDataTieringManager.java | 34 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index e0585c6edaa2..03729282eaf9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -1347,9 +1347,16 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory(); final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category); final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category); + Optional cacheFileBlock = Optional.of(true); + // Additionally perform the time-based priority checks to see + // whether, or not to cache the block. + if (cacheConf.getBlockCache().isPresent()) { + cacheFileBlock = cacheConf.getBlockCache().get().shouldCacheFile(getHFileInfo(), conf); + } + final boolean shouldCacheFileBlock = cacheFileBlock.get(); // Don't need the unpacked block back and we're storing the block in the cache compressed - if (cacheOnly && cacheCompressed && cacheOnRead) { + if (cacheOnly && cacheCompressed && cacheOnRead && shouldCacheFileBlock) { cacheConf.getBlockCache().ifPresent(cache -> { LOG.debug("Skipping decompression of block {} in prefetch", cacheKey); // Cache the block if necessary @@ -1366,7 +1373,7 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader); // Cache the block if necessary cacheConf.getBlockCache().ifPresent(cache -> { - if (cacheable && cacheConf.shouldCacheBlockOnRead(category)) { + if (cacheable && cacheConf.shouldCacheBlockOnRead(category) && shouldCacheFileBlock) { // Using the wait on cache during compaction and prefetching. cache.cacheBlock(cacheKey, cacheCompressed ? hfileBlock : unpacked, cacheConf.isInMemory(), cacheOnly); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index f999a73c4732..2b8c6e8cc60a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.DEFAULT_ERROR_TOLERATION_DURATION; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -49,12 +50,14 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.BlockCache; import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory; import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheTestUtils; +import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; import org.apache.hadoop.hbase.testclassification.RegionServerTests; @@ -471,6 +474,37 @@ public void testFeatureKeyDisabled() throws Exception { } } + @Test + public void testCacheOnReadColdFile() throws Exception { + // hStoreFiles[3] is a cold file. the blocks should not get loaded after a readBlock call. + HStoreFile hStoreFile = hStoreFiles.get(3); + BlockCacheKey cacheKey = new BlockCacheKey(hStoreFile.getPath(), 0, true, BlockType.DATA); + testCacheOnRead(hStoreFile, cacheKey, 23025, false); + } + + @Test + public void testCacheOnReadHotFile() throws Exception { + // hStoreFiles[0] is a hot file. the blocks should not get loaded after a readBlock call. + HStoreFile hStoreFile = hStoreFiles.get(0); + BlockCacheKey cacheKey = + new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); + testCacheOnRead(hStoreFile, cacheKey, hStoreFile.getFileInfo().getSize(), true); + } + + private void testCacheOnRead(HStoreFile hStoreFile, BlockCacheKey key, long onDiskBlockSize, + boolean expectedCached) throws Exception { + // Execute the read block API which will try to cache the block if the block is a hot block. + hStoreFile.getReader().getHFileReader().readBlock(key.getOffset(), onDiskBlockSize, true, false, + false, false, key.getBlockType(), DataBlockEncoding.NONE); + // Validate that the hot block gets cached and cold block is not cached. + HFileBlock block = (HFileBlock) blockCache.getBlock(key, false, false, false, BlockType.DATA); + if (expectedCached) { + assertNotNull(block); + } else { + assertNull(block); + } + } + private void validateBlocks(Set keys, int expectedTotalKeys, int expectedHotBlocks, int expectedColdBlocks) { int numHotBlocks = 0, numColdBlocks = 0; From 07c9e5cad1d576b0fd38f4283d8247643d515405 Mon Sep 17 00:00:00 2001 From: Janardhan Hungund Date: Thu, 16 May 2024 14:23:05 +0530 Subject: [PATCH 2/4] HBASE-28467: Addressed review comments. Change-Id: Ife4f3a50395877027df717191befc2962deb692d --- .../hadoop/hbase/io/hfile/CacheConfig.java | 12 +++++++++ .../hbase/io/hfile/HFileReaderImpl.java | 8 +----- .../regionserver/TestDataTieringManager.java | 27 ++++++++++++++++++- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java index 7fb1f1ec85bd..e512e9ac6dd2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java @@ -275,6 +275,18 @@ public boolean shouldCacheBlockOnRead(BlockCategory category) { || (prefetchOnOpen && (category != BlockCategory.META && category != BlockCategory.UNKNOWN)); } + public boolean shouldCacheFileBlock(HFileInfo hFileInfo, Configuration conf) { + Optional cacheFileBlock = Optional.of(true); + // Additionally perform the time-based priority checks to see + // whether, or not to cache the block. + if (getBlockCache().isPresent()) { + + cacheFileBlock = getBlockCache().get().shouldCacheFile(hFileInfo, conf); + LOG.info("BlockCache Present, cacheFileBlock: {}", cacheFileBlock.get()); + } + return cacheFileBlock.get(); + } + /** Returns true if blocks in this file should be flagged as in-memory */ public boolean isInMemory() { return this.inMemory; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index 03729282eaf9..05d1fe69b9d6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -1347,13 +1347,7 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory(); final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category); final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category); - Optional cacheFileBlock = Optional.of(true); - // Additionally perform the time-based priority checks to see - // whether, or not to cache the block. - if (cacheConf.getBlockCache().isPresent()) { - cacheFileBlock = cacheConf.getBlockCache().get().shouldCacheFile(getHFileInfo(), conf); - } - final boolean shouldCacheFileBlock = cacheFileBlock.get(); + final boolean shouldCacheFileBlock = cacheConf.shouldCacheFileBlock(getHFileInfo(), conf); // Don't need the unpacked block back and we're storing the block in the cache compressed if (cacheOnly && cacheCompressed && cacheOnRead && shouldCacheFileBlock) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index 2b8c6e8cc60a..daa1bad54261 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -474,6 +474,31 @@ public void testFeatureKeyDisabled() throws Exception { } } + @Test + public void testCacheConfigShouldCacheFile() throws Exception { + // Evict the files from cache. + for (HStoreFile file : hStoreFiles) { + file.closeStoreFile(true); + } + // Verify that the API shouldCacheFileBlock returns the result correctly. + // hStoreFiles[0], hStoreFiles[1], hStoreFiles[2] are hot files. + // hStoreFiles[3] is a cold file. + try { + assertTrue(cacheConf.shouldCacheFileBlock(hStoreFiles.get(0).getFileInfo().getHFileInfo(), + hStoreFiles.get(0).getFileInfo().getConf())); + assertTrue(cacheConf.shouldCacheFileBlock(hStoreFiles.get(1).getFileInfo().getHFileInfo(), + hStoreFiles.get(1).getFileInfo().getConf())); + assertTrue(cacheConf.shouldCacheFileBlock(hStoreFiles.get(2).getFileInfo().getHFileInfo(), + hStoreFiles.get(2).getFileInfo().getConf())); + assertFalse(cacheConf.shouldCacheFileBlock(hStoreFiles.get(3).getFileInfo().getHFileInfo(), + hStoreFiles.get(3).getFileInfo().getConf())); + } finally { + for (HStoreFile file : hStoreFiles) { + file.initReader(); + } + } + } + @Test public void testCacheOnReadColdFile() throws Exception { // hStoreFiles[3] is a cold file. the blocks should not get loaded after a readBlock call. @@ -488,7 +513,7 @@ public void testCacheOnReadHotFile() throws Exception { HStoreFile hStoreFile = hStoreFiles.get(0); BlockCacheKey cacheKey = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); - testCacheOnRead(hStoreFile, cacheKey, hStoreFile.getFileInfo().getSize(), true); + testCacheOnRead(hStoreFile, cacheKey, 23025, true); } private void testCacheOnRead(HStoreFile hStoreFile, BlockCacheKey key, long onDiskBlockSize, From 168383fead3446d44317e0e3d807a30a45580dbb Mon Sep 17 00:00:00 2001 From: Janardhan Hungund Date: Fri, 17 May 2024 13:38:20 +0530 Subject: [PATCH 3/4] HBASE-28467: Addressed further review comments. Change-Id: Ia55e2e6ee415fa68d1c2714d127670f6df9c404d --- .../apache/hadoop/hbase/io/hfile/CacheConfig.java | 9 +++------ .../hadoop/hbase/io/hfile/HFileReaderImpl.java | 13 +++++++------ .../regionserver/TestDataTieringManager.java | 15 ++++++++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java index e512e9ac6dd2..fe84692fac19 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java @@ -275,16 +275,13 @@ public boolean shouldCacheBlockOnRead(BlockCategory category) { || (prefetchOnOpen && (category != BlockCategory.META && category != BlockCategory.UNKNOWN)); } - public boolean shouldCacheFileBlock(HFileInfo hFileInfo, Configuration conf) { + public boolean shouldCacheBlockOnRead(BlockCategory category, HFileInfo hFileInfo, + Configuration conf) { Optional cacheFileBlock = Optional.of(true); - // Additionally perform the time-based priority checks to see - // whether, or not to cache the block. if (getBlockCache().isPresent()) { - cacheFileBlock = getBlockCache().get().shouldCacheFile(hFileInfo, conf); - LOG.info("BlockCache Present, cacheFileBlock: {}", cacheFileBlock.get()); } - return cacheFileBlock.get(); + return shouldCacheBlockOnRead(category) && cacheFileBlock.get(); } /** Returns true if blocks in this file should be flagged as in-memory */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index 05d1fe69b9d6..989af7eab883 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -1193,7 +1193,8 @@ public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META); - cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory()); + cacheBlock &= + cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory(), getHFileInfo(), conf); HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, false, true, BlockType.META, null); if (cachedBlock != null) { @@ -1346,15 +1347,15 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo } BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory(); final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category); - final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category); - final boolean shouldCacheFileBlock = cacheConf.shouldCacheFileBlock(getHFileInfo(), conf); + final boolean cacheOnRead = + cacheConf.shouldCacheBlockOnRead(category, getHFileInfo(), conf); // Don't need the unpacked block back and we're storing the block in the cache compressed - if (cacheOnly && cacheCompressed && cacheOnRead && shouldCacheFileBlock) { + if (cacheOnly && cacheCompressed && cacheOnRead) { cacheConf.getBlockCache().ifPresent(cache -> { LOG.debug("Skipping decompression of block {} in prefetch", cacheKey); // Cache the block if necessary - if (cacheable && cacheConf.shouldCacheBlockOnRead(category)) { + if (cacheable && cacheOnRead) { cache.cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory(), cacheOnly); } }); @@ -1367,7 +1368,7 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader); // Cache the block if necessary cacheConf.getBlockCache().ifPresent(cache -> { - if (cacheable && cacheConf.shouldCacheBlockOnRead(category) && shouldCacheFileBlock) { + if (cacheable && cacheOnRead) { // Using the wait on cache during compaction and prefetching. cache.cacheBlock(cacheKey, cacheCompressed ? hfileBlock : unpacked, cacheConf.isInMemory(), cacheOnly); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java index daa1bad54261..9f4c1be2e2d7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory; import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheTestUtils; import org.apache.hadoop.hbase.io.hfile.HFileBlock; @@ -484,13 +485,17 @@ public void testCacheConfigShouldCacheFile() throws Exception { // hStoreFiles[0], hStoreFiles[1], hStoreFiles[2] are hot files. // hStoreFiles[3] is a cold file. try { - assertTrue(cacheConf.shouldCacheFileBlock(hStoreFiles.get(0).getFileInfo().getHFileInfo(), + assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA, + hStoreFiles.get(0).getFileInfo().getHFileInfo(), hStoreFiles.get(0).getFileInfo().getConf())); - assertTrue(cacheConf.shouldCacheFileBlock(hStoreFiles.get(1).getFileInfo().getHFileInfo(), + assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA, + hStoreFiles.get(1).getFileInfo().getHFileInfo(), hStoreFiles.get(1).getFileInfo().getConf())); - assertTrue(cacheConf.shouldCacheFileBlock(hStoreFiles.get(2).getFileInfo().getHFileInfo(), + assertTrue(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA, + hStoreFiles.get(2).getFileInfo().getHFileInfo(), hStoreFiles.get(2).getFileInfo().getConf())); - assertFalse(cacheConf.shouldCacheFileBlock(hStoreFiles.get(3).getFileInfo().getHFileInfo(), + assertFalse(cacheConf.shouldCacheBlockOnRead(BlockCategory.DATA, + hStoreFiles.get(3).getFileInfo().getHFileInfo(), hStoreFiles.get(3).getFileInfo().getConf())); } finally { for (HStoreFile file : hStoreFiles) { @@ -509,7 +514,7 @@ public void testCacheOnReadColdFile() throws Exception { @Test public void testCacheOnReadHotFile() throws Exception { - // hStoreFiles[0] is a hot file. the blocks should not get loaded after a readBlock call. + // hStoreFiles[0] is a hot file. the blocks should get loaded after a readBlock call. HStoreFile hStoreFile = hStoreFiles.get(0); BlockCacheKey cacheKey = new BlockCacheKey(hStoreFiles.get(0).getPath(), 0, true, BlockType.DATA); From e25913790eb0b555dc9c1ee1cad9d8cccf4b8f6f Mon Sep 17 00:00:00 2001 From: Janardhan Hungund Date: Wed, 22 May 2024 12:53:33 +0530 Subject: [PATCH 4/4] HBASE-28467: Fixed unit test. Change-Id: I924b06e94c83502b39418097813da06d000dc336 --- .../java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java index fe84692fac19..40dc0aed494a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java @@ -279,7 +279,10 @@ public boolean shouldCacheBlockOnRead(BlockCategory category, HFileInfo hFileInf Configuration conf) { Optional cacheFileBlock = Optional.of(true); if (getBlockCache().isPresent()) { - cacheFileBlock = getBlockCache().get().shouldCacheFile(hFileInfo, conf); + Optional result = getBlockCache().get().shouldCacheFile(hFileInfo, conf); + if (result.isPresent()) { + cacheFileBlock = result; + } } return shouldCacheBlockOnRead(category) && cacheFileBlock.get(); }