-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-28505 Implement enforcement to require Date Tiered Compaction for Time Range Data Tiering #5809
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
HBASE-28505 Implement enforcement to require Date Tiered Compaction for Time Range Data Tiering #5809
Changes from 3 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
a9d3170
HBASE-28465 Implementation of framework for time-based priority bucke…
vinayakphegde 0dedee4
Implemented enforcement for Date Tiered Compaction in Time Range Data…
vinayakphegde 4064ff0
removed spotless errors
vinayakphegde cd11d86
enhanced the code
vinayakphegde File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
27 changes: 27 additions & 0 deletions
27
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringException.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.hadoop.hbase.regionserver; | ||
|
|
||
| import org.apache.yetus.audience.InterfaceAudience; | ||
|
|
||
| /** | ||
| * Checked exception thrown when data tiering information cannot be determined, for example when | ||
| * an HFile path cannot be resolved or a required value is missing. | ||
| */ | ||
| @InterfaceAudience.Private | ||
| public class DataTieringException extends Exception { | ||
| // Exception is Serializable; pin the serial form explicitly. | ||
| private static final long serialVersionUID = 1L; | ||
| /** | ||
| * Creates an exception carrying only a description of the failure. | ||
| * @param reason description of what went wrong | ||
| */ | ||
| DataTieringException(String reason) { | ||
| super(reason); | ||
| } | ||
| /** | ||
| * Creates an exception that keeps the underlying failure as its cause. | ||
| * @param reason description of what went wrong | ||
| * @param cause the underlying failure; may be {@code null} | ||
| */ | ||
| DataTieringException(String reason, Throwable cause) { | ||
| super(reason, cause); | ||
| } | ||
| } |
222 changes: 222 additions & 0 deletions
222
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,222 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.hadoop.hbase.regionserver; | ||
|
|
||
| import java.util.HashSet; | ||
| import java.util.Map; | ||
| import java.util.OptionalLong; | ||
| import java.util.Set; | ||
| import org.apache.hadoop.conf.Configuration; | ||
| import org.apache.hadoop.fs.Path; | ||
| import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; | ||
| import org.apache.hadoop.hbase.util.Bytes; | ||
| import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; | ||
| import org.apache.yetus.audience.InterfaceAudience; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| /** | ||
| * The DataTieringManager class categorizes data into hot data and cold data based on the specified | ||
| * {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with | ||
| * {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType} | ||
| * determines the logic for distinguishing data into hot or cold. By default, all data is considered | ||
| * as hot. | ||
| */ | ||
| @InterfaceAudience.Private | ||
| public class DataTieringManager { | ||
| private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class); | ||
| /** Configuration key whose value names the {@link DataTieringType} to apply. */ | ||
| public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type"; | ||
| /** Configuration key for the maximum age, in milliseconds, at which data still counts as hot. */ | ||
| public static final String DATATIERING_HOT_DATA_AGE_KEY = | ||
| "hbase.hstore.datatiering.hot.age.millis"; | ||
| public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE; | ||
| // The long literal keeps the product in long arithmetic, so a larger default cannot overflow int. | ||
| public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000L; // 7 Days | ||
| private static DataTieringManager instance; | ||
| private final Map<String, HRegion> onlineRegions; | ||
|
|
||
| private DataTieringManager(Map<String, HRegion> onlineRegions) { | ||
| this.onlineRegions = onlineRegions; | ||
| } | ||
|
|
||
| /** | ||
| * Initializes the DataTieringManager instance with the provided map of online regions. Calls made | ||
| * after the first successful one leave the existing instance untouched and only log a warning. | ||
| * @param onlineRegions A map containing online regions. | ||
| */ | ||
| public static synchronized void instantiate(Map<String, HRegion> onlineRegions) { | ||
| if (instance == null) { | ||
| instance = new DataTieringManager(onlineRegions); | ||
| LOG.info("DataTieringManager instantiated successfully."); | ||
| } else { | ||
| LOG.warn("DataTieringManager is already instantiated."); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Retrieves the instance of DataTieringManager. | ||
| * @return The instance of DataTieringManager. | ||
| * @throws IllegalStateException if DataTieringManager has not been instantiated. | ||
| */ | ||
| public static synchronized DataTieringManager getInstance() { | ||
| if (instance == null) { | ||
| throw new IllegalStateException( | ||
| "DataTieringManager has not been instantiated. Call instantiate() first."); | ||
| } | ||
| return instance; | ||
| } | ||
|
|
||
| /** | ||
| * Determines whether data tiering is enabled for the given block cache key. | ||
| * @param key the block cache key | ||
| * @return {@code true} if data tiering is enabled for the HFile associated with the key, | ||
| * {@code false} otherwise | ||
| * @throws DataTieringException if there is an error retrieving the HFile path or configuration | ||
| */ | ||
| public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException { | ||
| Path hFilePath = key.getFilePath(); | ||
| if (hFilePath == null) { | ||
| throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path"); | ||
| } | ||
| return isDataTieringEnabled(hFilePath); | ||
| } | ||
|
|
||
| /** | ||
| * Determines whether data tiering is enabled for the given HFile path. | ||
| * @param hFilePath the path to the HFile | ||
| * @return {@code true} if data tiering is enabled, {@code false} otherwise | ||
| * @throws DataTieringException if there is an error retrieving the configuration or the | ||
| * configured data tiering type is not a valid {@link DataTieringType} | ||
| */ | ||
| public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException { | ||
| Configuration configuration = getConfiguration(hFilePath); | ||
| return getDataTieringType(configuration) != DataTieringType.NONE; | ||
| } | ||
|
|
||
| /** | ||
| * Determines whether the data associated with the given block cache key is considered hot. | ||
| * @param key the block cache key | ||
| * @return {@code true} if the data is hot, {@code false} otherwise | ||
| * @throws DataTieringException if there is an error retrieving data tiering information or the | ||
| * HFile maximum timestamp | ||
| */ | ||
| public boolean isHotData(BlockCacheKey key) throws DataTieringException { | ||
| Path hFilePath = key.getFilePath(); | ||
| if (hFilePath == null) { | ||
| throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path"); | ||
| } | ||
| return isHotData(hFilePath); | ||
| } | ||
|
|
||
| /** | ||
| * Determines whether the data in the HFile at the given path is considered hot based on the | ||
| * configured data tiering type and hot data age. With {@link DataTieringType#TIME_RANGE}, a file | ||
| * is hot when the time elapsed since its maximum timestamp does not exceed the hot data age; a | ||
| * file that is not found among the store's files is logged and reported as cold. | ||
| * @param hFilePath the path to the HFile | ||
| * @return {@code true} if the data is hot, {@code false} otherwise | ||
| * @throws DataTieringException if there is an error retrieving data tiering information or the | ||
| * HFile maximum timestamp, or a data tiering setting is malformed | ||
| */ | ||
| public boolean isHotData(Path hFilePath) throws DataTieringException { | ||
| Configuration configuration = getConfiguration(hFilePath); | ||
| DataTieringType dataTieringType = getDataTieringType(configuration); | ||
|
|
||
| if (dataTieringType == DataTieringType.TIME_RANGE) { | ||
| long hotDataAge = getDataTieringHotDataAge(configuration); | ||
|
|
||
| HStoreFile hStoreFile = getHStoreFile(hFilePath); | ||
| if (hStoreFile == null) { | ||
| LOG.error("HStoreFile corresponding to {} doesn't exist", hFilePath); | ||
| return false; | ||
| } | ||
| OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp(); | ||
| if (!maxTimestamp.isPresent()) { | ||
| throw new DataTieringException("Maximum timestamp not present for " + hFilePath); | ||
| } | ||
|
|
||
| long currentTimestamp = EnvironmentEdgeManager.currentTime(); | ||
| long diff = currentTimestamp - maxTimestamp.getAsLong(); | ||
| return diff <= hotDataAge; | ||
| } | ||
| // DataTieringType.NONE or other types are considered hot by default | ||
| return true; | ||
| } | ||
|
|
||
| /** | ||
| * Returns a set of cold data filenames from the given set of cached blocks. Cold data is | ||
| * determined by the configured data tiering type and hot data age. | ||
| * @param allCachedBlocks a set of all cached block cache keys | ||
| * @return a set of cold data filenames | ||
| * @throws DataTieringException if there is an error determining whether a block is hot | ||
| */ | ||
| public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks) | ||
| throws DataTieringException { | ||
| Set<String> coldHFiles = new HashSet<>(); | ||
| for (BlockCacheKey key : allCachedBlocks) { | ||
| // A file already classified as cold needs no second lookup for its other blocks. | ||
| if (coldHFiles.contains(key.getHfileName())) { | ||
| continue; | ||
| } | ||
| if (!isHotData(key)) { | ||
| coldHFiles.add(key.getHfileName()); | ||
| } | ||
| } | ||
| return coldHFiles; | ||
| } | ||
|
|
||
| // Resolves the online region from the path; the grandparent directory name is used as region id. | ||
| private HRegion getHRegion(Path hFilePath) throws DataTieringException { | ||
| if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) { | ||
| throw new DataTieringException("Incorrect HFile Path: " + hFilePath); | ||
| } | ||
| String regionId = hFilePath.getParent().getParent().getName(); | ||
| HRegion hRegion = this.onlineRegions.get(regionId); | ||
| if (hRegion == null) { | ||
| throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist"); | ||
| } | ||
| return hRegion; | ||
| } | ||
|
|
||
| // Resolves the store from the path; the parent directory name is used as the column family. | ||
| private HStore getHStore(Path hFilePath) throws DataTieringException { | ||
| HRegion hRegion = getHRegion(hFilePath); | ||
| String columnFamily = hFilePath.getParent().getName(); | ||
| HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily)); | ||
| if (hStore == null) { | ||
| throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist"); | ||
| } | ||
| return hStore; | ||
| } | ||
|
|
||
| // Returns the store file whose path equals hFilePath, or null when the store has no such file. | ||
| private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException { | ||
| HStore hStore = getHStore(hFilePath); | ||
| for (HStoreFile file : hStore.getStorefiles()) { | ||
| if (file.getPath().equals(hFilePath)) { | ||
| return file; | ||
| } | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
| private Configuration getConfiguration(Path hFilePath) throws DataTieringException { | ||
| HStore hStore = getHStore(hFilePath); | ||
| return hStore.getReadOnlyConfiguration(); | ||
| } | ||
|
|
||
| // Reads the tiering type, reporting an unknown value as a checked DataTieringException so that | ||
| // callers handling configuration errors also see this one. | ||
| private DataTieringType getDataTieringType(Configuration conf) throws DataTieringException { | ||
| String configuredType = conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()); | ||
| try { | ||
| return DataTieringType.valueOf(configuredType); | ||
| } catch (IllegalArgumentException e) { | ||
| throw new DataTieringException( | ||
| "Invalid value '" + configuredType + "' for " + DATATIERING_KEY); | ||
| } | ||
| } | ||
|
|
||
| // Reads the hot data age, reporting a non-numeric value as a checked DataTieringException. | ||
| private long getDataTieringHotDataAge(Configuration conf) throws DataTieringException { | ||
| String configuredAge = | ||
| conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)); | ||
| try { | ||
| return Long.parseLong(configuredAge); | ||
| } catch (NumberFormatException e) { | ||
| throw new DataTieringException( | ||
| "Invalid value '" + configuredAge + "' for " + DATATIERING_HOT_DATA_AGE_KEY); | ||
| } | ||
| } | ||
| } |
26 changes: 26 additions & 0 deletions
26
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringType.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.hadoop.hbase.regionserver; | ||
|
|
||
| import org.apache.yetus.audience.InterfaceAudience; | ||
|
|
||
| /** Names the strategy used to classify store data as hot or cold for data tiering. */ @InterfaceAudience.Public | ||
| public enum DataTieringType { | ||
| /** Data tiering is disabled; DataTieringManager treats all data as hot. */ NONE, | ||
| /** Hot or cold is decided by comparing the age of a file's maximum timestamp with the hot data age. */ TIME_RANGE | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,10 +28,13 @@ | |
| import org.apache.hadoop.hbase.client.TableDescriptor; | ||
| import org.apache.hadoop.hbase.client.TableDescriptorBuilder; | ||
| import org.apache.hadoop.hbase.fs.ErasureCodingUtils; | ||
| import org.apache.hadoop.hbase.regionserver.DataTieringManager; | ||
| import org.apache.hadoop.hbase.regionserver.DataTieringType; | ||
| import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine; | ||
| import org.apache.hadoop.hbase.regionserver.HStore; | ||
| import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost; | ||
| import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; | ||
| import org.apache.hadoop.hbase.regionserver.StoreEngine; | ||
| import org.apache.hadoop.hbase.regionserver.compactions.ExploringCompactionPolicy; | ||
| import org.apache.hadoop.hbase.regionserver.compactions.FIFOCompactionPolicy; | ||
| import org.apache.yetus.audience.InterfaceAudience; | ||
|
|
@@ -191,6 +194,8 @@ public static void sanityCheck(final Configuration c, final TableDescriptor td) | |
|
|
||
| // check in-memory compaction | ||
| warnOrThrowExceptionForFailure(logWarn, hcd::getInMemoryCompaction); | ||
|
|
||
| checkDateTieredCompactionForTimeRangeDataTiering(conf, td); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -210,6 +215,37 @@ private static void checkReplicationScope(final Configuration conf, final TableD | |
| }); | ||
| } | ||
|
|
||
| /** | ||
| * Runs the Date Tiered Compaction check for Time Range Data Tiering against the given | ||
| * configuration, then once per column family against that configuration combined with the | ||
| * family's own configuration entries. | ||
| * @param conf configuration checked directly and used as the base of every column family check | ||
| * @param td table descriptor supplying the column families to check | ||
| * @throws IOException if one of the delegated checks fails | ||
| */ | ||
| private static void checkDateTieredCompactionForTimeRangeDataTiering(final Configuration conf, | ||
| final TableDescriptor td) throws IOException { | ||
| // First validate the settings shared by the whole table. | ||
| checkDateTieredCompactionForTimeRangeDataTiering(conf); | ||
| for (ColumnFamilyDescriptor family : td.getColumnFamilies()) { | ||
| // Then validate each family with its own entries added to the shared settings. | ||
| checkDateTieredCompactionForTimeRangeDataTiering( | ||
| new CompoundConfiguration().add(conf).addStringMap(family.getConfiguration())); | ||
| } | ||
| } | ||
|
|
||
| private static void checkDateTieredCompactionForTimeRangeDataTiering(final Configuration conf) | ||
| throws IOException { | ||
| final String DATE_TIERED_STORE_ENGINE = | ||
| "org.apache.hadoop.hbase.regionserver.DateTieredStoreEngine"; | ||
| final String errorMessage = | ||
| "Time Range Data Tiering should be enabled with Date Tiered Compaction."; | ||
|
|
||
| warnOrThrowExceptionForFailure(false, () -> { | ||
|
|
||
| // Determine whether Date Tiered Compaction will be enabled when Time Range Data Tiering is | ||
| // enabled after the configuration change. | ||
| if (DataTieringType.TIME_RANGE.name().equals(conf.get(DataTieringManager.DATATIERING_KEY))) { | ||
| if (!DATE_TIERED_STORE_ENGINE.equals(conf.get(StoreEngine.STORE_ENGINE_CLASS_KEY))) { | ||
| throw new IOException(errorMessage); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: We should throw IllegalArgumentException, since this is rather a config error. |
||
| } | ||
| }); | ||
| } | ||
|
|
||
| private static void checkCompactionPolicy(final Configuration conf, final TableDescriptor td) | ||
| throws IOException { | ||
| warnOrThrowExceptionForFailure(false, () -> { | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: make this a public constant somewhere in the date tiered compaction code, then import static here.