Changes from all commits
32 commits
518cf04
Support cacheIterator method with startKey for full table cache
peterxcli May 12, 2025
3c987ed
Support CF key range compaction and SST properties retrieval for rocksDB
peterxcli May 12, 2025
565b258
Add range compaction service skelton
peterxcli May 12, 2025
ef1b22d
Refactor compaction logic to utilize StringUtils for key conversion a…
peterxcli May 18, 2025
dd3aa20
Enhance FullTableCache iterator to handle null startKey and add getKe…
peterxcli May 18, 2025
8ee058f
comment out FSO table compactor
peterxcli May 19, 2025
dbe59d3
Add CompoundKeyRangeStats class for managing key range statistics and…
peterxcli May 24, 2025
2902190
Refactor range compaction configuration by renaming maxEntriesSum to …
peterxcli May 25, 2025
dcb840f
Enhance StringUtils with new methods for key prefix upper bound, max,…
peterxcli May 25, 2025
63d65cb
fix pmd
peterxcli May 25, 2025
9ac937b
Rename "deletedDirTable" to "deletedDirectoryTable" in RangeCompactio…
peterxcli May 25, 2025
0de2e26
Refactor ManagedRange to implement AutoCloseable, ensuring proper res…
peterxcli May 25, 2025
c6dc493
Update Javadoc in DBStore to clarify parameter naming from 'columnFam…
peterxcli May 25, 2025
8ece808
Add range compaction service configuration to ozone-default.xml
peterxcli May 25, 2025
86a9a69
Minor adjustment and fixes
peterxcli May 27, 2025
2ef8b45
Refactor key range handling by removing ManagedRange and replacing it…
peterxcli May 27, 2025
27b3900
Update rocksJava JNI patch to fix GetPropertiesOfTableInRange
peterxcli May 29, 2025
191c19b
try to fix ci
peterxcli May 29, 2025
3898760
Add disk space cleanup script for ci
peterxcli May 29, 2025
835a39c
fix checkstyle and pmd
peterxcli May 31, 2025
d39fd56
Update pom.xml for rocksdb version and add GitHub repository configur…
peterxcli Jun 4, 2025
aa83ae1
Remove disk space cleanup script and related workflow steps from CI c…
peterxcli Jun 4, 2025
122e321
Enhance getCompoundKeyRangeStatsFromRange method in OBSTableCompactor…
peterxcli Jun 4, 2025
122c21d
Fix some logic in OBS table compactor
peterxcli Jun 6, 2025
b45fe1a
add log
peterxcli Jun 6, 2025
7d18be3
Update Grafana dashboard JSON files to replace Prometheus datasource …
peterxcli Jun 7, 2025
ca48dd7
Remove uid: "--Grafana--"
peterxcli Jun 7, 2025
91770bd
Disable the range compaction service by default and add a new benchma…
peterxcli Jun 9, 2025
e10d58a
Add max live keys limit to OmKeyBenchGenerator and optimize key retri…
peterxcli Jun 12, 2025
4a8b81c
Implement FSO compaction logic in FSOTableCompactor, introducing dire…
peterxcli Jul 4, 2025
71340d6
Merge remote-tracking branch 'upstream/master' into hdds12960-impleme…
peterxcli Aug 28, 2025
bc666bd
fix compile error
peterxcli Aug 28, 2025
2 changes: 2 additions & 0 deletions .github/workflows/check.yml
@@ -136,6 +136,8 @@ env:
OZONE_IMAGE: ghcr.io/apache/ozone
OZONE_RUNNER_IMAGE: ghcr.io/apache/ozone-runner
OZONE_VOLUME_OWNER: 1000
GITHUB_ACTOR: ${{ github.actor }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
check:
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
@@ -34,6 +34,9 @@ env:
MAVEN_ARGS: --batch-mode --settings ${{ github.workspace }}/dev-support/ci/maven-settings.xml
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
OZONE_WITH_COVERAGE: ${{ github.event_name == 'push' }}
GITHUB_ACTOR: ${{ github.actor }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


jobs:
build-info:
8 changes: 8 additions & 0 deletions dev-support/ci/maven-settings.xml
@@ -32,4 +32,12 @@
<blocked>true</blocked>
</mirror>
</mirrors>

<servers>
<server>
<id>github</id>
<username>${env.GITHUB_ACTOR}</username>
<password>${env.GITHUB_TOKEN}</password>
</server>
</servers>
</settings>
StringUtils.java
@@ -97,4 +97,16 @@ public static String bytes2String(byte[] bytes) {
public static byte[] string2Bytes(String str) {
return str.getBytes(UTF8);
}

public static String getKeyPrefixUpperBound(String key) {
return key.substring(0, key.length() - 1) + (char)(key.charAt(key.length() - 1) + 1);
Comment on lines +101 to +102


high

If the input key is an empty string, a StringIndexOutOfBoundsException will be thrown. Consider adding a check for an empty key at the beginning of the method.

}

public static String max(String str1, String str2) {
return str1.compareTo(str2) > 0 ? str1 : str2;
}
Comment on lines +105 to +107


medium

Consider adding null checks for str1 and str2 or annotating them with @Nonnull to clarify the contract of this method. If nulls are possible, define the desired behavior.


public static String min(String str1, String str2) {
return str1.compareTo(str2) < 0 ? str1 : str2;
}
Comment on lines +109 to +111


medium

Similar to the max method, consider adding null checks for str1 and str2 or annotating them with @Nonnull to clarify the contract. Define behavior for null inputs if they are permissible.

}
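The three review comments above all point at input validation in the new StringUtils helpers. Below is a rough sketch of how they could be hardened — a reviewer-style suggestion, not code from the PR, assuming java.util.Objects is imported:

// Suggested hardening only; method names mirror the PR's helpers.
public static String getKeyPrefixUpperBound(String key) {
  if (key == null || key.isEmpty()) {
    // Guard against the StringIndexOutOfBoundsException flagged above.
    throw new IllegalArgumentException("key must be a non-empty string");
  }
  int last = key.length() - 1;
  return key.substring(0, last) + (char) (key.charAt(last) + 1);
}

public static String max(String str1, String str2) {
  // Make the null contract explicit instead of relying on an implicit NPE.
  Objects.requireNonNull(str1, "str1");
  Objects.requireNonNull(str2, "str2");
  return str1.compareTo(str2) > 0 ? str1 : str2;
}

public static String min(String str1, String str2) {
  Objects.requireNonNull(str1, "str1");
  Objects.requireNonNull(str2, "str2");
  return str1.compareTo(str2) < 0 ? str1 : str2;
}

A key whose last character is already Character.MAX_VALUE would still wrap around in getKeyPrefixUpperBound; whether that case needs handling depends on how key prefixes are generated upstream.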
50 changes: 50 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -4819,4 +4819,54 @@
warm up edek cache if none of key successful on OM start up.
</description>
</property>

<!-- Range Compaction Service Configuration -->
<property>
<name>ozone.om.range.compaction.service.enabled</name>
<value>false</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>Enable or disable the range compaction service that compacts specific ranges of data based on tombstone ratio. This service helps improve performance and reduce storage overhead by compacting ranges with high tombstone density.</description>
</property>

<property>
<name>ozone.om.range.compaction.service.interval</name>
<value>30s</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>The interval at which the range compaction service checks for ranges that need compaction. The service will scan for ranges with high tombstone density at this interval.</description>
</property>

<property>
<name>ozone.om.range.compaction.service.max.compaction.entries</name>
<value>1000000</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>The maximum number of entries that can be compacted in a single range compaction operation. This limit helps prevent long-running compaction operations that could impact system performance.</description>
</property>

<property>
<name>ozone.om.range.compaction.service.min.tombstones</name>
<value>10000</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>The minimum number of tombstones required in a range before it is considered for compaction. This threshold helps ensure that only ranges with significant tombstone density are compacted.</description>
</property>

<property>
<name>ozone.om.range.compaction.service.ranges.per.run</name>
<value>20</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>The number of ranges to process in each run of the range compaction service. This controls the granularity of compaction operations and helps balance system load.</description>
</property>

<property>
<name>ozone.om.range.compaction.service.timeout</name>
<value>10m</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>The timeout for range compaction operations. If a compaction operation takes longer than this, it will be cancelled to prevent long-running operations from impacting system performance.</description>
</property>

<property>
<name>ozone.om.range.compaction.service.tombstone.ratio</name>
<value>0.3</value>
<tag>OZONE, OM, PERFORMANCE</tag>
<description>The ratio of tombstones to total entries that triggers compaction of a range. When the ratio exceeds this threshold, the range will be considered for compaction to improve storage efficiency.</description>
</property>
</configuration>
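These defaults leave the service disabled. A minimal sketch of flipping the new keys programmatically (for example in a test) using the standard Hadoop Configuration setters — the key strings come from the entries above; the OzoneConfiguration and TimeUnit imports are assumed:

// Sketch only: enable the service and tune it in code rather than via ozone-site.xml.
static OzoneConfiguration rangeCompactionTestConf() {
  OzoneConfiguration conf = new OzoneConfiguration();
  conf.setBoolean("ozone.om.range.compaction.service.enabled", true);
  conf.setTimeDuration("ozone.om.range.compaction.service.interval", 30, TimeUnit.SECONDS);
  conf.setLong("ozone.om.range.compaction.service.max.compaction.entries", 1_000_000L);
  conf.setInt("ozone.om.range.compaction.service.min.tombstones", 10_000);
  conf.setInt("ozone.om.range.compaction.service.ranges.per.run", 20);
  conf.setTimeDuration("ozone.om.range.compaction.service.timeout", 10, TimeUnit.MINUTES);
  conf.setDouble("ozone.om.range.compaction.service.tombstone.ratio", 0.3);
  return conf;
}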
DBStore.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hdds.utils.db;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hdds.annotation.InterfaceStability;
@@ -26,6 +27,7 @@
import org.apache.hadoop.hdds.utils.db.managed.ManagedCompactRangeOptions;
import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer;
import org.apache.ratis.util.UncheckedAutoCloseable;
import org.rocksdb.TableProperties;

/**
* The DBStore interface provides the ability to create Tables, which store
@@ -108,6 +110,33 @@ <KEY, VALUE> TypedTable<KEY, VALUE> getTable(
*/
void compactTable(String tableName, ManagedCompactRangeOptions options) throws RocksDatabaseException;

/**
* Compact a range of keys in the database.
*
* @param tableName - The table name to compact.
* @param startKey - The starting key of the range to compact.
* @param endKey - The ending key of the range to compact.
* @throws IOException on Failure
*/
void compactTable(String tableName, String startKey, String endKey) throws IOException;

void compactTable(String tableName, String startKey, String endKey,
ManagedCompactRangeOptions options) throws IOException;

/**
* Get the properties of a column family in a range.
* @param tableName - The table name to get the properties of.
* @param startKey - The starting key of the range.
* @param endKey - The ending key of the range.
* @return - The properties of the column family in the range.
* @throws IOException on Failure
*/
Map<String, TableProperties> getPropertiesOfTableInRange(String tableName, String startKey,
String endKey) throws IOException;

Map<String, TableProperties> getPropertiesOfTableInRange(String tableName, List<KeyRange> ranges)
throws IOException;

/**
* Returns an estimated count of keys in this DB.
*
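Taken together, the new DBStore methods let a caller size up a key range before compacting it. A rough usage sketch against the signatures above — the table name, key bounds, and the 0.3 threshold are placeholders, and the IOException, Map and org.rocksdb.TableProperties imports are assumed:

// Sketch of how a caller (e.g. the range compaction service) might drive the new API.
static void maybeCompactRange(DBStore store) throws IOException {
  String table = "keyTable";          // placeholder table name
  String start = "/vol1/bucket1/a";   // placeholder key bounds
  String end = "/vol1/bucket1/m";

  // Inspect SST properties for the range before deciding to compact.
  Map<String, TableProperties> props =
      store.getPropertiesOfTableInRange(table, start, end);
  long tombstones = props.values().stream()
      .mapToLong(TableProperties::getNumDeletions).sum();
  long entries = props.values().stream()
      .mapToLong(TableProperties::getNumEntries).sum();

  // Compact only when the tombstone ratio crosses the chosen threshold.
  if (entries > 0 && (double) tombstones / entries > 0.3) {
    store.compactTable(table, start, end);
  }
}

The List<KeyRange> overload batches several ranges into one RocksDB call, which presumably is what lets the service handle multiple ranges per run without repeated per-range JNI round trips.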
KeyRange.java
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hdds.utils.db;

import org.apache.hadoop.hdds.StringUtils;

/**
* Represents a range of keys for compaction.
*/
public class KeyRange {
private final String startKey;
private final String endKey;

public KeyRange(String startKey, String endKey) {
this.startKey = startKey;
this.endKey = endKey;
}

public KeyRange(byte[] startKey, byte[] endKey) {
this.startKey = StringUtils.bytes2String(startKey);
this.endKey = StringUtils.bytes2String(endKey);
}

public String getStartKey() {
return startKey;
}

public String getEndKey() {
return endKey;
}

@Override
public String toString() {
return "KeyRange{" +
"startKey='" + startKey + '\'' +
", endKey='" + endKey + '\'' +
'}';
}
}
RDBStore.java
@@ -34,22 +34,27 @@
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hdds.StringUtils;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.hdds.utils.RocksDBStoreMetrics;
import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily;
import org.apache.hadoop.hdds.utils.db.cache.TableCache;
import org.apache.hadoop.hdds.utils.db.managed.ManagedCompactRangeOptions;
import org.apache.hadoop.hdds.utils.db.managed.ManagedDBOptions;
import org.apache.hadoop.hdds.utils.db.managed.ManagedSlice;
import org.apache.hadoop.hdds.utils.db.managed.ManagedStatistics;
import org.apache.hadoop.hdds.utils.db.managed.ManagedTransactionLogIterator;
import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions;
import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer;
import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer.RocksDBCheckpointDifferHolder;
import org.rocksdb.Range;
import org.rocksdb.RocksDBException;
import org.rocksdb.TableProperties;
import org.rocksdb.TransactionLogIterator.BatchResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -220,6 +225,15 @@ public void compactTable(String tableName) throws RocksDatabaseException {
}
}

@Override
public void compactTable(String tableName, String startKey, String endKey) throws IOException {
LOG.info("Starting table compaction for table: {}, range: [{} - {}]", tableName, startKey, endKey);
try (ManagedCompactRangeOptions options = new ManagedCompactRangeOptions()) {
compactTable(tableName, startKey, endKey, options);
}
LOG.info("Completed table compaction for table: {}, range: [{} - {}]", tableName, startKey, endKey);
}

@Override
public void compactTable(String tableName, ManagedCompactRangeOptions options) throws RocksDatabaseException {
RocksDatabase.ColumnFamily columnFamily = db.getColumnFamily(tableName);
@@ -229,6 +243,67 @@ public void compactTable(String tableName, ManagedCompactRangeOptions options) t
db.compactRange(columnFamily, null, null, options);
}

@Override
public void compactTable(String tableName, String startKey, String endKey,
ManagedCompactRangeOptions options) throws IOException {
LOG.info("Starting table compaction with options for table: {}, range: [{} - {}]", tableName, startKey, endKey);
ColumnFamily columnFamily = db.getColumnFamily(tableName);
if (columnFamily == null) {
LOG.error("Table not found for compaction: {}", tableName);
throw new IOException("No such table in this DB. TableName : " + tableName);
}
db.compactRange(columnFamily, StringUtils.string2Bytes(startKey),
StringUtils.string2Bytes(endKey), options);
LOG.info("Completed table compaction with options for table: {}, range: [{} - {}]", tableName, startKey, endKey);
}

@Override
public Map<String, TableProperties> getPropertiesOfTableInRange(String tableName, String startKey,
String endKey) throws IOException {
LOG.info("Getting table properties for table: {}, range: [{} - {}]", tableName, startKey, endKey);
Map<String, TableProperties> result = getPropertiesOfTableInRange(tableName,
Collections.singletonList(new KeyRange(startKey, endKey)));
LOG.info("Retrieved {} table properties for table: {}, range: [{} - {}]",
result.size(), tableName, startKey, endKey);
return result;
}

@Override
public Map<String, TableProperties> getPropertiesOfTableInRange(String tableName,
List<KeyRange> ranges) throws IOException {
LOG.info("Getting table properties for table: {}, number of ranges: {}", tableName, ranges.size());
ColumnFamily columnFamily = db.getColumnFamily(tableName);
if (columnFamily == null) {
LOG.error("Table not found for getting properties: {}", tableName);
throw new IOException("No such table in this DB. TableName : " + tableName);
}

List<Range> rocksRanges = new ArrayList<>();
List<ManagedSlice> managedSlices = new ArrayList<>();
try {
for (KeyRange t : ranges) {
ManagedSlice start = new ManagedSlice(StringUtils.string2Bytes(t.getStartKey()));
ManagedSlice end = new ManagedSlice(StringUtils.string2Bytes(t.getEndKey()));
managedSlices.add(start);
managedSlices.add(end);
rocksRanges.add(new Range(start, end));
}
LOG.info("Converted {} ranges to RocksDB ranges, start to get properties", rocksRanges.size());
return db.getPropertiesOfColumnFamilyInRange(columnFamily, rocksRanges);
} catch (RocksDatabaseException e) {
throw new IOException("Failed to get properties of table in range", e);
} finally {
// Close all ManagedSlice objects
for (ManagedSlice slice : managedSlices) {
try {
slice.close();
} catch (Exception e) {
LOG.warn("Failed to close ManagedSlice", e);
}
}
Comment on lines +296 to +303


medium

If an exception occurs during the creation of a ManagedSlice before it's added to the managedSlices list, that specific slice might not be closed. A safer pattern could be to wrap each ManagedSlice creation and addition to the list in its own try-catch-finally.
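One way to address this, sketched against the loop above (not what the PR currently does): build both slices for a range inside the same try, so a failure on the second allocation still releases the first, while the outer finally keeps closing everything that made it into the list.

for (KeyRange t : ranges) {
  ManagedSlice start = null;
  ManagedSlice end = null;
  try {
    start = new ManagedSlice(StringUtils.string2Bytes(t.getStartKey()));
    end = new ManagedSlice(StringUtils.string2Bytes(t.getEndKey()));
    managedSlices.add(start);
    managedSlices.add(end);
    rocksRanges.add(new Range(start, end));
  } catch (RuntimeException e) {
    // Release slices that never made it into managedSlices; the outer
    // finally block still closes everything that did.
    if (start != null) {
      start.close();
    }
    if (end != null) {
      end.close();
    }
    throw e;
  }
}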

}
}

@Override
public void close() {
if (metrics != null) {
RocksDatabase.java
@@ -63,7 +63,9 @@
import org.rocksdb.Holder;
import org.rocksdb.KeyMayExist;
import org.rocksdb.LiveFileMetaData;
import org.rocksdb.Range;
import org.rocksdb.RocksDBException;
import org.rocksdb.TableProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -891,6 +893,16 @@ public void deleteFilesNotMatchingPrefix(Map<String, String> prefixPairs) throws
}
}

public Map<String, TableProperties> getPropertiesOfColumnFamilyInRange(ColumnFamily columnFamily,
List<Range> ranges) throws RocksDatabaseException {
try (UncheckedAutoCloseable ignored = acquire()) {
return db.get().getPropertiesOfTablesInRange(columnFamily.getHandle(), ranges);
} catch (RocksDBException e) {
closeOnError(e);
throw toRocksDatabaseException(this, "getPropertiesOfColumnFamilyInRange", e);
}
}

@Override
protected void finalize() throws Throwable {
if (!isClosed()) {
Table.java
@@ -265,6 +265,14 @@ default void cleanupCache(List<Long> epochs) {
throw new NotImplementedException("cacheIterator is not implemented");
}

/**
* Return cache iterator maintained for this table.
*/
default Iterator<Map.Entry<CacheKey<KEY>, CacheValue<VALUE>>>
cacheIterator(KEY startKey) {
throw new NotImplementedException("cacheIterator with startKey is not implemented");
}

/**
* Create the metrics datasource that emits table cache metrics.
*/
TypedTable.java
@@ -436,6 +436,11 @@ public Iterator<Map.Entry<CacheKey<KEY>, CacheValue<VALUE>>> cacheIterator() {
return cache.iterator();
}

@Override
public Iterator<Map.Entry<CacheKey<KEY>, CacheValue<VALUE>>> cacheIterator(KEY startKey) {
return cache.iterator(startKey);
}

@Override
public TableCacheMetrics createCacheMetrics() {
return TableCacheMetrics.create(cache, getName());
FullTableCache.java
@@ -53,7 +53,7 @@ public class FullTableCache<KEY, VALUE> implements TableCache<KEY, VALUE> {
private static final Logger LOG =
LoggerFactory.getLogger(FullTableCache.class);

private final Map<CacheKey<KEY>, CacheValue<VALUE>> cache;
private final NavigableMap<CacheKey<KEY>, CacheValue<VALUE>> cache;
private final NavigableMap<Long, Set<CacheKey<KEY>>> epochEntries;
private final ScheduledExecutorService executorService;
private final Queue<Long> epochCleanupQueue = new ConcurrentLinkedQueue<>();
@@ -150,6 +150,15 @@ public Iterator<Map.Entry<CacheKey<KEY>, CacheValue<VALUE>>> iterator() {
return cache.entrySet().iterator();
}

@Override
public Iterator<Map.Entry<CacheKey<KEY>, CacheValue<VALUE>>> iterator(KEY startKey) {
statsRecorder.recordIteration();
if (startKey == null) {
return cache.entrySet().iterator();
}
return cache.tailMap(new CacheKey<>(startKey)).entrySet().iterator();
}

@VisibleForTesting
@Override
public void evictCache(List<Long> epochs) {
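For reference, a rough sketch of how the new startKey overload might be consumed — resuming a scan of a cached table from a given key instead of from the beginning. This is illustrative only: the PR implements the overload for the full table cache, CacheKey#getCacheKey and CacheValue#getCacheValue are the existing accessors, and a null cache value is treated here as a delete marker.

// Illustrative only: walk the table cache starting at startKey (inclusive).
static <K, V> void scanCacheFrom(Table<K, V> table, K startKey) {
  Iterator<Map.Entry<CacheKey<K>, CacheValue<V>>> it = table.cacheIterator(startKey);
  while (it.hasNext()) {
    Map.Entry<CacheKey<K>, CacheValue<V>> entry = it.next();
    V value = entry.getValue().getCacheValue();
    if (value == null) {
      continue; // treated as a delete marker in the cache
    }
    // use entry.getKey().getCacheKey() and value here
  }
}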