Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b858f85
HDDS-11205. Implement a search feature for users to locate keys pendi…
ArafatKhan2198 Jul 19, 2024
f9a96c5
Fixed a bug and improved the tests
ArafatKhan2198 Sep 30, 2024
26223d0
Refactored some code
ArafatKhan2198 Oct 1, 2024
abb9d65
Improved the java doc
ArafatKhan2198 Oct 1, 2024
abca3c9
Fixed checkstyle issues
ArafatKhan2198 Oct 4, 2024
2af1de1
Fixed a dead store variable
ArafatKhan2198 Oct 6, 2024
ab9765a
Refactored key search logic to OMDBInsightEndpoint and enhanced with…
ArafatKhan2198 Oct 10, 2024
53d8126
Fixed checkstyle issues
ArafatKhan2198 Oct 10, 2024
28c4536
Made sure all the Insight endpoints utilise one method for extracting…
ArafatKhan2198 Oct 10, 2024
1df317d
Made final review changes
ArafatKhan2198 Oct 10, 2024
5d9d3c2
Fixed failing test
ArafatKhan2198 Oct 10, 2024
9ec2c32
Fixed java doc error
ArafatKhan2198 Oct 11, 2024
384ce67
Merge branch 'master' into HDDS-11205
ArafatKhan2198 Oct 11, 2024
28b0b00
Fixed possible java doc comment
ArafatKhan2198 Oct 11, 2024
16b2c36
Fixed final java doc problem
ArafatKhan2198 Oct 14, 2024
da82a51
Fixed checkstyle issue
ArafatKhan2198 Oct 14, 2024
86c9f46
Final review refactoring
ArafatKhan2198 Oct 16, 2024
2338c5e
Fixed checkstyle issues
ArafatKhan2198 Oct 16, 2024
f688dac
Removed unnecessary blank check
ArafatKhan2198 Oct 16, 2024
96a7f79
Reverted back the blank check
ArafatKhan2198 Oct 16, 2024
005850a
Done with the null check
ArafatKhan2198 Oct 18, 2024
510b307
Fixed the checkstyle and changed the status code
ArafatKhan2198 Oct 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ private ReconConstants() {
public static final int DISK_USAGE_TOP_RECORDS_LIMIT = 30;
public static final String DEFAULT_OPEN_KEY_INCLUDE_NON_FSO = "false";
public static final String DEFAULT_OPEN_KEY_INCLUDE_FSO = "false";
public static final String DEFAULT_START_PREFIX = "/";
public static final String DEFAULT_FETCH_COUNT = "1000";
public static final String DEFAULT_KEY_SIZE = "0";
public static final String DEFAULT_BATCH_NUMBER = "1";
public static final String RECON_QUERY_BATCH_PARAM = "batchNum";
public static final String RECON_QUERY_PREVKEY = "prevKey";
public static final String RECON_OPEN_KEY_INCLUDE_NON_FSO = "includeNonFso";
public static final String RECON_OPEN_KEY_INCLUDE_FSO = "includeFso";
public static final String RECON_OPEN_KEY_DEFAULT_SEARCH_LIMIT = "1000";
public static final String RECON_OPEN_KEY_SEARCH_DEFAULT_PREV_KEY = "";
public static final String RECON_OM_INSIGHTS_DEFAULT_START_PREFIX = "/";
public static final String RECON_OM_INSIGHTS_DEFAULT_SEARCH_LIMIT = "1000";
public static final String RECON_OM_INSIGHTS_DEFAULT_SEARCH_PREV_KEY = "";
public static final String RECON_QUERY_FILTER = "missingIn";
public static final String PREV_CONTAINER_ID_DEFAULT_VALUE = "0";
public static final String PREV_DELETED_BLOCKS_TRANSACTION_ID_DEFAULT_VALUE =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.List;
import java.util.TimeZone;
import java.util.Date;
Expand All @@ -54,6 +56,8 @@
import org.apache.hadoop.hdds.scm.ha.SCMNodeDetails;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher;
import org.apache.hadoop.hdds.utils.HddsServerUtil;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.io.IOUtils;

Expand Down Expand Up @@ -596,6 +600,47 @@ public static long convertToEpochMillis(String dateString, String dateFormat, Ti
}
}

/**
 * Common method to retrieve keys from a table based on a search prefix and a limit.
 *
 * @param table The table to retrieve keys from.
 * @param startPrefix The search prefix to match keys against.
 * @param limit The maximum number of keys to retrieve.
 * @param prevKey The key to start after for the next set of records; may be
 *                null or empty, in which case iteration starts at startPrefix.
 * @param <T> The value type stored in the table.
 * @return A map of keys and their corresponding OmKeyInfo or RepeatedOmKeyInfo objects.
 * @throws IOException If there are problems accessing the table.
 */
public static <T> Map<String, T> retrieveKeysFromTable(
    Table<String, T> table, String startPrefix, int limit, String prevKey)
    throws IOException {
  Map<String, T> matchedKeys = new LinkedHashMap<>();
  try (TableIterator<String, ? extends Table.KeyValue<String, T>> keyIter = table.iterator()) {
    if (prevKey != null && !prevKey.isEmpty()) {
      // Resume after the pagination cursor. seek() positions the iterator
      // at prevKey if present, or at the next greater key otherwise.
      keyIter.seek(prevKey);
      if (keyIter.hasNext()) {
        Table.KeyValue<String, T> first = keyIter.next();
        String firstKey = first.getKey();
        // Only skip the entry when it is exactly the previous key. If
        // prevKey was removed since the last page, seek already landed on
        // the next key, which must be included rather than dropped.
        if (!firstKey.equals(prevKey)
            && firstKey.startsWith(startPrefix) && limit > 0) {
          matchedKeys.put(firstKey, first.getValue());
        }
      }
    } else {
      // No pagination cursor; start from the first key at/after the prefix.
      keyIter.seek(startPrefix);
    }
    while (keyIter.hasNext() && matchedKeys.size() < limit) {
      Table.KeyValue<String, T> entry = keyIter.next();
      String dbKey = entry.getKey();
      if (!dbKey.startsWith(startPrefix)) {
        break; // Keys are sorted; no later key can match the prefix.
      }
      matchedKeys.put(dbKey, entry.getValue());
    }
  } catch (IOException exception) {
    log.error("Error retrieving keys from table for path: {}", startPrefix, exception);
    throw exception;
  }
  return matchedKeys;
}

/**
* Finds all subdirectories under a parent directory in an FSO bucket. It builds
* a list of paths for these subdirectories. These sub-directories are then used
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@

import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.BucketLayout;
import org.apache.hadoop.ozone.recon.ReconUtils;
import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
import org.apache.hadoop.ozone.recon.api.handlers.BucketHandler;
import org.apache.hadoop.ozone.recon.api.types.KeyEntityInfo;
import org.apache.hadoop.ozone.recon.api.types.KeyInsightInfoResponse;
Expand All @@ -50,20 +49,27 @@
import java.util.ArrayList;

import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_START_PREFIX;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OPEN_KEY_DEFAULT_SEARCH_LIMIT;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OPEN_KEY_SEARCH_DEFAULT_PREV_KEY;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_INSIGHTS_DEFAULT_START_PREFIX;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_INSIGHTS_DEFAULT_SEARCH_LIMIT;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_INSIGHTS_DEFAULT_SEARCH_PREV_KEY;
import static org.apache.hadoop.ozone.recon.ReconResponseUtils.noMatchedKeysResponse;
import static org.apache.hadoop.ozone.recon.ReconResponseUtils.createBadRequestResponse;
import static org.apache.hadoop.ozone.recon.ReconResponseUtils.createInternalServerErrorResponse;
import static org.apache.hadoop.ozone.recon.ReconUtils.constructObjectPathWithPrefix;
import static org.apache.hadoop.ozone.recon.ReconUtils.retrieveKeysFromTable;
import static org.apache.hadoop.ozone.recon.ReconUtils.gatherSubPaths;
import static org.apache.hadoop.ozone.recon.ReconUtils.validateNames;
import static org.apache.hadoop.ozone.recon.api.handlers.BucketHandler.getBucketHandler;
import static org.apache.hadoop.ozone.recon.api.handlers.EntityHandler.normalizePath;
import static org.apache.hadoop.ozone.recon.api.handlers.EntityHandler.parseRequestPath;

/**
* REST endpoint for search implementation in OM DB Insight.
*
* This class provides endpoints for searching keys in the Ozone Manager database.
* It supports searching for both open and deleted keys across File System Optimized (FSO)
* and Object Store (non-FSO) bucket layouts. The results include matching keys and their
* data sizes.
*/
@Path("/keys")
@Produces(MediaType.APPLICATION_JSON)
Expand All @@ -88,14 +94,14 @@ public OMDBInsightSearchEndpoint(OzoneStorageContainerManager reconSCM,


/**
* Performs a search for open keys in the Ozone Manager (OM) database using a specified search prefix.
* Performs a search for open keys in the Ozone Manager OpenKey and OpenFile table using a specified search prefix.
* This endpoint searches across both File System Optimized (FSO) and Object Store (non-FSO) layouts,
* compiling a list of keys that match the given prefix along with their data sizes.
* <p>
*
* The search prefix must start from the bucket level ('/volumeName/bucketName/') or any specific directory
* or key level (e.g., '/volA/bucketA/dir1' for everything under 'dir1' inside 'bucketA' of 'volA').
* The search operation matches the prefix against the start of keys' names within the OM DB.
* <p>
*
* Example Usage:
* 1. A startPrefix of "/volA/bucketA/" retrieves every key under bucket 'bucketA' in volume 'volA'.
* 2. Specifying "/volA/bucketA/dir1" focuses the search within 'dir1' inside 'bucketA' of 'volA'.
Expand All @@ -110,25 +116,17 @@ public OMDBInsightSearchEndpoint(OzoneStorageContainerManager reconSCM,
@GET
@Path("/open/search")
public Response searchOpenKeys(
@DefaultValue(DEFAULT_START_PREFIX) @QueryParam("startPrefix")
@DefaultValue(RECON_OM_INSIGHTS_DEFAULT_START_PREFIX) @QueryParam("startPrefix")
String startPrefix,
@DefaultValue(RECON_OPEN_KEY_DEFAULT_SEARCH_LIMIT) @QueryParam("limit")
@DefaultValue(RECON_OM_INSIGHTS_DEFAULT_SEARCH_LIMIT) @QueryParam("limit")
int limit,
@DefaultValue(RECON_OPEN_KEY_SEARCH_DEFAULT_PREV_KEY) @QueryParam("prevKey") String prevKey) throws IOException {
@DefaultValue(RECON_OM_INSIGHTS_DEFAULT_SEARCH_PREV_KEY) @QueryParam("prevKey")
String prevKey) throws IOException {

try {
// Ensure startPrefix is not null or empty and starts with '/'
if (startPrefix == null || startPrefix.length() == 0) {
return createBadRequestResponse(
"Invalid startPrefix: Path must be at the bucket level or deeper.");
}
startPrefix = startPrefix.startsWith("/") ? startPrefix : "/" + startPrefix;

// Split the path to ensure it's at least at the bucket level
String[] pathComponents = startPrefix.split("/");
if (pathComponents.length < 3 || pathComponents[2].isEmpty()) {
return createBadRequestResponse(
"Invalid startPrefix: Path must be at the bucket level or deeper.");
// Validate the request parameters
if (!validateStartPrefixAndLimit(startPrefix)) {
return createBadRequestResponse("Invalid startPrefix: Path must be at the bucket level or deeper.");
}

// Ensure the limit is non-negative
Expand Down Expand Up @@ -221,7 +219,7 @@ public Map<String, OmKeyInfo> searchOpenKeysInFSO(String startPrefix,
subPaths.add(startPrefixObjectPath);

// Recursively gather all subpaths
ReconUtils.gatherSubPaths(parentId, subPaths, Long.parseLong(names[0]), Long.parseLong(names[1]),
gatherSubPaths(parentId, subPaths, Long.parseLong(names[0]), Long.parseLong(names[1]),
reconNamespaceSummaryManager);

// Iterate over the subpaths and retrieve the open files
Expand Down Expand Up @@ -325,46 +323,93 @@ public String convertToObjectPath(String prevKeyPrefix) throws IOException {
return prevKeyPrefix;
}


/**
* Common method to retrieve keys from a table based on a search prefix and a limit.
* Performs a search for deleted keys in the Ozone Manager DeletedTable using a specified search prefix.
* In the DeletedTable both the fso and non-fso keys are stored in a similar format, this endpoint compiles
* a list of keys that match the given prefix along with their data sizes.
*
* @param table The table to retrieve keys from.
* @param startPrefix The search prefix to match keys against.
* @param limit The maximum number of keys to retrieve.
* The search prefix must start from the bucket level ('/volumeName/bucketName/') or any specific directory
* or key level (e.g., '/volA/bucketA/dir1' for everything under 'dir1' inside 'bucketA' of 'volA').
* The search operation matches the prefix against the start of keys' names within the OM DB DeletedTable.
*
* Example Usage:
* 1. A startPrefix of "/volA/bucketA/" retrieves every key under bucket 'bucketA' in volume 'volA'.
* 2. Specifying "/volA/bucketA/dir1" focuses the search within 'dir1' inside 'bucketA' of 'volA'.
*
* @param startPrefix The prefix for searching keys, starting from the bucket level or any specific path.
* @param limit Limits the number of returned keys.
* @param prevKey The key to start after for the next set of records.
* @return A map of keys and their corresponding OmKeyInfo objects.
* @throws IOException If there are problems accessing the table.
* @return A KeyInsightInfoResponse, containing matching keys and their data sizes.
* @throws IOException On failure to access the OM database or process the operation.
*/
private Map<String, OmKeyInfo> retrieveKeysFromTable(
Table<String, OmKeyInfo> table, String startPrefix, int limit, String prevKey)
throws IOException {
Map<String, OmKeyInfo> matchedKeys = new LinkedHashMap<>();
try (TableIterator<String, ? extends Table.KeyValue<String, OmKeyInfo>> keyIter = table.iterator()) {
// If a previous key is provided, seek to the previous key and skip it.
if (!prevKey.isEmpty()) {
keyIter.seek(prevKey);
if (keyIter.hasNext()) {
// Skip the previous key
keyIter.next();
}
} else {
// If no previous key is provided, start from the search prefix.
keyIter.seek(startPrefix);
@GET
@Path("/deletePending/search")
public Response searchDeletedKeys(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The beginning and ending of this method have a lot in common with searchOpenKeys.

@GET
@Path("/open/search")
public Response searchOpenKeys(
@DefaultValue(DEFAULT_START_PREFIX) @QueryParam("startPrefix")
String startPrefix,
@DefaultValue(RECON_OPEN_KEY_DEFAULT_SEARCH_LIMIT) @QueryParam("limit")
int limit,
@DefaultValue(RECON_OPEN_KEY_SEARCH_DEFAULT_PREV_KEY) @QueryParam("prevKey") String prevKey) throws IOException {

Can we extract them? Do you think it would make sense?

@DefaultValue(RECON_OM_INSIGHTS_DEFAULT_START_PREFIX) @QueryParam("startPrefix")
String startPrefix,
@DefaultValue(RECON_OM_INSIGHTS_DEFAULT_SEARCH_LIMIT) @QueryParam("limit")
int limit,
@DefaultValue(RECON_OM_INSIGHTS_DEFAULT_SEARCH_PREV_KEY) @QueryParam("prevKey")
String prevKey) throws IOException {

try {
// Validate the request parameters
if (!validateStartPrefixAndLimit(startPrefix)) {
return createBadRequestResponse("Invalid startPrefix: Path must be at the bucket level or deeper.");
}
while (keyIter.hasNext() && matchedKeys.size() < limit) {
Table.KeyValue<String, OmKeyInfo> entry = keyIter.next();
String dbKey = entry.getKey();
if (!dbKey.startsWith(startPrefix)) {
break; // Exit the loop if the key no longer matches the prefix
}
matchedKeys.put(dbKey, entry.getValue());

// Ensure the limit is non-negative
limit = Math.max(0, limit);

// Initialize response object
KeyInsightInfoResponse insightResponse = new KeyInsightInfoResponse();
long replicatedTotal = 0;
long unreplicatedTotal = 0;
boolean keysFound = false; // Flag to track if any keys are found
String lastKey = null;

// Search for deleted keys in DeletedTable
Table<String, RepeatedOmKeyInfo> deletedTable = omMetadataManager.getDeletedTable();
Map<String, RepeatedOmKeyInfo> deletedKeys = retrieveKeysFromTable(deletedTable, startPrefix, limit, prevKey);

for (Map.Entry<String, RepeatedOmKeyInfo> entry : deletedKeys.entrySet()) {
keysFound = true;
RepeatedOmKeyInfo repeatedOmKeyInfo = entry.getValue();

// We know each RepeatedOmKeyInfo has just one OmKeyInfo object
OmKeyInfo keyInfo = repeatedOmKeyInfo.getOmKeyInfoList().get(0);
KeyEntityInfo keyEntityInfo = createKeyEntityInfoFromOmKeyInfo(entry.getKey(), keyInfo);

// Add the key directly to the list without classification
insightResponse.getRepeatedOmKeyInfoList().add(repeatedOmKeyInfo);

replicatedTotal += keyInfo.getReplicatedSize();
unreplicatedTotal += keyInfo.getDataSize();

lastKey = entry.getKey(); // Update lastKey
}
} catch (IOException exception) {
LOG.error("Error retrieving keys from table for path: {}", startPrefix, exception);
throw exception;

// If no keys were found, return a response indicating that no keys matched
if (!keysFound) {
return noMatchedKeysResponse(startPrefix);
}

// Set the aggregated totals in the response
insightResponse.setReplicatedDataSize(replicatedTotal);
insightResponse.setUnreplicatedDataSize(unreplicatedTotal);
insightResponse.setLastKey(lastKey);

// Return the response with the matched keys and their data sizes
return Response.ok(insightResponse).build();
} catch (IOException e) {
// Handle IO exceptions and return an internal server error response
return createInternalServerErrorResponse(
"Error searching deleted keys in OM DB: " + e.getMessage());
} catch (IllegalArgumentException e) {
// Handle illegal argument exceptions and return a bad request response
return createBadRequestResponse(
"Invalid startPrefix: " + e.getMessage());
}
return matchedKeys;
}

/**
Expand All @@ -386,4 +431,20 @@ private KeyEntityInfo createKeyEntityInfoFromOmKeyInfo(String dbKey,
return keyEntityInfo;
}

/**
 * Checks that the supplied startPrefix reaches at least the bucket level,
 * i.e. '/volumeName/bucketName' or deeper.
 *
 * @param startPrefix the path prefix supplied by the caller; a missing
 *                    leading '/' is tolerated and normalized away
 * @return true if the prefix is non-empty and bucket-level or deeper
 */
private boolean validateStartPrefixAndLimit(String startPrefix) {
  if (startPrefix == null || startPrefix.isEmpty()) {
    return false;
  }
  // Normalize to a leading '/' so splitting yields ["", volume, bucket, ...].
  String normalized = startPrefix.startsWith("/") ? startPrefix : "/" + startPrefix;
  String[] segments = normalized.split("/");
  // segments[0] is the empty token before the leading '/'; a bucket-level
  // path therefore needs at least three tokens with a non-empty bucket name.
  return segments.length >= 3 && !segments[2].isEmpty();
}

}
Loading