From 31332e5d73153c2841e052b67850a8dfa51c3c8b Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 27 Mar 2025 01:14:48 -0700 Subject: [PATCH 1/6] HDDS-12708. Fix Unhealthy Containers API for pagination Change-Id: I3aee1631a262e2d941dbbbcddcb7a7c5e286d33a --- .../hadoop/ozone/recon/TestReconTasks.java | 17 +++---- .../schema/ContainerSchemaDefinition.java | 8 ++-- .../hadoop/ozone/recon/ReconConstants.java | 2 + .../ozone/recon/api/ClusterStateEndpoint.java | 3 +- .../ozone/recon/api/ContainerEndpoint.java | 44 ++++++++++++------- .../types/UnhealthyContainersResponse.java | 23 ++++++++++ .../ContainerHealthSchemaManager.java | 38 +++++++++++----- .../recon/api/TestContainerEndpoint.java | 23 +++++----- .../recon/fsck/TestContainerHealthTask.java | 3 +- 9 files changed, 110 insertions(+), 51 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java index fb25b0f64199..715b72470462 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconTasks.java @@ -28,6 +28,7 @@ import java.time.Duration; import java.util.List; import java.util.Map; +import java.util.Optional; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -168,8 +169,8 @@ public void testMissingContainerDownNode() throws Exception { List allMissingContainers = reconContainerManager.getContainerSchemaManager() .getUnhealthyContainers( - ContainerSchemaDefinition.UnHealthyContainerStates.MISSING, - 0, 1000); + ContainerSchemaDefinition.UnHealthyContainerStates.MISSING, 0L, + Optional.empty(), 1000); return (allMissingContainers.size() == 1); }); @@ -180,7 +181,7 @@ public void testMissingContainerDownNode() throws Exception { reconContainerManager.getContainerSchemaManager() .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates.MISSING, - 0, 1000); + 0L, Optional.empty(), 1000); return (allMissingContainers.isEmpty()); }); IOUtils.closeQuietly(client); @@ -246,7 +247,7 @@ public void testEmptyMissingContainerDownNode() throws Exception { .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates. EMPTY_MISSING, - 0, 1000); + 0L, Optional.empty(), 1000); // Check if EMPTY_MISSING containers are not added to the DB and their count is logged Map> @@ -274,7 +275,7 @@ public void testEmptyMissingContainerDownNode() throws Exception { reconContainerManager.getContainerSchemaManager() .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates.MISSING, - 0, 1000); + 0L, Optional.empty(), 1000); return (allMissingContainers.size() == 1); }); @@ -284,7 +285,7 @@ public void testEmptyMissingContainerDownNode() throws Exception { .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates. EMPTY_MISSING, - 0, 1000); + 0L, Optional.empty(), 1000); Map> @@ -314,7 +315,7 @@ public void testEmptyMissingContainerDownNode() throws Exception { .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates. EMPTY_MISSING, - 0, 1000); + 0L, Optional.empty(), 1000); Map> unhealthyContainerStateStatsMap = reconScm.getContainerHealthTask() @@ -334,7 +335,7 @@ public void testEmptyMissingContainerDownNode() throws Exception { reconContainerManager.getContainerSchemaManager() .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates.MISSING, - 0, 1000); + 0L, Optional.empty(), 1000); return (allMissingContainers.isEmpty()); }); diff --git a/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java b/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java index 0fffeb9edeff..deb49ff13e91 100644 --- a/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java +++ b/hadoop-ozone/recon-codegen/src/main/java/org/apache/ozone/recon/schema/ContainerSchemaDefinition.java @@ -37,11 +37,10 @@ */ @Singleton public class ContainerSchemaDefinition implements ReconSchemaDefinition { + private static final Logger LOG = LoggerFactory.getLogger(ContainerSchemaDefinition.class); public static final String UNHEALTHY_CONTAINERS_TABLE_NAME = "UNHEALTHY_CONTAINERS"; - private static final Logger LOG = - LoggerFactory.getLogger(ContainerSchemaDefinition.class); /** * ENUM describing the allowed container states which can be stored in the @@ -92,9 +91,12 @@ private void createUnhealthyContainersTable() { .constraint(DSL.constraint("pk_container_id") .primaryKey(CONTAINER_ID, CONTAINER_STATE)) .constraint(DSL.constraint(UNHEALTHY_CONTAINERS_TABLE_NAME + "ck1") - .check(field(name("container_state")) + .check(field(name(CONTAINER_STATE)) .in(UnHealthyContainerStates.values()))) .execute(); + dslContext.createIndex("idx_container_state") + .on(DSL.table(UNHEALTHY_CONTAINERS_TABLE_NAME), DSL.field(name(CONTAINER_STATE))) + .execute(); } public DSLContext getDSLContext() { diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java index 6927d77c0331..f645cd41da65 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java @@ -50,6 +50,8 @@ private ReconConstants() { public static final String RECON_QUERY_BATCH_PARAM = "batchNum"; public static final String RECON_QUERY_PREVKEY = "prevKey"; public static final String RECON_QUERY_START_PREFIX = "startPrefix"; + public static final String RECON_QUERY_PREV_START_KEY = "prevStartKey"; + public static final String RECON_QUERY_PREV_LAST_KEY = "prevLastKey"; public static final String RECON_OPEN_KEY_INCLUDE_NON_FSO = "includeNonFso"; public static final String RECON_OPEN_KEY_INCLUDE_FSO = "includeFso"; public static final String RECON_OM_INSIGHTS_DEFAULT_START_PREFIX = "/"; diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java index bb60b271fa61..e23b56b9509a 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ClusterStateEndpoint.java @@ -27,6 +27,7 @@ import static org.apache.hadoop.ozone.om.OmMetadataManagerImpl.VOLUME_TABLE; import java.util.List; +import java.util.Optional; import javax.inject.Inject; import javax.ws.rs.GET; import javax.ws.rs.Path; @@ -104,7 +105,7 @@ public Response getClusterState() { List missingContainers = containerHealthSchemaManager .getUnhealthyContainers( ContainerSchemaDefinition.UnHealthyContainerStates.MISSING, - 0, MISSING_CONTAINER_COUNT_LIMIT); + 0L, Optional.empty(), MISSING_CONTAINER_COUNT_LIMIT); containerStateCounts.setMissingContainerCount( missingContainers.size() == MISSING_CONTAINER_COUNT_LIMIT ? diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java index 17382f0634b8..7d8d276f219a 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java @@ -17,14 +17,14 @@ package org.apache.hadoop.ozone.recon.api; -import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_BATCH_NUMBER; import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_FETCH_COUNT; import static org.apache.hadoop.ozone.recon.ReconConstants.DEFAULT_FILTER_FOR_MISSING_CONTAINERS; import static org.apache.hadoop.ozone.recon.ReconConstants.PREV_CONTAINER_ID_DEFAULT_VALUE; -import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_BATCH_PARAM; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_FILTER; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_LIMIT; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREVKEY; +import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREV_LAST_KEY; +import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREV_START_KEY; import java.io.IOException; import java.time.Instant; @@ -35,6 +35,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.UUID; import java.util.stream.Collectors; import javax.inject.Inject; @@ -340,7 +341,7 @@ public Response getMissingContainers( ) { List missingContainers = new ArrayList<>(); containerHealthSchemaManager.getUnhealthyContainers( - UnHealthyContainerStates.MISSING, 0, limit) + UnHealthyContainerStates.MISSING, 0L, Optional.empty(), limit) .forEach(container -> { long containerID = container.getContainerId(); try { @@ -374,9 +375,9 @@ public Response getMissingContainers( * eg UNDER_REPLICATED, MIS_REPLICATED, OVER_REPLICATED or * MISSING. Passing null returns all containers. * @param limit The limit of unhealthy containers to return. - * @param batchNum The batch number (like "page number") of results to return. - * Passing 1, will return records 1 to limit. 2 will return - * limit + 1 to 2 * limit, etc. + * @param prevStartKey startKey of previous batch. If this value is given last N records before this container + * would be returned. + * @param prevLastKey lastKey of previous batch. * @return {@link Response} */ @GET @@ -385,10 +386,11 @@ public Response getUnhealthyContainers( @PathParam("state") String state, @DefaultValue(DEFAULT_FETCH_COUNT) @QueryParam(RECON_QUERY_LIMIT) int limit, - @DefaultValue(DEFAULT_BATCH_NUMBER) - @QueryParam(RECON_QUERY_BATCH_PARAM) int batchNum) { - int offset = Math.max(((batchNum - 1) * limit), 0); - + @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) + @QueryParam(RECON_QUERY_PREV_START_KEY) long prevStartKey, + @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) + @QueryParam(RECON_QUERY_PREV_LAST_KEY) long prevLastKey) { + Optional maxContainerId = prevStartKey > 0 ? Optional.of(prevStartKey) : Optional.empty(); List unhealthyMeta = new ArrayList<>(); List summary; try { @@ -402,7 +404,7 @@ public Response getUnhealthyContainers( summary = containerHealthSchemaManager.getUnhealthyContainersSummary(); List containers = containerHealthSchemaManager - .getUnhealthyContainers(internalState, offset, limit); + .getUnhealthyContainers(internalState, prevLastKey, maxContainerId, limit); // Filtering out EMPTY_MISSING and NEGATIVE_SIZE containers from the response. // These container states are not being inserted into the database as they represent @@ -435,6 +437,12 @@ public Response getUnhealthyContainers( UnhealthyContainersResponse response = new UnhealthyContainersResponse(unhealthyMeta); + if (!unhealthyMeta.isEmpty()) { + response.setFirstKey(unhealthyMeta.stream().map(UnhealthyContainerMetadata::getContainerID) + .min(Long::compareTo).orElse(0L)); + response.setLastKey(unhealthyMeta.stream().map(UnhealthyContainerMetadata::getContainerID) + .max(Long::compareTo).orElse(0L)); + } for (UnhealthyContainersSummary s : summary) { response.setSummaryCount(s.getContainerState(), s.getCount()); } @@ -446,9 +454,9 @@ public Response getUnhealthyContainers( * {@link org.apache.hadoop.ozone.recon.api.types.UnhealthyContainerMetadata} * for all unhealthy containers. * @param limit The limit of unhealthy containers to return. - * @param batchNum The batch number (like "page number") of results to return. - * Passing 1, will return records 1 to limit. 2 will return - * limit + 1 to 2 * limit, etc. + * @param prevStartKey startKey of previous batch. If this value is given last N records before this container + * would be returned. + * @param prevLastKey lastKey of previous batch. * @return {@link Response} */ @GET @@ -456,9 +464,11 @@ public Response getUnhealthyContainers( public Response getUnhealthyContainers( @DefaultValue(DEFAULT_FETCH_COUNT) @QueryParam(RECON_QUERY_LIMIT) int limit, - @DefaultValue(DEFAULT_BATCH_NUMBER) - @QueryParam(RECON_QUERY_BATCH_PARAM) int batchNum) { - return getUnhealthyContainers(null, limit, batchNum); + @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) + @QueryParam(RECON_QUERY_PREV_START_KEY) long prevStartKey, + @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) + @QueryParam(RECON_QUERY_PREV_LAST_KEY) long prevLastKey) { + return getUnhealthyContainers(null, limit, prevStartKey, prevLastKey); } /** diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java index 11b04bf62be5..779407ec5f5d 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java @@ -49,6 +49,21 @@ public class UnhealthyContainersResponse { @JsonProperty("misReplicatedCount") private long misReplicatedCount = 0; + /** + * . + */ + @JsonProperty("firstKey") + private long firstKey = 0; + + + /** + * Total count of mis-replicated containers. + */ + @JsonProperty("lastKey") + private long lastKey = 0; + + + /** * A collection of unhealthy containers. */ @@ -98,4 +113,12 @@ public long getMisReplicatedCount() { public Collection getContainers() { return containers; } + + public void setFirstKey(long firstKey) { + this.firstKey = firstKey; + } + + public void setLastKey(long lastKey) { + this.lastKey = lastKey; + } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java index 84222fc6ccfb..912db3798bb6 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java @@ -26,15 +26,20 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import java.sql.Connection; +import java.util.Comparator; import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; import org.apache.hadoop.ozone.recon.api.types.UnhealthyContainersSummary; import org.apache.ozone.recon.schema.ContainerSchemaDefinition; import org.apache.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates; import org.apache.ozone.recon.schema.generated.tables.daos.UnhealthyContainersDao; import org.apache.ozone.recon.schema.generated.tables.pojos.UnhealthyContainers; import org.apache.ozone.recon.schema.generated.tables.records.UnhealthyContainersRecord; +import org.jooq.Condition; import org.jooq.Cursor; import org.jooq.DSLContext; +import org.jooq.OrderField; import org.jooq.Record; import org.jooq.SelectQuery; import org.jooq.exception.DataAccessException; @@ -67,32 +72,43 @@ public ContainerHealthSchemaManager( * matching the given state will be returned. * @param state Return only containers in this state, or all containers if * null - * @param offset The starting record to return in the result set. The first - * record is at zero. + * @param minContainerId minimum containerId for filter + * @param maxContainerId maximum containerId for filter * @param limit The total records to return * @return List of unhealthy containers. */ public List getUnhealthyContainers( - UnHealthyContainerStates state, int offset, int limit) { + UnHealthyContainerStates state, Long minContainerId, Optional maxContainerId, int limit) { DSLContext dslContext = containerSchemaDefinition.getDSLContext(); SelectQuery query = dslContext.selectQuery(); query.addFrom(UNHEALTHY_CONTAINERS); + Condition containerCondition; + OrderField[] orderField; + if (maxContainerId.isPresent() && maxContainerId.get() > 0) { + containerCondition = UNHEALTHY_CONTAINERS.CONTAINER_ID.lessThan(maxContainerId.get()); + orderField = new OrderField[]{UNHEALTHY_CONTAINERS.CONTAINER_ID.desc(), + UNHEALTHY_CONTAINERS.CONTAINER_STATE.asc()}; + } else { + containerCondition = UNHEALTHY_CONTAINERS.CONTAINER_ID.greaterThan(minContainerId); + orderField = new OrderField[]{UNHEALTHY_CONTAINERS.CONTAINER_ID.asc(), + UNHEALTHY_CONTAINERS.CONTAINER_STATE.asc()}; + } if (state != null) { if (state.equals(ALL_REPLICAS_BAD)) { - query.addConditions(UNHEALTHY_CONTAINERS.CONTAINER_STATE - .eq(UNDER_REPLICATED.toString())); + query.addConditions(containerCondition.and(UNHEALTHY_CONTAINERS.CONTAINER_STATE + .eq(UNDER_REPLICATED.toString()))); query.addConditions(UNHEALTHY_CONTAINERS.ACTUAL_REPLICA_COUNT.eq(0)); } else { - query.addConditions( - UNHEALTHY_CONTAINERS.CONTAINER_STATE.eq(state.toString())); + query.addConditions(containerCondition.and(UNHEALTHY_CONTAINERS.CONTAINER_STATE.eq(state.toString()))); } } - query.addOrderBy(UNHEALTHY_CONTAINERS.CONTAINER_ID.asc(), - UNHEALTHY_CONTAINERS.CONTAINER_STATE.asc()); - query.addOffset(offset); + + query.addOrderBy(orderField); query.addLimit(limit); - return query.fetchInto(UnhealthyContainers.class); + return query.fetchInto(UnhealthyContainers.class).stream() + .sorted(Comparator.comparingLong(UnhealthyContainers::getContainerId)) + .collect(Collectors.toList()); } /** diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java index 4db2a276c8a1..dbdfb0790c7c 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java @@ -800,7 +800,7 @@ void putContainerInfos(int num) throws IOException, TimeoutException { @Test public void testUnhealthyContainers() throws IOException, TimeoutException { - Response response = containerEndpoint.getUnhealthyContainers(1000, 1); + Response response = containerEndpoint.getUnhealthyContainers(1000, 0, 0); UnhealthyContainersResponse responseObject = (UnhealthyContainersResponse) response.getEntity(); @@ -819,7 +819,7 @@ public void testUnhealthyContainers() throws IOException, TimeoutException { uuid4 = newDatanode("host4", "127.0.0.4"); createUnhealthyRecords(5, 4, 3, 2); - response = containerEndpoint.getUnhealthyContainers(1000, 1); + response = containerEndpoint.getUnhealthyContainers(1000, 0, 0); responseObject = (UnhealthyContainersResponse) response.getEntity(); assertEquals(5, responseObject.getMissingCount()); @@ -905,7 +905,7 @@ public void testUnhealthyContainersFilteredResponse() // Initial empty response verification Response response = containerEndpoint - .getUnhealthyContainers(missing, 1000, 1); + .getUnhealthyContainers(missing, 1000, 0, 0); UnhealthyContainersResponse responseObject = (UnhealthyContainersResponse) response.getEntity(); @@ -927,7 +927,7 @@ public void testUnhealthyContainersFilteredResponse() createNegativeSizeUnhealthyRecords(2); // For NEGATIVE_SIZE state // Check for unhealthy containers - response = containerEndpoint.getUnhealthyContainers(missing, 1000, 1); + response = containerEndpoint.getUnhealthyContainers(missing, 1000, 0, 0); responseObject = (UnhealthyContainersResponse) response.getEntity(); @@ -951,14 +951,14 @@ public void testUnhealthyContainersFilteredResponse() // Check for empty missing containers, should return zero Response filteredEmptyMissingResponse = containerEndpoint - .getUnhealthyContainers(emptyMissing, 1000, 1); + .getUnhealthyContainers(emptyMissing, 1000, 0, 0); responseObject = (UnhealthyContainersResponse) filteredEmptyMissingResponse.getEntity(); records = responseObject.getContainers(); assertEquals(0, records.size()); // Check for negative size containers, should return zero Response filteredNegativeSizeResponse = containerEndpoint - .getUnhealthyContainers(negativeSize, 1000, 1); + .getUnhealthyContainers(negativeSize, 1000, 0, 0); responseObject = (UnhealthyContainersResponse) filteredNegativeSizeResponse.getEntity(); records = responseObject.getContainers(); assertEquals(0, records.size()); @@ -968,7 +968,7 @@ public void testUnhealthyContainersFilteredResponse() @Test public void testUnhealthyContainersInvalidState() { WebApplicationException e = assertThrows(WebApplicationException.class, - () -> containerEndpoint.getUnhealthyContainers("invalid", 1000, 1)); + () -> containerEndpoint.getUnhealthyContainers("invalid", 1000, 0, 0)); assertEquals("HTTP 400 Bad Request", e.getMessage()); } @@ -983,15 +983,18 @@ public void testUnhealthyContainersPaging() createUnhealthyRecords(5, 4, 3, 2); UnhealthyContainersResponse firstBatch = (UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers( - 3, 1).getEntity(); + 3, 0, 0).getEntity(); assertTrue(firstBatch.getContainers().stream() .flatMap(containerMetadata -> containerMetadata.getReplicas().stream() .map(ContainerHistory::getState)) .allMatch(s -> s.equals("UNHEALTHY"))); - + long minContainerId = firstBatch.getContainers().stream() + .map(UnhealthyContainerMetadata::getContainerID).min(Long::compareTo).get(); + long maxContainerId = firstBatch.getContainers().stream() + .map(UnhealthyContainerMetadata::getContainerID).max(Long::compareTo).get(); UnhealthyContainersResponse secondBatch = (UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers( - 3, 2).getEntity(); + 3, minContainerId, maxContainerId).getEntity(); ArrayList records = new ArrayList<>(firstBatch.getContainers()); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index bdd5ca35b296..d485390f5bb2 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -41,6 +41,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.UUID; import org.apache.hadoop.hdds.client.RatisReplicationConfig; @@ -216,7 +217,7 @@ public void testRun() throws Exception { List unhealthyContainers = containerHealthSchemaManager.getUnhealthyContainers( - ALL_REPLICAS_BAD, 0, Integer.MAX_VALUE); + ALL_REPLICAS_BAD, 0L, Optional.empty(), Integer.MAX_VALUE); assertEquals(1, unhealthyContainers.size()); assertEquals(2L, unhealthyContainers.get(0).getContainerId().longValue()); From 7c0ec35bd27518589185a788c2ca254f1c42dac3 Mon Sep 17 00:00:00 2001 From: arafat Date: Mon, 7 Jul 2025 20:54:21 +0530 Subject: [PATCH 2/6] Fixed unit tests and made code changes --- .../ContainerHealthSchemaManager.java | 24 +++++++++++++++++-- .../recon/api/TestContainerEndpoint.java | 23 ++++++++++++------ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java index 912db3798bb6..b7b98ba1f99b 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java @@ -23,6 +23,7 @@ import static org.apache.ozone.recon.schema.generated.tables.UnhealthyContainersTable.UNHEALTHY_CONTAINERS; import static org.jooq.impl.DSL.count; +import com.google.common.annotations.VisibleForTesting; import com.google.inject.Inject; import com.google.inject.Singleton; import java.sql.Connection; @@ -87,11 +88,11 @@ public List getUnhealthyContainers( if (maxContainerId.isPresent() && maxContainerId.get() > 0) { containerCondition = UNHEALTHY_CONTAINERS.CONTAINER_ID.lessThan(maxContainerId.get()); orderField = new OrderField[]{UNHEALTHY_CONTAINERS.CONTAINER_ID.desc(), - UNHEALTHY_CONTAINERS.CONTAINER_STATE.asc()}; + UNHEALTHY_CONTAINERS.CONTAINER_STATE.asc()}; } else { containerCondition = UNHEALTHY_CONTAINERS.CONTAINER_ID.greaterThan(minContainerId); orderField = new OrderField[]{UNHEALTHY_CONTAINERS.CONTAINER_ID.asc(), - UNHEALTHY_CONTAINERS.CONTAINER_STATE.asc()}; + UNHEALTHY_CONTAINERS.CONTAINER_STATE.asc()}; } if (state != null) { if (state.equals(ALL_REPLICAS_BAD)) { @@ -101,6 +102,10 @@ public List getUnhealthyContainers( } else { query.addConditions(containerCondition.and(UNHEALTHY_CONTAINERS.CONTAINER_STATE.eq(state.toString()))); } + } else { + // CRITICAL FIX: Apply pagination condition even when state is null + // This ensures proper pagination for the "get all unhealthy containers" use case + query.addConditions(containerCondition); } query.addOrderBy(orderField); @@ -167,4 +172,19 @@ public void insertUnhealthyContainerRecords(List recs) { } } + /** + * Clear all unhealthy container records. This is primarily used for testing + * to ensure clean state between tests. + */ + @VisibleForTesting + public void clearAllUnhealthyContainerRecords() { + DSLContext dslContext = containerSchemaDefinition.getDSLContext(); + try { + dslContext.deleteFrom(UNHEALTHY_CONTAINERS).execute(); + LOG.info("Cleared all unhealthy container records"); + } catch (Exception e) { + LOG.info("Failed to clear unhealthy container records", e); + } + } + } diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java index dbdfb0790c7c..5efc19137df6 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/api/TestContainerEndpoint.java @@ -975,12 +975,20 @@ public void testUnhealthyContainersInvalidState() { @Test public void testUnhealthyContainersPaging() throws IOException, TimeoutException { - putContainerInfos(6); + + // Clear any existing unhealthy container records from previous tests + containerHealthSchemaManager.clearAllUnhealthyContainerRecords(); + // Create containers for all IDs that will be used in unhealthy records + // createUnhealthyRecords(5, 4, 3, 2) will create records for containers 1-14 + // So we need to create 14 containers instead of just 6 + putContainerInfos(14); // Changed from 6 to 14 uuid1 = newDatanode("host1", "127.0.0.1"); uuid2 = newDatanode("host2", "127.0.0.2"); uuid3 = newDatanode("host3", "127.0.0.3"); uuid4 = newDatanode("host4", "127.0.0.4"); createUnhealthyRecords(5, 4, 3, 2); + + // Get first batch with no pagination (prevStartKey=0, prevLastKey=0) UnhealthyContainersResponse firstBatch = (UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers( 3, 0, 0).getEntity(); @@ -988,13 +996,15 @@ public void testUnhealthyContainersPaging() .flatMap(containerMetadata -> containerMetadata.getReplicas().stream() .map(ContainerHistory::getState)) .allMatch(s -> s.equals("UNHEALTHY"))); - long minContainerId = firstBatch.getContainers().stream() - .map(UnhealthyContainerMetadata::getContainerID).min(Long::compareTo).get(); - long maxContainerId = firstBatch.getContainers().stream() - .map(UnhealthyContainerMetadata::getContainerID).max(Long::compareTo).get(); + + // For pagination, use the last container ID from the first batch as prevLastKey + long lastContainerIdFromFirstBatch = firstBatch.getContainers().stream() + .map(i -> i.getContainerID()).max(Long::compareTo).get(); + + // Get second batch using correct pagination parameters UnhealthyContainersResponse secondBatch = (UnhealthyContainersResponse) containerEndpoint.getUnhealthyContainers( - 3, minContainerId, maxContainerId).getEntity(); + 3, 0, lastContainerIdFromFirstBatch).getEntity(); ArrayList records = new ArrayList<>(firstBatch.getContainers()); @@ -1002,7 +1012,6 @@ public void testUnhealthyContainersPaging() assertEquals(1L, records.get(0).getContainerID()); assertEquals(2L, records.get(1).getContainerID()); assertEquals(3L, records.get(2).getContainerID()); - records = new ArrayList<>(secondBatch.getContainers()); assertEquals(3, records.size()); From 899d852512e80bff99ed7e04cbae4732a6df2ce6 Mon Sep 17 00:00:00 2001 From: arafat Date: Fri, 18 Jul 2025 12:35:33 +0530 Subject: [PATCH 3/6] Fixed review comments --- .../hadoop/ozone/recon/ReconConstants.java | 4 +-- .../ozone/recon/api/ContainerEndpoint.java | 25 +++++++++++-------- .../types/UnhealthyContainersResponse.java | 6 +++-- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java index f645cd41da65..6dc8078ce2ed 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java @@ -50,8 +50,8 @@ private ReconConstants() { public static final String RECON_QUERY_BATCH_PARAM = "batchNum"; public static final String RECON_QUERY_PREVKEY = "prevKey"; public static final String RECON_QUERY_START_PREFIX = "startPrefix"; - public static final String RECON_QUERY_PREV_START_KEY = "prevStartKey"; - public static final String RECON_QUERY_PREV_LAST_KEY = "prevLastKey"; + public static final String RECON_QUERY_MAX_CONTAINER_ID = "maxContainerId"; + public static final String RECON_QUERY_MIN_CONTAINER_ID = "minContainerId"; public static final String RECON_OPEN_KEY_INCLUDE_NON_FSO = "includeNonFso"; public static final String RECON_OPEN_KEY_INCLUDE_FSO = "includeFso"; public static final String RECON_OM_INSIGHTS_DEFAULT_START_PREFIX = "/"; diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java index 7d8d276f219a..e090f77a502e 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java @@ -23,8 +23,8 @@ import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_FILTER; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_LIMIT; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREVKEY; -import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREV_LAST_KEY; -import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREV_START_KEY; +import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_MIN_CONTAINER_ID; +import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_MAX_CONTAINER_ID; import java.io.IOException; import java.time.Instant; @@ -375,9 +375,12 @@ public Response getMissingContainers( * eg UNDER_REPLICATED, MIS_REPLICATED, OVER_REPLICATED or * MISSING. Passing null returns all containers. * @param limit The limit of unhealthy containers to return. - * @param prevStartKey startKey of previous batch. If this value is given last N records before this container - * would be returned. - * @param prevLastKey lastKey of previous batch. + * @param maxContainerId Upper bound for container IDs to include (exclusive). + * When specified, returns containers with IDs less than this value + * in descending order. Use for backward pagination. + * @param minContainerId Lower bound for container IDs to include (exclusive). + * When maxContainerId is not specified, returns containers with IDs + * greater than this value in ascending order. Use for forward pagination. * @return {@link Response} */ @GET @@ -387,10 +390,10 @@ public Response getUnhealthyContainers( @DefaultValue(DEFAULT_FETCH_COUNT) @QueryParam(RECON_QUERY_LIMIT) int limit, @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) - @QueryParam(RECON_QUERY_PREV_START_KEY) long prevStartKey, + @QueryParam(RECON_QUERY_MAX_CONTAINER_ID) long maxContainerId, @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) - @QueryParam(RECON_QUERY_PREV_LAST_KEY) long prevLastKey) { - Optional maxContainerId = prevStartKey > 0 ? Optional.of(prevStartKey) : Optional.empty(); + @QueryParam(RECON_QUERY_MIN_CONTAINER_ID) long minContainerId) { + Optional maxContainerIdOpt = maxContainerId > 0 ? Optional.of(maxContainerId) : Optional.empty(); List unhealthyMeta = new ArrayList<>(); List summary; try { @@ -404,7 +407,7 @@ public Response getUnhealthyContainers( summary = containerHealthSchemaManager.getUnhealthyContainersSummary(); List containers = containerHealthSchemaManager - .getUnhealthyContainers(internalState, prevLastKey, maxContainerId, limit); + .getUnhealthyContainers(internalState, minContainerId, maxContainerIdOpt, limit); // Filtering out EMPTY_MISSING and NEGATIVE_SIZE containers from the response. // These container states are not being inserted into the database as they represent @@ -465,9 +468,9 @@ public Response getUnhealthyContainers( @DefaultValue(DEFAULT_FETCH_COUNT) @QueryParam(RECON_QUERY_LIMIT) int limit, @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) - @QueryParam(RECON_QUERY_PREV_START_KEY) long prevStartKey, + @QueryParam(RECON_QUERY_MAX_CONTAINER_ID) long prevStartKey, @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) - @QueryParam(RECON_QUERY_PREV_LAST_KEY) long prevLastKey) { + @QueryParam(RECON_QUERY_MIN_CONTAINER_ID) long prevLastKey) { return getUnhealthyContainers(null, limit, prevStartKey, prevLastKey); } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java index 779407ec5f5d..d3704e04aa7b 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java @@ -50,14 +50,16 @@ public class UnhealthyContainersResponse { private long misReplicatedCount = 0; /** - * . + * The smallest container ID in the current response batch. + * Used for pagination to determine the lower bound for the next page. */ @JsonProperty("firstKey") private long firstKey = 0; /** - * Total count of mis-replicated containers. + * The largest container ID in the current response batch. + * Used for pagination to determine the upper bound for the next page. */ @JsonProperty("lastKey") private long lastKey = 0; From 01ebd516fe48c6da2307f38948a00f81cf43169c Mon Sep 17 00:00:00 2001 From: arafat Date: Fri, 18 Jul 2025 12:37:01 +0530 Subject: [PATCH 4/6] Fixed review comments 2 --- .../hadoop/ozone/recon/api/ContainerEndpoint.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java index e090f77a502e..8bb7729454fd 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java @@ -457,9 +457,12 @@ public Response getUnhealthyContainers( * {@link org.apache.hadoop.ozone.recon.api.types.UnhealthyContainerMetadata} * for all unhealthy containers. * @param limit The limit of unhealthy containers to return. - * @param prevStartKey startKey of previous batch. If this value is given last N records before this container - * would be returned. - * @param prevLastKey lastKey of previous batch. + * @param maxContainerId Upper bound for container IDs to include (exclusive). + * When specified, returns containers with IDs less than this value + * in descending order. Use for backward pagination. + * @param minContainerId Lower bound for container IDs to include (exclusive). + * When maxContainerId is not specified, returns containers with IDs + * greater than this value in ascending order. Use for forward pagination. * @return {@link Response} */ @GET @@ -468,10 +471,10 @@ public Response getUnhealthyContainers( @DefaultValue(DEFAULT_FETCH_COUNT) @QueryParam(RECON_QUERY_LIMIT) int limit, @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) - @QueryParam(RECON_QUERY_MAX_CONTAINER_ID) long prevStartKey, + @QueryParam(RECON_QUERY_MAX_CONTAINER_ID) long maxContainerId, @DefaultValue(PREV_CONTAINER_ID_DEFAULT_VALUE) - @QueryParam(RECON_QUERY_MIN_CONTAINER_ID) long prevLastKey) { - return getUnhealthyContainers(null, limit, prevStartKey, prevLastKey); + @QueryParam(RECON_QUERY_MIN_CONTAINER_ID) long minContainerId) { + return getUnhealthyContainers(null, limit, maxContainerId, minContainerId); } /** From 0833760d737b65e5ee6f3d40c9cc5307226665ab Mon Sep 17 00:00:00 2001 From: arafat Date: Wed, 23 Jul 2025 13:05:30 +0530 Subject: [PATCH 5/6] Fixed checkstyle --- .../org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java index 5f45e7d66a26..89db23520de4 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/ContainerEndpoint.java @@ -22,9 +22,9 @@ import static org.apache.hadoop.ozone.recon.ReconConstants.PREV_CONTAINER_ID_DEFAULT_VALUE; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_FILTER; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_LIMIT; -import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREVKEY; -import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_MIN_CONTAINER_ID; import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_MAX_CONTAINER_ID; +import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_MIN_CONTAINER_ID; +import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_QUERY_PREVKEY; import java.io.IOException; import java.time.Instant; From a0bcaa99de26c01d370a27f3ee14c2e13494ab8d Mon Sep 17 00:00:00 2001 From: arafat Date: Wed, 23 Jul 2025 13:25:51 +0530 Subject: [PATCH 6/6] Fixed bugs --- .../recon/api/types/UnhealthyContainersResponse.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java index 3c47b527d3ed..350f9e8ceda1 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/UnhealthyContainersResponse.java @@ -125,6 +125,14 @@ public long getReplicaMismatchCount() { return replicaMismatchCount; } + public long getLastKey() { + return lastKey; + } + + public long getFirstKey() { + return firstKey; + } + public Collection getContainers() { return containers; }