diff --git a/hadoop-hdds/docs/content/tools/Admin.md b/hadoop-hdds/docs/content/tools/Admin.md index e89331230fbd..c2f6093180a0 100644 --- a/hadoop-hdds/docs/content/tools/Admin.md +++ b/hadoop-hdds/docs/content/tools/Admin.md @@ -172,3 +172,49 @@ $ ozone admin om lof --service-id=om-service-test1 --length=3 --prefix=/volumelo ``` Note in JSON output mode, field `contToken` won't show up at all in the result if there are no more entries after the batch (i.e. when `hasMore` is `false`). + + +## Snapshot Defragmentation Trigger + +The snapshot defrag command triggers the Snapshot Defragmentation Service to run immediately on a specific Ozone Manager node. +This command manually initiates the snapshot defragmentation process which compacts snapshot data and removes fragmentation to improve storage efficiency. + +This command only works on Ozone Manager HA clusters. + +```bash +$ ozone admin om snapshot defrag --help +Usage: ozone admin om snapshot defrag [-hV] [--no-wait] [--node-id=<nodeId>] + [-id=<omServiceId>] +Triggers the Snapshot Defragmentation Service to run immediately. This command +manually initiates the snapshot defragmentation process which compacts +snapshot data and removes fragmentation to improve storage efficiency. This +command works only on OzoneManager HA cluster. + -h, --help Show this help message and exit. + --no-wait Do not wait for the defragmentation task to + complete. The command will return immediately + after triggering the task. + --node-id=<nodeId> NodeID of the OM to trigger snapshot + defragmentation on. + -id, --service-id=<omServiceId> + Ozone Manager Service ID + -V, --version Print version information and exit. +``` + +### Example usages + +- Trigger snapshot defragmentation on OM node `om3` in service `omservice` and wait for completion: + +```bash +$ ozone admin om snapshot defrag --service-id=omservice --node-id=om3 +Triggering Snapshot Defrag Service ... +Snapshot defragmentation completed successfully. 
+```
+
+- Trigger snapshot defragmentation without waiting for completion:
+
+```bash
+$ ozone admin om snapshot defrag --service-id=omservice --node-id=om3 --no-wait
+Triggering Snapshot Defrag Service ...
+Snapshot defragmentation task has been triggered successfully and is running in the background.
+```
+
diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java
index d536b81be140..e096a55b95c6 100644
--- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java
+++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.ozone.OmUtils;
 import org.apache.hadoop.ozone.admin.OzoneAdmin;
 import org.apache.hadoop.ozone.admin.om.lease.LeaseSubCommand;
+import org.apache.hadoop.ozone.admin.om.snapshot.SnapshotSubCommand;
 import org.apache.hadoop.ozone.client.OzoneClientException;
 import org.apache.hadoop.ozone.client.OzoneClientFactory;
 import org.apache.hadoop.ozone.client.protocol.ClientProtocol;
@@ -59,7 +60,8 @@
         UpdateRangerSubcommand.class,
         TransferOmLeaderSubCommand.class,
         FetchKeySubCommand.class,
-        LeaseSubCommand.class
+        LeaseSubCommand.class,
+        SnapshotSubCommand.class
     })
 @MetaInfServices(AdminSubcommand.class)
 public class OMAdmin implements AdminSubcommand {
diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/DefragSubCommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/DefragSubCommand.java
new file mode 100644
index 000000000000..6062353d60ba
--- /dev/null
+++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/DefragSubCommand.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.admin.om.snapshot;
+
+import java.io.IOException;
+import java.util.concurrent.Callable;
+import org.apache.hadoop.hdds.cli.HddsVersionProvider;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.admin.om.OMAdmin;
+import org.apache.hadoop.ozone.om.helpers.OMNodeDetails;
+import org.apache.hadoop.ozone.om.protocolPB.OMAdminProtocolClientSideImpl;
+import org.apache.hadoop.security.UserGroupInformation;
+import picocli.CommandLine;
+
+/**
+ * Handler of ozone admin om snapshot defrag command.
+ *
+ * <p>Resolves the target OM from the given service ID and node ID, opens an
+ * admin-protocol client to that single OM and asks it to trigger snapshot
+ * defragmentation, optionally without waiting for the run to complete.
+ */
+@CommandLine.Command(
+    name = "defrag",
+    description = "Triggers the Snapshot Defragmentation Service to run " +
+        "immediately. This command manually initiates the snapshot " +
+        "defragmentation process which compacts snapshot data and removes " +
+        "fragmentation to improve storage efficiency. " +
+        "This command works only on OzoneManager HA cluster.",
+    mixinStandardHelpOptions = true,
+    versionProvider = HddsVersionProvider.class
+)
+public class DefragSubCommand implements Callable<Void> {
+
+  @CommandLine.ParentCommand
+  private SnapshotSubCommand parent;
+
+  @CommandLine.Option(
+      names = {"-id", "--service-id"},
+      description = "Ozone Manager Service ID"
+  )
+  private String omServiceId;
+
+  @CommandLine.Option(
+      names = {"--node-id"},
+      description = "NodeID of the OM to trigger snapshot defragmentation on.",
+      required = false
+  )
+  private String nodeId;
+
+  @CommandLine.Option(
+      names = {"--no-wait"},
+      description = "Do not wait for the defragmentation task to complete. " +
+          "The command will return immediately after triggering the task.",
+      defaultValue = "false"
+  )
+  private boolean noWait;
+
+  /**
+   * Resolves the target OM and triggers snapshot defragmentation on it.
+   *
+   * @return always {@code null}; the outcome is printed to stdout/stderr
+   * @throws Exception if the trigger fails; an {@link IOException} raised while
+   *     creating the client or sending the request is printed to stderr first
+   */
+  @Override
+  public Void call() throws Exception {
+    // Navigate up to get OMAdmin
+    OMAdmin omAdmin = getOMAdmin();
+    OzoneConfiguration conf = omAdmin.getParent().getOzoneConf();
+    OMNodeDetails omNodeDetails = OMNodeDetails.getOMNodeDetailsFromConf(
+        conf, omServiceId, nodeId);
+
+    if (omNodeDetails == null) {
+      // NOTE(review): this branch returns normally after printing the error, so
+      // callers cannot tell it apart from success - confirm that is intended.
+      System.err.println("Error: OMNodeDetails could not be determined with given " +
+          "service ID and node ID.");
+      return null;
+    }
+
+    try (OMAdminProtocolClientSideImpl omAdminProtocolClient = createClient(conf, omNodeDetails)) {
+      execute(omAdminProtocolClient);
+    } catch (IOException ex) {
+      System.err.println("Failed to trigger snapshot defragmentation: " +
+          ex.getMessage());
+      throw ex;
+    }
+
+    return null;
+  }
+
+  /**
+   * Creates the admin-protocol client for the given OM. Kept as a separate
+   * protected method so that a subclass can supply a different client.
+   *
+   * @param conf Ozone configuration used to build the proxy
+   * @param omNodeDetails the single OM to connect to
+   * @return a client proxy for that OM
+   * @throws IOException if the current user or the proxy cannot be obtained
+   */
+  protected OMAdminProtocolClientSideImpl createClient(
+      OzoneConfiguration conf, OMNodeDetails omNodeDetails) throws IOException {
+    return OMAdminProtocolClientSideImpl.createProxyForSingleOM(conf,
+        UserGroupInformation.getCurrentUser(), omNodeDetails);
+  }
+
+  /**
+   * Sends the trigger request and prints the outcome to stdout.
+   *
+   * @param omAdminProtocolClient client connected to the target OM
+   * @throws IOException if the request fails
+   */
+  protected void execute(OMAdminProtocolClientSideImpl omAdminProtocolClient)
+      throws IOException {
+    System.out.println("Triggering Snapshot Defrag Service ...");
+    boolean result = omAdminProtocolClient.triggerSnapshotDefrag(noWait);
+
+    if (noWait) {
+      System.out.println("Snapshot defragmentation task has been triggered " +
+          "successfully and is running in the background.");
+    } else {
+      if (result) {
+        System.out.println("Snapshot defragmentation completed successfully.");
+      } else {
+        System.out.println("Snapshot defragmentation task failed or was interrupted.");
+      }
+    }
+  }
+
+  /**
+   * Returns the {@link OMAdmin} command two levels up the command hierarchy.
+   */
+  private OMAdmin getOMAdmin() {
+    // The parent hierarchy is: DefragSubCommand -> SnapshotSubCommand -> OMAdmin
+    return parent.getParent();
+  }
+}
diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/SnapshotSubCommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/SnapshotSubCommand.java
new file mode 100644
index 000000000000..48ca9e365ff7
--- /dev/null
+++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/SnapshotSubCommand.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.admin.om.snapshot;
+
+import org.apache.hadoop.ozone.admin.om.OMAdmin;
+import picocli.CommandLine;
+
+/**
+ * Handler of ozone admin om snapshot command.
+ */ +@CommandLine.Command( + name = "snapshot", + description = "Command for all snapshot related operations.", + subcommands = { + DefragSubCommand.class + } +) +public class SnapshotSubCommand { + + @CommandLine.ParentCommand + private OMAdmin parent; + + public OMAdmin getParent() { + return parent; + } +} diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/package-info.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/package-info.java new file mode 100644 index 000000000000..00fd11817ccb --- /dev/null +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/om/snapshot/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Command line for Ozone Manager snapshot operations. 
+ */ +package org.apache.hadoop.ozone.admin.om.snapshot; diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/om/snapshot/TestDefragSubCommand.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/om/snapshot/TestDefragSubCommand.java new file mode 100644 index 000000000000..105a79f987d8 --- /dev/null +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/ozone/admin/om/snapshot/TestDefragSubCommand.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.admin.om.snapshot; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.om.helpers.OMNodeDetails; +import org.apache.hadoop.ozone.om.protocolPB.OMAdminProtocolClientSideImpl; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import picocli.CommandLine; + +/** + * Unit tests to validate the DefragSubCommand class includes + * the correct output when executed against a mock client. + */ +public class TestDefragSubCommand { + + private TestableDefragSubCommand cmd; + private OMAdminProtocolClientSideImpl omAdminClient; + private final ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private final ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + private final PrintStream originalOut = System.out; + private final PrintStream originalErr = System.err; + private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); + + /** + * Testable version of DefragSubCommand that allows injecting a mock client. 
+ */ + private static class TestableDefragSubCommand extends DefragSubCommand { + private final OMAdminProtocolClientSideImpl mockClient; + + TestableDefragSubCommand(OMAdminProtocolClientSideImpl mockClient) { + this.mockClient = mockClient; + } + + @Override + protected OMAdminProtocolClientSideImpl createClient( + OzoneConfiguration conf, OMNodeDetails omNodeDetails) { + return mockClient; + } + } + + @BeforeEach + public void setup() throws Exception { + omAdminClient = mock(OMAdminProtocolClientSideImpl.class); + cmd = new TestableDefragSubCommand(omAdminClient); + + // Mock close() to do nothing - needed for try-with-resources + doNothing().when(omAdminClient).close(); + + + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); + } + + @AfterEach + public void tearDown() { + System.setOut(originalOut); + System.setErr(originalErr); + } + + @Test + public void testTriggerSnapshotDefragWithWait() throws Exception { + // Mock the client to return success + when(omAdminClient.triggerSnapshotDefrag(false)).thenReturn(true); + + // Execute the command (default behavior: wait for completion) + CommandLine c = new CommandLine(cmd); + c.parseArgs(); + cmd.execute(omAdminClient); + + // Verify the client method was called with correct parameter + verify(omAdminClient).triggerSnapshotDefrag(eq(false)); + + // Verify output contains success message + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Triggering Snapshot Defrag Service")); + assertTrue(output.contains("Snapshot defragmentation completed successfully")); + } + + @Test + public void testTriggerSnapshotDefragWithWaitFailure() throws Exception { + // Mock the client to return failure + when(omAdminClient.triggerSnapshotDefrag(false)).thenReturn(false); + + // Execute the command + CommandLine c = new CommandLine(cmd); + c.parseArgs(); + cmd.execute(omAdminClient); + + // Verify the client 
method was called + verify(omAdminClient).triggerSnapshotDefrag(eq(false)); + + // Verify output contains failure message + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Triggering Snapshot Defrag")); + assertTrue(output.contains("Snapshot defragmentation task failed or was interrupted")); + } + + @Test + public void testTriggerSnapshotDefragWithServiceIdAndNodeId() throws Exception { + // Mock the client with both service ID and node ID + when(omAdminClient.triggerSnapshotDefrag(false)).thenReturn(true); + + // Execute the command with service ID and node ID + CommandLine c = new CommandLine(cmd); + c.parseArgs("--service-id", "om-service-1", "--node-id", "om1"); + cmd.execute(omAdminClient); + + // Verify the client method was called + verify(omAdminClient).triggerSnapshotDefrag(eq(false)); + + // Verify success message + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("Snapshot defragmentation completed successfully")); + } + + @Test + public void testTriggerSnapshotDefragWithAllOptions() throws Exception { + // Test with service-id, node-id, and no-wait options + when(omAdminClient.triggerSnapshotDefrag(true)).thenReturn(true); + + // Execute the command with multiple options + CommandLine c = new CommandLine(cmd); + c.parseArgs("--service-id", "om-service-1", "--node-id", "om1", "--no-wait"); + cmd.execute(omAdminClient); + + // Verify the client method was called + verify(omAdminClient).triggerSnapshotDefrag(eq(true)); + + // Verify output for background execution + String output = outContent.toString(DEFAULT_ENCODING); + assertTrue(output.contains("triggered successfully and is running in the background")); + } +} + diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java index 8588620074d1..cb6baf79fe7e 100644 --- 
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java
@@ -45,4 +45,13 @@ public interface OMAdminProtocol extends Closeable {
    * @param columnFamily
    */
   void compactOMDB(String columnFamily) throws IOException;
+
+  /**
+   * Triggers the Snapshot Defragmentation Service to run immediately.
+   * @param noWait if true, return immediately without waiting for completion
+   * @return true if defragmentation completed successfully (when noWait is false),
+   *         or if the task was triggered successfully (when noWait is true)
+   * @throws IOException if the trigger request fails
+   */
+  boolean triggerSnapshotDefrag(boolean noWait) throws IOException;
 }
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java
index f7d22713b329..7ae8a30b73af 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java
@@ -47,6 +47,8 @@
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationResponse;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMNodeInfo;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragRequest;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragResponse;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -232,6 +234,40 @@ public void compactOMDB(String columnFamily) throws IOException {
     }
   }
 
+  /**
+   * Asks the OM to trigger snapshot defragmentation.
+   *
+   * @param noWait forwarded to the OM in the request
+   * @return the result carried in the OM's response
+   * @throws IOException if the RPC fails, the OM reports failure, or the
+   *     response carries no result
+   */
+  @Override
+  public boolean triggerSnapshotDefrag(boolean noWait) throws IOException {
+    final TriggerSnapshotDefragRequest defragRequest =
+        TriggerSnapshotDefragRequest.newBuilder().setNoWait(noWait).build();
+
+    final TriggerSnapshotDefragResponse defragResponse;
+    try {
+      defragResponse = rpcProxy.triggerSnapshotDefrag(NULL_RPC_CONTROLLER, defragRequest);
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+
+    if (!defragResponse.getSuccess()) {
+      throwException("Request to trigger snapshot defragmentation" +
+          ", sent to " + omPrintInfo + " failed with error: " +
+          defragResponse.getErrorMsg());
+    }
+    // throwException never returns normally, so reaching the return below
+    // implies the response carries a result.
+    if (!defragResponse.hasResult()) {
+      throwException("Missing result in TriggerSnapshotDefragResponse from " + omPrintInfo +
+          ". This likely indicates a server error.");
+    }
+    return defragResponse.getResult();
+  }
+
   private void throwException(String errorMsg)
       throws IOException {
     throw new IOException("Request Failed. Error: " + errorMsg);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDefragAdmin.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDefragAdmin.java
new file mode 100644
index 000000000000..dff56a35d164
--- /dev/null
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDefragAdmin.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om.snapshot; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.IOUtils; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.admin.OzoneAdmin; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +/** + * Integration test for 'ozone admin om snapshot defrag' command. + * Tests that the defrag command can be successfully triggered on any OM + * (leader or follower) in an HA cluster. 
+ */
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class TestSnapshotDefragAdmin {
+
+  private static MiniOzoneHAClusterImpl cluster;
+  private static OzoneClient client;
+  private static String omServiceId;
+
+  @BeforeAll
+  public static void init() throws Exception {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    conf.setBoolean(OMConfigKeys.OZONE_FILESYSTEM_SNAPSHOT_ENABLED_KEY, true);
+    // Enable snapshot defrag service
+    conf.setInt(OMConfigKeys.OZONE_SNAPSHOT_DEFRAG_SERVICE_INTERVAL, 7200);
+    conf.setInt(OMConfigKeys.SNAPSHOT_DEFRAG_LIMIT_PER_TASK, 1);
+
+    omServiceId = "om-service-test-defrag";
+    cluster = MiniOzoneCluster.newHABuilder(conf)
+        .setOMServiceId(omServiceId)
+        .setNumOfOzoneManagers(3)
+        .build();
+
+    cluster.waitForClusterToBeReady();
+    client = cluster.newClient();
+  }
+
+  @AfterAll
+  public static void cleanup() {
+    IOUtils.closeQuietly(client);
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * Tests triggering snapshot defrag on the OM leader.
+   */
+  @Test
+  public void testDefragOnLeader() throws Exception {
+    OzoneManager leader = cluster.getOMLeader();
+    String leaderId = leader.getOMNodeId();
+
+    executeDefragCommand(leaderId, false);
+  }
+
+  /**
+   * Tests triggering snapshot defrag on an OM follower.
+   */
+  @Test
+  public void testDefragOnFollower() throws Exception {
+    OzoneManager leader = cluster.getOMLeader();
+    List<OzoneManager> allOMs = cluster.getOzoneManagersList();
+
+    // Find a follower OM
+    OzoneManager follower = null;
+    for (OzoneManager om : allOMs) {
+      if (!om.getOMNodeId().equals(leader.getOMNodeId())) {
+        follower = om;
+        break;
+      }
+    }
+
+    assertNotNull(follower, "Should have at least one follower OM");
+    executeDefragCommand(follower.getOMNodeId(), false);
+  }
+
+  /**
+   * Tests triggering snapshot defrag on all OMs in the cluster.
+   */
+  @Test
+  public void testDefragOnAllOMs() throws Exception {
+    List<OzoneManager> allOMs = cluster.getOzoneManagersList();
+
+    assertEquals(3, allOMs.size(), "Expected 3 OMs in the cluster");
+
+    // Test defrag on each OM
+    for (OzoneManager om : allOMs) {
+      String omNodeId = om.getOMNodeId();
+      executeDefragCommand(omNodeId, false);
+    }
+  }
+
+  /**
+   * Tests triggering snapshot defrag with --no-wait option.
+   */
+  @Test
+  public void testDefragWithNoWait() throws Exception {
+    OzoneManager leader = cluster.getOMLeader();
+    String leaderId = leader.getOMNodeId();
+
+    executeDefragCommand(leaderId, true);
+  }
+
+  /**
+   * Tests triggering snapshot defrag on a follower with --no-wait option.
+   */
+  @Test
+  public void testDefragOnFollowerWithNoWait() throws Exception {
+    OzoneManager leader = cluster.getOMLeader();
+    List<OzoneManager> allOMs = cluster.getOzoneManagersList();
+
+    // Find a follower OM
+    OzoneManager follower = null;
+    for (OzoneManager om : allOMs) {
+      if (!om.getOMNodeId().equals(leader.getOMNodeId())) {
+        follower = om;
+        break;
+      }
+    }
+
+    assertNotNull(follower, "Should have at least one follower OM");
+    executeDefragCommand(follower.getOMNodeId(), true);
+  }
+
+  /**
+   * Helper method to execute the defrag command on a specific OM node.
+   *
+   * @param nodeId the OM node ID to target
+   * @param noWait whether to use the --no-wait option
+   */
+  private void executeDefragCommand(String nodeId, boolean noWait) throws Exception {
+    OzoneAdmin ozoneAdmin = new OzoneAdmin();
+    ozoneAdmin.getOzoneConf().addResource(cluster.getConf());
+
+    // Capture output to verify command execution
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintStream ps = new PrintStream(baos, true, StandardCharsets.UTF_8.name());
+    PrintStream oldOut = System.out;
+    System.setOut(ps);
+
+    try {
+      String[] args;
+      if (noWait) {
+        args = new String[]{
+            "om",
+            "snapshot",
+            "defrag",
+            "-id", omServiceId,
+            "--node-id", nodeId,
+            "--no-wait"
+        };
+      } else {
+        args = new String[]{
+            "om",
+            "snapshot",
+            "defrag",
+            "-id", omServiceId,
+            "--node-id", nodeId
+        };
+      }
+
+      int exitCode = ozoneAdmin.execute(args);
+      System.out.flush();
+      String output = baos.toString(StandardCharsets.UTF_8.name());
+
+      // Verify successful execution
+      assertEquals(0, exitCode,
+          "Command should execute successfully on OM " + nodeId);
+      assertTrue(output.contains("Triggering Snapshot Defrag Service"),
+          "Output should indicate defrag service is being triggered");
+
+      if (noWait) {
+        assertTrue(output.contains("triggered successfully") &&
+            output.contains("background"),
+            "Output should indicate task triggered in background: " + output);
+      } else {
+        assertTrue(output.contains("completed successfully") ||
+            output.contains("failed") ||
+            output.contains("interrupted"),
+            "Output should indicate completion status: " + output);
+      }
+    } finally {
+      System.setOut(oldOut);
+      ps.close();
+    }
+  }
+}
+
diff --git a/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto
index 4b104514f6ae..5e726b400e87 100644
--- a/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto
+++ b/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto
@@ -79,6 +79,16 
@@ message CompactResponse {
     optional string errorMsg = 3;
 }
 
+message TriggerSnapshotDefragRequest {
+    required bool noWait = 1;
+}
+
+message TriggerSnapshotDefragResponse {
+    required bool success = 1;
+    optional string errorMsg = 2;
+    optional bool result = 3;
+}
+
 /**
  The service for OM admin operations.
 */
@@ -95,4 +105,8 @@ service OzoneManagerAdminService {
     // RPC request from admin to compact a column family of the OM's db
     rpc compactDB(CompactRequest)
     returns(CompactResponse);
+
+    // RPC request from admin to trigger snapshot defragmentation
+    rpc triggerSnapshotDefrag(TriggerSnapshotDefragRequest)
+    returns(TriggerSnapshotDefragResponse);
 }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index b1005f22a6fc..ad8fa0be344c 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -3573,6 +3573,55 @@ public boolean triggerRangerBGSync(boolean noWait) throws IOException {
     }
   }
 
+  /**
+   * Manually triggers one run of the snapshot defragmentation service on
+   * this OM. The caller must be an Ozone admin.
+   *
+   * @param noWait if true, run the defragmentation in a new thread and
+   *     return immediately; otherwise run it in the calling thread
+   * @return {@code true} when {@code noWait} is set, otherwise the value
+   *     returned by {@link SnapshotDefragService#triggerSnapshotDefragOnce()}
+   * @throws IOException if the caller is not an admin, the defragmentation
+   *     service is not available, or the synchronous run fails
+   */
+  public boolean triggerSnapshotDefrag(boolean noWait) throws IOException {
+
+    // Note: Any OM (leader or follower) can run snapshot defrag
+
+    // Only Ozone admins may trigger defragmentation by hand
+    final UserGroupInformation caller = getRemoteUser();
+    if (!isAdmin(caller)) {
+      throw new OMException("Only Ozone admins are allowed to trigger " +
+          "snapshot defragmentation manually", PERMISSION_DENIED);
+    }
+
+    // Fetch the service from the KeyManager
+    final SnapshotDefragService defragService = keyManager.getSnapshotDefragService();
+    if (defragService == null) {
+      throw new OMException("Snapshot defragmentation service is not initialized",
+          FEATURE_NOT_ENABLED);
+    }
+
+    if (!noWait) {
+      // Synchronous mode: run in the calling thread and report the outcome
+      LOG.info("User '{}' manually triggered Snapshot Defragmentation and is waiting", caller);
+      return defragService.triggerSnapshotDefragOnce();
+    }
+
+    // Background mode: failures are only logged, never reported to the caller
+    final Thread worker = new Thread(() -> {
+      try {
+        defragService.triggerSnapshotDefragOnce();
+      } catch (Exception e) {
+        LOG.error("Error during snapshot defragmentation", e);
+      }
+    }, threadPrefix + "SnapshotDefragTrigger-" + System.currentTimeMillis());
+    worker.start();
+    LOG.info("User '{}' manually triggered Snapshot Defragmentation without waiting" +
+        " in a new thread, tid = {}", caller, worker.getId());
+    return true;
+  }
+
   @Override
   public StatusAndMessages finalizeUpgrade(String upgradeClientID)
       throws IOException {
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java
index 212953cd874c..1a9e15136c2e 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotDefragService.java
@@ -172,10 +172,6 @@ private final class SnapshotDefragTask implements BackgroundTask {
     public BackgroundTaskResult call() throws Exception {
       // Check OM leader and readiness
       if (shouldRun()) {
-        final long count = runCount.incrementAndGet();
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Initiating Snapshot Defragmentation Task: run # {}", count);
-        }
         triggerSnapshotDefragOnce();
       }
 
@@ -184,6 +180,12 @@ public BackgroundTaskResult call() throws Exception {
   }
 
   public synchronized boolean triggerSnapshotDefragOnce() throws IOException {
+
+    final long count = runCount.incrementAndGet();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Initiating Snapshot Defragmentation Task: run # {}", count);
+    }
+
     // Check if rocks-tools native lib is available
     if (!isRocksToolsNativeLibAvailable()) {
       LOG.warn("Rocks-tools native library is not available. " +
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java
index 42ca2113f40f..8184b39642e4 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java
@@ -38,6 +38,8 @@
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationResponse;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMNodeInfo;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragRequest;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.TriggerSnapshotDefragResponse;
 
 /**
  * This class is the server-side translator that forwards requests received on
@@ -128,4 +130,35 @@ public CompactResponse compactDB(RpcController controller, CompactRequest compac
     return CompactResponse.newBuilder()
         .setSuccess(true).build();
   }
+
+  /**
+   * Forwards a snapshot defragmentation trigger request to the OzoneManager.
+   * Any failure is reported through the response's success flag and error
+   * message rather than as an RPC-level exception.
+   *
+   * @param controller RPC controller (not used by this method)
+   * @param request carries the noWait flag passed on to the OzoneManager
+   * @return response with success=true and the trigger result, or
+   *         success=false and an error message if the trigger threw
+   */
+  @Override
+  public TriggerSnapshotDefragResponse triggerSnapshotDefrag(
+      RpcController controller, TriggerSnapshotDefragRequest request)
+      throws ServiceException {
+    try {
+      boolean result = ozoneManager.triggerSnapshotDefrag(request.getNoWait());
+      return TriggerSnapshotDefragResponse.newBuilder()
+          .setSuccess(true)
+          .setResult(result)
+          .build();
+    } catch (Exception ex) {
+      // Protobuf setters reject null, and getMessage() may be null (e.g. for
+      // an NPE), so fall back to the exception's string form.
+      String errorMsg = ex.getMessage() != null ? ex.getMessage() : ex.toString();
+      return TriggerSnapshotDefragResponse.newBuilder()
+          .setSuccess(false)
+          .setErrorMsg(errorMsg)
+          .build();
+    }
+  }
 }