From e7217a256f21bd22cc4c1863177392ebacc87d82 Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Mon, 29 Apr 2024 17:13:12 -0700 Subject: [PATCH 1/6] HDDS-8101. Add FSO repair tool to ozone CLI in read-only and repair modes --- .../hadoop/fs/ozone/TestFSORepairTool.java | 577 +++++++++++++++ hadoop-ozone/tools/pom.xml | 47 ++ .../hadoop/ozone/debug/FSORepairCLI.java | 78 ++ .../hadoop/ozone/debug/FSORepairTool.java | 687 ++++++++++++++++++ 4 files changed, 1389 insertions(+) create mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java new file mode 100644 index 000000000000..e23ce993a22c --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -0,0 +1,577 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.ozone; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.debug.FSORepairTool; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.junit.jupiter.api.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; + +/** + * FSORepairTool test cases. + */ +public class TestFSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(TestFSORepairTool.class); + + private static MiniOzoneHAClusterImpl cluster; + private static FileSystem fs; + private static OzoneClient client; + + + @BeforeAll + public static void init() throws Exception { + // Set configs. + OzoneConfiguration conf = new OzoneConfiguration(); + // deletion services will be triggered manually. + conf.setTimeDuration(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, + 1_000_000, TimeUnit.SECONDS); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 1_000_000, + TimeUnit.SECONDS); + conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 10); + conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 10); + conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true); + // Since delete services use RocksDB iterators, make sure the double + // buffer is flushed between runs. + conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 1); + + // Build cluster. + cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) + .setNumOfOzoneManagers(1) + .setOMServiceId("omservice") + .setNumDatanodes(3) + .build(); + cluster.waitForClusterToBeReady(); + + // Init ofs. 
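+    // The ofs root resolves to the single OM started above; tests address keys with absolute /volume/bucket paths.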
+ final String rootPath = String.format("%s://%s/", + OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMNodeId()); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + fs = FileSystem.get(conf); + client = OzoneClientFactory.getRpcClient("omservice", conf); + } + + @AfterEach + public void cleanNamespace() throws Exception { + if (fs.exists(new Path("/vol1"))) { + fs.delete(new Path("/vol1"), true); + } + if (fs.exists(new Path("/vol2"))) { + fs.delete(new Path("/vol2"), true); + } + runDeletes(); + assertFileAndDirTablesEmpty(); + } + + @AfterAll + public static void teardown() { + if (cluster != null) { + cluster.shutdown(); + } + IOUtils.closeQuietly(fs); + } + + @Test + public void testConnectedTreeOneBucket() throws Exception { + FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); + + // Test the connected tree in debug mode. + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), true); + FSORepairTool.Report debugReport = repair.run(); + + Assertions.assertEquals(expectedReport, debugReport); + assertConnectedTreeReadable("vol1", "bucket1"); + assertDeleteTablesEmpty(); + + // Running again in repair mode should give same results since the tree + // is connected. + repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report repairReport = repair.run(); + + Assertions.assertEquals(expectedReport, repairReport); + assertConnectedTreeReadable("vol1", "bucket1"); + assertDeleteTablesEmpty(); + } + + @Test + public void testReportedDataSize() throws Exception { + FSORepairTool.Report report1 = buildDisconnectedTree("vol1", "bucket1", 10); + FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); + FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); + + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report debugReport = repair.run(); + Assertions.assertEquals(expectedReport, debugReport); + } + + @Test + public void testMultipleBucketsAndVolumes() throws Exception { + FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1"); + FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); + FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report( + report1, report2); + + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + + Assertions.assertEquals(generatedReport, expectedAggregateReport); + assertConnectedTreeReadable("vol1", "bucket1"); + assertDisconnectedTreePartiallyReadable("vol2", "bucket2"); + assertDisconnectedObjectsMarkedForDelete(1); + } + + /** + * Tests having multiple entries in the deleted file and directory tables + * for the same objects. + */ + @Test + public void testDeleteOverwrite() throws Exception { + // Create files and dirs under dir1. To make sure they are added to the + // delete table, the keys must have data. + buildConnectedTree("vol1", "bucket1", 10); + // Move soon to be disconnected objects to the deleted table. + fs.delete(new Path("/vol1/bucket1/dir1/dir2/file3"), true); + fs.delete(new Path("/vol1/bucket1/dir1/dir2"), true); + fs.delete(new Path("/vol1/bucket1/dir1/file1"), true); + fs.delete(new Path("/vol1/bucket1/dir1/file2"), true); + + // Recreate deleted objects, then disconnect dir1. + // This means after the repair runs, these objects will be + // the deleted tables multiple times. 
Some will have the same dir1 parent ID + // in their key name too. + ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/dir2/file3")); + ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file1")); + ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); + disconnectDirectory("dir1"); + + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + + Assertions.assertEquals(1, generatedReport.getUnreachableDirs()); + Assertions.assertEquals(3, generatedReport.getUnreachableFiles()); + + assertDisconnectedObjectsMarkedForDelete(2); + } + + @Test + public void testEmptyFileTrees() throws Exception { + // Run when there are no file trees. + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + assertDeleteTablesEmpty(); + + // Create an empty volume and bucket. + fs.mkdirs(new Path("/vol1")); + fs.mkdirs(new Path("/vol2/bucket1")); + + // Run on an empty volume and bucket. + repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + generatedReport = repair.run(); + Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + assertDeleteTablesEmpty(); + } + + @Test + public void testNonFSOBucketsSkipped() throws Exception { + ObjectStore store = client.getObjectStore(); + try { + // Create legacy and OBS buckets. + store.createVolume("vol1"); + store.getVolume("vol1").createBucket("obs-bucket", + BucketArgs.newBuilder().setBucketLayout(BucketLayout.OBJECT_STORE) + .build()); + store.getVolume("vol1").createBucket("legacy-bucket", + BucketArgs.newBuilder().setBucketLayout(BucketLayout.LEGACY) + .build()); + + // Put a key in the legacy and OBS buckets. + OzoneOutputStream obsStream = store.getVolume("vol1") + .getBucket("obs-bucket") + .createKey("prefix/test-key", 3); + obsStream.write(new byte[]{1, 1, 1}); + obsStream.close(); + + OzoneOutputStream legacyStream = store.getVolume("vol1") + .getBucket("legacy-bucket") + .createKey("prefix/test-key", 3); + legacyStream.write(new byte[]{1, 1, 1}); + legacyStream.close(); + + // Add an FSO bucket with data. + FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso" + + "-bucket"); + + // Even in repair mode there should be no action. legacy and obs buckets + // will be skipped and FSO tree is connected. + FSORepairTool repair = new FSORepairTool(getOmDB(), + getOmDBLocation(), false); + FSORepairTool.Report generatedReport = repair.run(); + + Assertions.assertEquals(connectReport, generatedReport); + assertConnectedTreeReadable("vol1", "fso-bucket"); + assertDeleteTablesEmpty(); + } finally { + // Need to manually delete obs bucket. It cannot be deleted with ofs as + // part of the normal test cleanup. + store.getVolume("vol1").getBucket("obs-bucket") + .deleteKey("prefix/test-key"); + store.getVolume("vol1").deleteBucket("obs-bucket"); + } + } + + + private FSORepairTool.Report buildConnectedTree(String volume, String bucket) + throws Exception { + return buildConnectedTree(volume, bucket, 0); + } + + /** + * Creates a tree with 3 reachable directories and 4 reachable files. 
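+   * Layout: dir1/file1, dir1/file2, dir1/dir2/file3, dir3 (empty), and file4 at the bucket root.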
+ */ + private FSORepairTool.Report buildConnectedTree(String volume, String bucket, + int fileSize) + throws Exception { + Path bucketPath = new Path("/" + volume + "/" + bucket); + Path dir1 = new Path(bucketPath, "dir1"); + Path file1 = new Path(dir1, "file1"); + Path file2 = new Path(dir1, "file2"); + + Path dir2 = new Path(bucketPath, "dir1/dir2"); + Path file3 = new Path(dir2, "file3"); + + Path dir3 = new Path(bucketPath, "dir3"); + Path file4 = new Path(bucketPath, "file4"); + + fs.mkdirs(dir1); + fs.mkdirs(dir2); + fs.mkdirs(dir3); + + // Content to put in every file. + String data = new String(new char[fileSize]); + + FSDataOutputStream stream = fs.create(file1); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + stream = fs.create(file2); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + stream = fs.create(file3); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + stream = fs.create(file4); + stream.write(data.getBytes(StandardCharsets.UTF_8)); + stream.close(); + + assertConnectedTreeReadable(volume, bucket); + + return new FSORepairTool.Report.Builder() + .setReachableDirs(3) + .setReachableFiles(4) + .setReachableBytes(fileSize * 4L) + .build(); + } + + private void assertConnectedTreeReadable(String volume, String bucket) + throws IOException { + Path bucketPath = new Path("/" + volume + "/" + bucket); + Path dir1 = new Path(bucketPath, "dir1"); + Path file1 = new Path(dir1, "file1"); + Path file2 = new Path(dir1, "file2"); + + Path dir2 = new Path(bucketPath, "dir1/dir2"); + Path file3 = new Path(dir2, "file3"); + + Path dir3 = new Path(bucketPath, "dir3"); + Path file4 = new Path(bucketPath, "file4"); + + Assertions.assertTrue(fs.exists(dir1)); + Assertions.assertTrue(fs.exists(dir2)); + Assertions.assertTrue(fs.exists(dir3)); + Assertions.assertTrue(fs.exists(file1)); + Assertions.assertTrue(fs.exists(file2)); + Assertions.assertTrue(fs.exists(file3)); + Assertions.assertTrue(fs.exists(file4)); + } + + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) + throws Exception { + return buildDisconnectedTree(volume, bucket, 0); + } + + /** + * Creates a tree with 2 reachable directories, 1 reachable file, 1 + * unreachable directory, and 3 unreachable files. + */ + private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, + int fileSize) throws Exception { + buildConnectedTree(volume, bucket, fileSize); + + // Manually remove dir1. This should disconnect 3 of the files and 1 of + // the directories. + disconnectDirectory("dir1"); + + assertDisconnectedTreePartiallyReadable(volume, bucket); + + return new FSORepairTool.Report.Builder() + .setReachableDirs(1) + .setReachableFiles(1) + .setReachableBytes(fileSize) + // dir1 does not count towards the unreachable directories the tool + // will see. It was deleted completely so the tool will never see it. 
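+        // Of the directories, only dir2 is left for the tool to count as unreachable.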
+ .setUnreachableDirs(1) + .setUnreachableFiles(3) + .setUnreachableBytes(fileSize * 3L) + .build(); + } + + private void disconnectDirectory(String dirName) throws Exception { + OzoneManager leader = cluster.getOMLeader(); + Table dirTable = + leader.getMetadataManager().getDirectoryTable(); + try (TableIterator> iterator = + dirTable.iterator()) { + while (iterator.hasNext()) { + Table.KeyValue entry = iterator.next(); + String key = entry.getKey(); + if (key.contains(dirName)) { + dirTable.delete(key); + break; + } + } + } + } + + private void assertDisconnectedTreePartiallyReadable( + String volume, String bucket) throws Exception { + Path bucketPath = new Path("/" + volume + "/" + bucket); + Path dir1 = new Path(bucketPath, "dir1"); + Path file1 = new Path(dir1, "file1"); + Path file2 = new Path(dir1, "file2"); + + Path dir2 = new Path(bucketPath, "dir1/dir2"); + Path file3 = new Path(dir2, "file3"); + + Path dir3 = new Path(bucketPath, "dir3"); + Path file4 = new Path(bucketPath, "file4"); + + Assertions.assertFalse(fs.exists(dir1)); + Assertions.assertFalse(fs.exists(dir2)); + Assertions.assertTrue(fs.exists(dir3)); + Assertions.assertFalse(fs.exists(file1)); + Assertions.assertFalse(fs.exists(file2)); + Assertions.assertFalse(fs.exists(file3)); + Assertions.assertTrue(fs.exists(file4)); + } + + /** + * Checks that the disconnected tree's unreachable objects are correctly + * moved to the delete table. If the tree was written and deleted multiple + * times, it makes sure the delete entries with the same name are preserved. + */ + private void assertDisconnectedObjectsMarkedForDelete(int numWrites) + throws Exception { + + Map pendingDeleteDirCounts = new HashMap<>(); + + // Check deleted directory table. + OzoneManager leader = cluster.getOMLeader(); + Table deletedDirTable = + leader.getMetadataManager().getDeletedDirTable(); + try (TableIterator> iterator = + deletedDirTable.iterator()) { + while (iterator.hasNext()) { + Table.KeyValue entry = iterator.next(); + String key = entry.getKey(); + OmKeyInfo value = entry.getValue(); + + String dirName = key.split("/")[4]; + LOG.info("In deletedDirTable, extracting directory name {} from DB " + + "key {}", dirName, key); + + // Check that the correct dir info was added. + // FSO delete path will fill in the whole path to the key in the + // proto when it is deleted. Once the tree is disconnected that can't + // be done, so just make sure the dirName contained in the key name + // somewhere. + Assertions.assertTrue(value.getKeyName().contains(dirName)); + + int count = pendingDeleteDirCounts.getOrDefault(dirName, 0); + pendingDeleteDirCounts.put(dirName, count + 1); + } + } + + // 1 directory is disconnected in the tree. dir1 was totally deleted so + // the repair tool will not see it. + Assertions.assertEquals(1, pendingDeleteDirCounts.size()); + Assertions.assertEquals(numWrites, pendingDeleteDirCounts.get("dir2")); + + // Check that disconnected files were put in deleting tables. 
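+    // Deleted-table keys end with the original file name, which is what the counting below keys off.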
+ Map pendingDeleteFileCounts = new HashMap<>(); + + Table deletedFileTable = + leader.getMetadataManager().getDeletedTable(); + try (TableIterator> iterator = + deletedFileTable.iterator()) { + while (iterator.hasNext()) { + Table.KeyValue entry = iterator.next(); + String key = entry.getKey(); + RepeatedOmKeyInfo value = entry.getValue(); + + String[] keyParts = key.split("/"); + String fileName = keyParts[keyParts.length - 1]; + + LOG.info("In deletedTable, extracting file name {} from DB " + + "key {}", fileName, key); + + for (OmKeyInfo fileInfo: value.getOmKeyInfoList()) { + // Check that the correct file info was added. + Assertions.assertTrue(fileInfo.getKeyName().contains(fileName)); + + int count = pendingDeleteFileCounts.getOrDefault(fileName, 0); + pendingDeleteFileCounts.put(fileName, count + 1); + } + } + } + + // 3 files are disconnected in the tree. + // TODO: dir2 ended up in here with count = 1. file3 also had count=1 + // Likely that the dir2/file3 entry got split in two. + Assertions.assertEquals(3, pendingDeleteFileCounts.size()); + Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file1")); + Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file2")); + Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file3")); + } + +// @Test +// public void testOnSavedDB() throws Exception { +// /* +// Path dir1 = new Path("/vol1/bucket1/dir1"); +// Path file1 = new Path(dir1, "file1"); +// Path file2 = new Path(dir1, "file2"); +// +// Path dir2 = new Path("/vol1/bucket1/dir1/dir2"); +// Path file3 = new Path(dir2, "file3"); +// +// Path dir3 = new Path("/vol1/bucket1/dir3"); +// Path file4 = new Path("/vol1/bucket1/file4"); +// +// ContractTestUtils.touch(fs, file1); +// ContractTestUtils.touch(fs, file2); +// ContractTestUtils.touch(fs, file3); +// ContractTestUtils.touch(fs, file4); +// fs.mkdirs(dir3); +// */ +// FsoRepair repair = new FsoRepair("/Users/erose/Temp/omNode-1/om.db", +// FsoRepair.Mode.DEBUG); +// repair.run(); +// +// /* +// Original: +// Expected: +// 3 reachable dirs, 4 reachable files. +// +// After remove dir1: +// 3 unreachable files, 1 unreachable dir. +// 1 reachable file, 1 reachable dir. +// +// */ +// } + + private void assertDeleteTablesEmpty() throws IOException { + OzoneManager leader = cluster.getOMLeader(); + Assertions.assertTrue(leader.getMetadataManager().getDeletedDirTable().isEmpty()); + Assertions.assertTrue(leader.getMetadataManager().getDeletedTable().isEmpty()); + } + + private void runDeletes() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + + int i = 0; + while (!leader.getMetadataManager().getDeletedDirTable().isEmpty()) { + LOG.info("Running iteration {} of DirectoryDeletingService.", i++); + leader.getKeyManager().getDirDeletingService().runPeriodicalTaskNow(); + // Wait for work from this run to flush through the double buffer. + Thread.sleep(500); + } + + i = 0; + while (!leader.getMetadataManager().getDeletedTable().isEmpty()) { + LOG.info("Running iteration {} of KeyDeletingService.", i++); + leader.getKeyManager().getDeletingService().runPeriodicalTaskNow(); + // Wait for work from this run to flush through the double buffer. 
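+      // The enclosing loop re-checks the table, so a fixed short sleep is enough here.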
+ Thread.sleep(500); + } + } + + private void assertFileAndDirTablesEmpty() throws Exception { + OzoneManager leader = cluster.getOMLeader(); + Assertions.assertTrue(leader.getMetadataManager().getDirectoryTable().isEmpty()); + Assertions.assertTrue(leader.getMetadataManager().getFileTable().isEmpty()); + } + + private DBStore getOmDB() { + return cluster.getOMLeader().getMetadataManager().getStore(); + } + + private String getOmDBLocation() { + return cluster.getOMLeader().getMetadataManager().getStore().getDbLocation().toString(); + } +} diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index 839d01f0fa84..7e4ea7090526 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -121,6 +121,53 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> 2048 + + maven-enforcer-plugin + + + depcheck + + + + banned-rocksdb-imports + process-sources + + enforce + + + + + false + Use managed RocksObjects under org.apache.hadoop.hdds.utils.db.managed instead. + + org.rocksdb.** + + org.rocksdb.AbstractEventListener + org.rocksdb.Checkpoint + org.rocksdb.ColumnFamilyDescriptor + org.rocksdb.ColumnFamilyHandle + org.rocksdb.ColumnFamilyOptions + org.rocksdb.CompactionJobInfo + org.rocksdb.CompressionType + org.rocksdb.DBOptions + org.rocksdb.FlushOptions + org.rocksdb.Holder + org.rocksdb.LiveFileMetaData + org.rocksdb.Options + org.rocksdb.RocksDB + org.rocksdb.RocksDBException + org.rocksdb.SstFileReader + org.rocksdb.TableProperties + org.rocksdb.ReadOptions + org.rocksdb.SstFileReaderIterator + + org.apache.hadoop.hdds.utils.db.managed.* + + + + + + diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java new file mode 100644 index 000000000000..6d936ef7e23a --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug; + +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.kohsuke.MetaInfServices; +import picocli.CommandLine; + +import java.util.concurrent.Callable; + +/** + * Parser for scm.db file. + */ +@CommandLine.Command( + name = "fso-repair", + description = "Identify a disconnected FSO tree, and optionally mark " + + "unreachable entries for deletion. OM should be " + + "stopped while this tool is run. Information will be logged at " + + "INFO and DEBUG levels." 
+) +@MetaInfServices(SubcommandWithParent.class) +public class FSORepairCLI implements Callable, SubcommandWithParent { + + @CommandLine.Option(names = {"--db"}, + required = true, + description = "Path to OM RocksDB") + private String dbPath; + + @CommandLine.Option(names = {"--read-mode-only", "-r"}, + required = true, + description = + "Mode to run the tool in. Read-mode will just log information about unreachable files or directories;" + + "otherwise the tool will move those files and directories to the deleted tables.", + defaultValue = "true") + private boolean readModeOnly; + + @CommandLine.ParentCommand + private OzoneDebug parent; + + @Override + public Void call() throws Exception { + + try { + // TODO case insensitive enum options. + FSORepairTool repairTool = new FSORepairTool(dbPath, readModeOnly); + repairTool.run(); + } catch (Exception ex) { + throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); + } + + System.out.printf("FSO %s finished. See client logs for results.%n", + readModeOnly ? "read-mode" : "repair-mode"); + + return null; + } + + @Override + public Class getParentType() { + return OzoneDebug.class; + } +} + diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java new file mode 100644 index 000000000000..b67699a46578 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java @@ -0,0 +1,687 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.debug; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.helpers.*; +import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.ratis.util.Preconditions; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.Holder; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; + + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.*; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; + +/** + * Tool to identify and repair disconnected FSO trees in all buckets. + * The tool can be run in debug mode, where it will just log information + * about unreachable files or directories, or in repair mode to additionally + * move those files and directories to the deleted tables. If deletes are + * still in progress (the deleted directory table is not empty), the tool may + * report that the tree is disconnected, even though pending deletes would + * fix the issue. + * + * Before using the tool, make sure all OMs are stopped, + * and that all Ratis logs have been flushed to the OM DB. This can be + * done using `ozone admin prepare` before running the tool, and `ozone admin + * cancelprepare` when done. + * + * The tool will run a DFS from each bucket, and save all reachable + * directories as keys in a new temporary RocksDB instance called "reachable.db" + * In the same directory as om.db. + * will then scan the entire file and directory tables for each bucket to see + * if each object's parent is in the reachable table of reachable.db. The + * reachable table will be dropped and recreated for each bucket. + * The tool is idempotent. reachable.db will not be deleted automatically + * when the tool finishes, in case users want to manually inspect it. It can + * be safely deleted once the tool finishes. + */ +public class FSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(FSORepairTool.class); + + private final String omDBPath; + + private final DBStore store; + private final Table volumeTable; + private final Table bucketTable; + private final Table directoryTable; + private final Table fileTable; + private final Table deletedDirectoryTable; + private final Table deletedTable; + // The temporary DB is used to track which items have been seen. + // Since usage of this DB is simple, use it directly from + // RocksDB. 
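+  // Each reachable object is stored under the key /volumeId/bucketId/objectId with an empty value;
+  // key presence is the only thing checked later.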
+ private String reachableDBPath; + private static final byte[] REACHABLE_TABLE = + "reachable".getBytes(StandardCharsets.UTF_8); + private ColumnFamilyHandle reachableCF; + private RocksDB reachableDB; + + private boolean readModeOnly; + + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + + public FSORepairTool(String dbPath, boolean readModeOnly) throws IOException { + this(getStoreFromPath(dbPath), dbPath, readModeOnly); + } + + /** + * Allows passing RocksDB instance from a MiniOzoneCluster directly to this + * class for testing. + */ + @VisibleForTesting + public FSORepairTool(DBStore dbStore, String dbPath, boolean readModeOnly) throws IOException { + this.readModeOnly = readModeOnly; + // Counters to track as we walk the tree. + reachableBytes = 0; + reachableFiles = 0; + reachableDirs = 0; + unreachableBytes = 0; + unreachableFiles = 0; + unreachableDirs = 0; + + this.store = dbStore; + this.omDBPath = dbPath; + volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, + String.class, + OmVolumeArgs.class); + bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE, + String.class, + OmBucketInfo.class); + directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE, + String.class, + OmDirectoryInfo.class); + fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, + String.class, + OmKeyInfo.class); + deletedDirectoryTable = store.getTable( + OmMetadataManagerImpl.DELETED_DIR_TABLE, + String.class, + OmKeyInfo.class); + deletedTable = store.getTable( + OmMetadataManagerImpl.DELETED_TABLE, + String.class, + RepeatedOmKeyInfo.class); + } + + private static DBStore getStoreFromPath(String dbPath) throws IOException { + File omDBFile = new File(dbPath); + if (!omDBFile.exists() || !omDBFile.isDirectory()) { + throw new IOException(String.format("Specified OM DB instance %s does " + + "not exist or is not a RocksDB directory.", dbPath)); + } + // Load RocksDB and tables needed. + return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), + new File(dbPath).getParentFile()); + } + + public Report run() throws IOException { + // Iterate all volumes. + try (TableIterator> + volumeIterator = volumeTable.iterator()) { + openReachableDB(); + + while (volumeIterator.hasNext()) { + Table.KeyValue volumeEntry = + volumeIterator.next(); + String volumeKey = volumeEntry.getKey(); + + // Iterate all buckets in the volume. + try (TableIterator> + bucketIterator = bucketTable.iterator()) { + bucketIterator.seek(volumeKey); + while (bucketIterator.hasNext()) { + Table.KeyValue bucketEntry = + bucketIterator.next(); + String bucketKey = bucketEntry.getKey(); + OmBucketInfo bucketInfo = bucketEntry.getValue(); + + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + LOG.debug("Skipping non-FSO bucket {}", bucketKey); + continue; + } + + // Stop this loop once we have seen all buckets in the current + // volume. + if (!bucketKey.startsWith(volumeKey)) { + break; + } + + // Start with a fresh list of reachable files for this bucket. + // Also clears partial state if the tool failed on a previous run. + dropReachableTableIfExists(); + createReachableTable(); + // Process one bucket's FSO tree at a time. 
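+          // Mark everything reachable from the bucket root first, then sweep the file and
+          // directory tables for entries whose parent was never marked.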
+ markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo); + handleUnreachableObjects(volumeEntry.getValue(), bucketInfo); + dropReachableTableIfExists(); + } + } + } + } finally { + closeReachableDB(); + } + + return buildReportAndLog(); + } + + private Report buildReportAndLog() { + Report report = new Report.Builder() + .setReachableDirs(reachableDirs) + .setReachableFiles(reachableFiles) + .setReachableBytes(reachableBytes) + .setUnreachableDirs(unreachableDirs) + .setUnreachableFiles(unreachableFiles) + .setUnreachableBytes(unreachableBytes) + .build(); + + LOG.info("\n{}", report); + return report; + } + + private void markReachableObjectsInBucket(OmVolumeArgs volume, + OmBucketInfo bucket) throws IOException { + LOG.info("Processing bucket {}", bucket.getBucketName()); + // Only put directories in the stack. + // Directory keys should have the form /volumeID/bucketID/parentID/name. + Stack dirKeyStack = new Stack<>(); + + // Since the tool uses parent directories to check for reachability, add + // a reachable entry for the bucket as well. + addReachableEntry(volume, bucket, bucket); + // Initialize the stack with all immediate child directories of the + // bucket, and mark them all as reachable. + Collection childDirs = + getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); + dirKeyStack.addAll(childDirs); + + while (!dirKeyStack.isEmpty()) { + // Get one directory and process its immediate children. + String currentDirKey = dirKeyStack.pop(); + OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); + if (currentDir == null) { + LOG.error("Directory key {} to be processed was not found in the " + + "directory table", currentDirKey); + continue; + } + + // TODO revisit this for a more memory efficient implementation, + // possibly making better use of RocksDB iterators. + childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, + currentDir); + dirKeyStack.addAll(childDirs); + } + } + + private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { + // Check for unreachable directories in the bucket. + String bucketPrefix = OM_KEY_PREFIX + + volume.getObjectID() + + OM_KEY_PREFIX + + bucket.getObjectID(); + + try (TableIterator> dirIterator = + directoryTable.iterator()) { + dirIterator.seek(bucketPrefix); + while (dirIterator.hasNext()) { + Table.KeyValue dirEntry = dirIterator.next(); + String dirKey = dirEntry.getKey(); + + // Only search directories in this bucket. + if (!dirKey.startsWith(bucketPrefix)) { + break; + } + + if (!isReachable(dirKey)) { + LOG.debug("Found unreachable directory: {}", dirKey); + unreachableDirs++; + + if (!readModeOnly) { + LOG.debug("Marking unreachable directory {} for deletion.", dirKey); + OmDirectoryInfo dirInfo = dirEntry.getValue(); + markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), + dirKey, dirInfo); + } + } + } + } + + // Check for unreachable files + try (TableIterator> + fileIterator = fileTable.iterator()) { + fileIterator.seek(bucketPrefix); + while (fileIterator.hasNext()) { + Table.KeyValue fileEntry = fileIterator.next(); + String fileKey = fileEntry.getKey(); + // Only search files in this bucket. 
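+        // Iteration is ordered by key, so the first non-matching prefix ends this bucket.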
+ if (!fileKey.startsWith(bucketPrefix)) { + break; + } + + OmKeyInfo fileInfo = fileEntry.getValue(); + if (!isReachable(fileKey)) { + LOG.debug("Found unreachable file: {}", fileKey); + unreachableBytes += fileInfo.getDataSize(); + unreachableFiles++; + + if (!readModeOnly) { + LOG.debug("Marking unreachable file {} for deletion.", + fileKey); + markFileForDeletion(fileKey, fileInfo); + } + } else { + // NOTE: We are deserializing the proto of every reachable file + // just to log it's size. If we don't need this information we could + // save time by skipping this step. + reachableBytes += fileInfo.getDataSize(); + reachableFiles++; + } + } + } + } + + private void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { + try (BatchOperation batch = store.initBatchOperation()) { + fileTable.deleteWithBatch(batch, fileKey); + + RepeatedOmKeyInfo originalRepeatedKeyInfo = deletedTable.get(fileKey); + RepeatedOmKeyInfo updatedRepeatedOmKeyInfo = OmUtils.prepareKeyForDelete( + fileInfo, fileInfo.getUpdateID(), true); + // NOTE: The FSO code seems to write the open key entry with the whole + // path, using the object's names instead of their ID. This would onyl + // be possible when the file is deleted explicitly, and not part of a + // directory delete. It is also not possible here if the file's parent + // is gone. The name of the key does not matter so just use IDs. + deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo); + + LOG.debug("Added entry {} to open key table: {}", + fileKey, updatedRepeatedOmKeyInfo); + + store.commitBatchOperation(batch); + } + } + + private void markDirectoryForDeletion(String volumeName, String bucketName, + String dirKeyName, OmDirectoryInfo dirInfo) throws IOException { + try (BatchOperation batch = store.initBatchOperation()) { + directoryTable.deleteWithBatch(batch, dirKeyName); + // HDDS-7592: Make directory entries in deleted dir table unique. + String deleteDirKeyName = + dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID(); + + // Convert the directory to OmKeyInfo for deletion. + OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo( + volumeName, bucketName, dirInfo, dirInfo.getName()); + deletedDirectoryTable.putWithBatch(batch, deleteDirKeyName, dirAsKeyInfo); + + store.commitBatchOperation(batch); + } + } + + private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume, + OmBucketInfo bucket, + WithObjectID currentDir) throws IOException { + + Collection childDirs = new ArrayList<>(); + + try (TableIterator> + dirIterator = directoryTable.iterator()) { + String dirPrefix = buildReachableKey(volume, bucket, currentDir); + // Start searching the directory table at the current directory's + // prefix to get its immediate children. + dirIterator.seek(dirPrefix); + while (dirIterator.hasNext()) { + Table.KeyValue childDirEntry = + dirIterator.next(); + String childDirKey = childDirEntry.getKey(); + // Stop processing once we have seen all immediate children of this + // directory. + if (!childDirKey.startsWith(dirPrefix)) { + break; + } + // This directory was reached by search. + addReachableEntry(volume, bucket, childDirEntry.getValue()); + childDirs.add(childDirKey); + reachableDirs++; + } + } + + return childDirs; + } + + /** + * Add the specified object to the reachable table, indicating it is part + * of the connected FSO tree. 
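+   * Re-adding an object that is already marked is harmless, since only key presence matters.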
+ */ + private void addReachableEntry(OmVolumeArgs volume, + OmBucketInfo bucket, WithObjectID object) throws IOException { + byte[] reachableKey = buildReachableKey(volume, bucket, object) + .getBytes(StandardCharsets.UTF_8); + try { + // No value is needed for this table. + reachableDB.put(reachableCF, reachableKey, new byte[]{}); + } catch (RocksDBException ex) { + throw new IOException(ex.getMessage(), ex); + } + } + + /** + * Build an entry in the reachable table for the current object, which + * could be a bucket, file or directory. + */ + private static String buildReachableKey(OmVolumeArgs volume, + OmBucketInfo bucket, WithObjectID object) { + return OM_KEY_PREFIX + + volume.getObjectID() + + OM_KEY_PREFIX + + bucket.getObjectID() + + OM_KEY_PREFIX + + object.getObjectID(); + } + + /** + * + * @param fileOrDirKey The key of a file or directory in RocksDB. + * @return true if the entry's parent is in the reachable table. + */ + private boolean isReachable(String fileOrDirKey) throws IOException { + byte[] reachableParentKey = + buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); + try { + if (reachableDB.keyMayExist( + reachableCF, reachableParentKey, new Holder<>())) { + return reachableDB.get(reachableCF, reachableParentKey) != null; + } else { + return false; + } + } catch (RocksDBException ex) { + throw new IOException(ex.getMessage(), ex); + } + } + + /** + * Build an entry in the reachable table for the current object's parent + * object. The object could be a file or directory. + */ + private static String buildReachableParentKey(String fileOrDirKey) { + String[] keyParts = fileOrDirKey.split(OM_KEY_PREFIX); + // Should be /volID/bucketID/parentID/name + // The first part will be blank since key begins with a slash. + Preconditions.assertTrue(keyParts.length >= 4); + String volumeID = keyParts[1]; + String bucketID = keyParts[2]; + String parentID = keyParts[3]; + + return OM_KEY_PREFIX + + volumeID + + OM_KEY_PREFIX + + bucketID + + OM_KEY_PREFIX + + parentID; + } + + private void openReachableDB() throws IOException { + File reachableDBFile = new File(new File(omDBPath).getParentFile(), + "reachable.db"); + LOG.info("Creating database of reachable directories at {}", + reachableDBFile); + try { + // Delete the DB from the last run if it exists. 
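+      // A leftover copy can exist because the DB is deliberately kept after a run for inspection.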
+ if (reachableDBFile.exists()) { + FileUtils.deleteDirectory(reachableDBFile); + } + reachableDBPath = reachableDBFile.toString(); + reachableDB = RocksDB.open(reachableDBPath); + } catch (RocksDBException ex) { + if (reachableDB != null) { + reachableDB.close(); + } + throw new IOException(ex.getMessage(), ex); + } + } + + private void closeReachableDB() { + if (reachableDB != null) { + reachableDB.close(); + } + } + + private void dropReachableTableIfExists() throws IOException { + try { + List availableCFs = RocksDB.listColumnFamilies(new Options(), + reachableDBPath); + boolean cfFound = false; + for (byte[] cfNameBytes: availableCFs) { + if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE, UTF_8))) { + cfFound = true; + break; + } + } + + if (cfFound) { + reachableDB.dropColumnFamily(reachableCF); + } + } catch (RocksDBException ex) { + throw new IOException(ex.getMessage(), ex); + } finally { + if (reachableCF != null) { + reachableCF.close(); + } + } + } + + private void createReachableTable() throws IOException { + try { + reachableCF = reachableDB.createColumnFamily( + new ColumnFamilyDescriptor(REACHABLE_TABLE)); + } catch (RocksDBException ex) { + if (reachableCF != null) { + reachableCF.close(); + } + throw new IOException(ex.getMessage(), ex); + } + } + + /** + * Define a Report to be created. + */ + public static class Report { + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + /** + * Builds one report that is the aggregate of multiple others. + */ + public Report(Report... reports) { + reachableBytes = 0; + reachableFiles = 0; + reachableDirs = 0; + unreachableBytes = 0; + unreachableFiles = 0; + unreachableDirs = 0; + + for (Report report: reports) { + reachableBytes += report.reachableBytes; + reachableFiles += report.reachableFiles; + reachableDirs += report.reachableDirs; + unreachableBytes += report.unreachableBytes; + unreachableFiles += report.unreachableFiles; + unreachableDirs += report.unreachableDirs; + } + } + + private Report(Builder builder) { + reachableBytes = builder.reachableBytes; + reachableFiles = builder.reachableFiles; + reachableDirs = builder.reachableDirs; + unreachableBytes = builder.unreachableBytes; + unreachableFiles = builder.unreachableFiles; + unreachableDirs = builder.unreachableDirs; + } + + public long getReachableBytes() { + return reachableBytes; + } + + public long getReachableFiles() { + return reachableFiles; + } + + public long getReachableDirs() { + return reachableDirs; + } + + public long getUnreachableBytes() { + return unreachableBytes; + } + + public long getUnreachableFiles() { + return unreachableFiles; + } + + public long getUnreachableDirs() { + return unreachableDirs; + } + + @Override + public String toString() { + return "Reachable:" + + "\n\tDirectories: " + reachableDirs + + "\n\tFiles: " + reachableFiles + + "\n\tBytes: " + reachableBytes + + "\nUnreachable:" + + "\n\tDirectories: " + unreachableDirs + + "\n\tFiles: " + unreachableFiles + + "\n\tBytes: " + unreachableBytes; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + Report report = (Report) other; + + // Useful for testing. 
+ LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); + + return reachableBytes == report.reachableBytes && + reachableFiles == report.reachableFiles && + reachableDirs == report.reachableDirs && + unreachableBytes == report.unreachableBytes && + unreachableFiles == report.unreachableFiles && + unreachableDirs == report.unreachableDirs; + } + + @Override + public int hashCode() { + return Objects.hash(reachableBytes, + reachableFiles, + reachableDirs, + unreachableBytes, + unreachableFiles, + unreachableDirs); + } + + /** + * Builder class for a Report. + */ + public static final class Builder { + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + public Builder() { + } + + public Builder setReachableBytes(long reachableBytes) { + this.reachableBytes = reachableBytes; + return this; + } + + public Builder setReachableFiles(long reachableFiles) { + this.reachableFiles = reachableFiles; + return this; + } + + public Builder setReachableDirs(long reachableDirs) { + this.reachableDirs = reachableDirs; + return this; + } + + public Builder setUnreachableBytes(long unreachableBytes) { + this.unreachableBytes = unreachableBytes; + return this; + } + + public Builder setUnreachableFiles(long unreachableFiles) { + this.unreachableFiles = unreachableFiles; + return this; + } + + public Builder setUnreachableDirs(long unreachableDirs) { + this.unreachableDirs = unreachableDirs; + return this; + } + + public Report build() { + return new Report(this); + } + } + } +} From 8863499211d3749a8e710aaf9d2027989a4c561b Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Mon, 29 Apr 2024 21:04:58 -0700 Subject: [PATCH 2/6] fix import --- .../apache/hadoop/fs/ozone/TestFSORepairTool.java | 7 ++++++- .../apache/hadoop/ozone/debug/FSORepairTool.java | 15 ++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index e23ce993a22c..8d57a6139513 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -41,7 +41,12 @@ import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; -import org.junit.jupiter.api.*; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java index b67699a46578..69c01b5d1027 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java @@ -26,8 +26,14 @@ import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import 
org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.WithObjectID; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; -import org.apache.hadoop.ozone.om.helpers.*; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; import org.apache.ratis.util.Preconditions; import org.rocksdb.ColumnFamilyDescriptor; @@ -37,14 +43,17 @@ import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.Stack; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; From e8f9589eb909f888b4d07231bbe39add59f2cc2b Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Thu, 2 May 2024 17:17:53 -0700 Subject: [PATCH 3/6] extract common codes between FSODebugCLI and FSORepairCLI to separated base class FSOBaseCLI and FSOBaseTool --- .../apache/hadoop/hdds/cli/GenericCli.java | 4 +- .../hdds/utils/db/managed/ManagedRocksDB.java | 42 +++++++++ .../hadoop/fs/ozone/TestFSORepairTool.java | 11 ++- hadoop-ozone/tools/pom.xml | 47 ---------- .../FSOBaseCLI.java} | 41 ++++---- .../FSOBaseTool.java} | 93 ++++++++++--------- .../hadoop/ozone/common/package-info.java | 27 ++++++ .../hadoop/ozone/debug/FSODebugCLI.java | 67 +++++++++++++ .../hadoop/ozone/repair/om/FSORepairCLI.java | 67 +++++++++++++ .../hadoop/ozone/repair/om/FSORepairTool.java | 39 ++++++++ 10 files changed, 321 insertions(+), 117 deletions(-) rename hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/{debug/FSORepairCLI.java => common/FSOBaseCLI.java} (64%) rename hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/{debug/FSORepairTool.java => common/FSOBaseTool.java} (89%) create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java index 4c5f3fdc872f..d4857e92f547 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java @@ -39,10 +39,10 @@ public class GenericCli implements Callable, GenericParentCommand { public static final int EXECUTION_ERROR_EXIT_CODE = -1; - @Option(names = {"--verbose"}, +` @Option(names = {"--verbose"}, description = "More verbose output. 
Show the stack trace of the errors.") private boolean verbose; - +` @Option(names = {"-D", "--set"}) private Map configurationOverrides = new HashMap<>(); diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java index 5a5a577351b1..8357a3173525 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java @@ -21,7 +21,9 @@ import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.DBOptions; +import org.rocksdb.Holder; import org.rocksdb.LiveFileMetaData; +import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.slf4j.Logger; @@ -87,6 +89,11 @@ public static ManagedRocksDB open( ); } + public static ManagedRocksDB open(final String path) throws RocksDBException { + return new ManagedRocksDB(RocksDB.open(path)); + } + + /** * Delete liveMetaDataFile from rocks db using RocksDB#deleteFile Api. * This function makes the RocksDB#deleteFile Api synchronized by waiting @@ -102,4 +109,39 @@ public void deleteFile(LiveFileMetaData fileToBeDeleted) File file = new File(fileToBeDeleted.path(), fileToBeDeleted.fileName()); ManagedRocksObjectUtils.waitForFileDelete(file, Duration.ofSeconds(60)); } + + public void put(ColumnFamilyHandle columnFamilyHandle, + byte[] key, byte[] value) throws RocksDBException { + this.get().put(columnFamilyHandle, key, value); + } + + public byte[] get(ColumnFamilyHandle columnFamilyHandle, + byte[] key) throws RocksDBException { + return this.get().get(columnFamilyHandle, key); + } + + public ColumnFamilyHandle createColumnFamily( + ColumnFamilyDescriptor columnFamilyDescriptor) + throws RocksDBException { + return this.get().createColumnFamily(columnFamilyDescriptor); + } + + public void dropColumnFamily(ColumnFamilyHandle columnFamilyHandle) + throws RocksDBException { + this.get().dropColumnFamily(columnFamilyHandle); + } + + public boolean keyMayExist(ColumnFamilyHandle columnFamilyHandle, byte[] key, Holder valueHolder) { + return this.get().keyMayExist(columnFamilyHandle, key, valueHolder); + } + + public void close() { + this.get().close(); + } + + public static List listColumnFamilies(Options options, + String path) throws RocksDBException { + return RocksDB.listColumnFamilies(options, path); + } + } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 8d57a6139513..0f3d7c7c2891 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -34,13 +34,14 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.debug.FSORepairTool; +import org.apache.hadoop.ozone.common.FSOBaseTool; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import 
org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.repair.om.FSORepairTool; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; @@ -128,9 +129,9 @@ public void testConnectedTreeOneBucket() throws Exception { FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); // Test the connected tree in debug mode. - FSORepairTool repair = new FSORepairTool(getOmDB(), + FSOBaseTool fsoTool = new FSOBaseTool(getOmDB(), getOmDBLocation(), true); - FSORepairTool.Report debugReport = repair.run(); + FSOBaseTool.Report debugReport = fsoTool.run(); Assertions.assertEquals(expectedReport, debugReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -138,9 +139,9 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. - repair = new FSORepairTool(getOmDB(), + fsoTool = new FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report repairReport = repair.run(); + FSORepairTool.Report repairReport = fsoTool.run(); Assertions.assertEquals(expectedReport, repairReport); assertConnectedTreeReadable("vol1", "bucket1"); diff --git a/hadoop-ozone/tools/pom.xml b/hadoop-ozone/tools/pom.xml index 7e4ea7090526..839d01f0fa84 100644 --- a/hadoop-ozone/tools/pom.xml +++ b/hadoop-ozone/tools/pom.xml @@ -121,53 +121,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> 2048 - - maven-enforcer-plugin - - - depcheck - - - - banned-rocksdb-imports - process-sources - - enforce - - - - - false - Use managed RocksObjects under org.apache.hadoop.hdds.utils.db.managed instead. - - org.rocksdb.** - - org.rocksdb.AbstractEventListener - org.rocksdb.Checkpoint - org.rocksdb.ColumnFamilyDescriptor - org.rocksdb.ColumnFamilyHandle - org.rocksdb.ColumnFamilyOptions - org.rocksdb.CompactionJobInfo - org.rocksdb.CompressionType - org.rocksdb.DBOptions - org.rocksdb.FlushOptions - org.rocksdb.Holder - org.rocksdb.LiveFileMetaData - org.rocksdb.Options - org.rocksdb.RocksDB - org.rocksdb.RocksDBException - org.rocksdb.SstFileReader - org.rocksdb.TableProperties - org.rocksdb.ReadOptions - org.rocksdb.SstFileReaderIterator - - org.apache.hadoop.hdds.utils.db.managed.* - - - - - - diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java similarity index 64% rename from hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java rename to hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java index 6d936ef7e23a..809fc74b7892 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hadoop.ozone.debug; +package org.apache.hadoop.ozone.common; import org.apache.hadoop.hdds.cli.SubcommandWithParent; import org.kohsuke.MetaInfServices; @@ -28,51 +28,56 @@ * Parser for scm.db file. */ @CommandLine.Command( - name = "fso-repair", + name = "fso-tree", description = "Identify a disconnected FSO tree, and optionally mark " + "unreachable entries for deletion. OM should be " + "stopped while this tool is run. Information will be logged at " + "INFO and DEBUG levels." 
) @MetaInfServices(SubcommandWithParent.class) -public class FSORepairCLI implements Callable, SubcommandWithParent { +public class FSOBaseCLI implements Callable, SubcommandWithParent { @CommandLine.Option(names = {"--db"}, required = true, description = "Path to OM RocksDB") private String dbPath; - @CommandLine.Option(names = {"--read-mode-only", "-r"}, - required = true, - description = - "Mode to run the tool in. Read-mode will just log information about unreachable files or directories;" + - "otherwise the tool will move those files and directories to the deleted tables.", - defaultValue = "true") - private boolean readModeOnly; + @CommandLine.Option(names = {"--verbose"}, + description = "More verbose output. ") + private boolean verbose; - @CommandLine.ParentCommand - private OzoneDebug parent; @Override public Void call() throws Exception { try { // TODO case insensitive enum options. - FSORepairTool repairTool = new FSORepairTool(dbPath, readModeOnly); - repairTool.run(); + FSOBaseTool + baseTool = new FSOBaseTool(dbPath, true); + baseTool.run(); } catch (Exception ex) { - throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); + throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); } - System.out.printf("FSO %s finished. See client logs for results.%n", - readModeOnly ? "read-mode" : "repair-mode"); + if (verbose) { + System.out.println("FSO inspection finished. See client logs for results."); + } return null; } @Override public Class getParentType() { - return OzoneDebug.class; + throw new UnsupportedOperationException("Should not be called from " + + "FSOBaseCLI directly."); + } + + public String getDbPath() { + return dbPath; + } + + public boolean getVerbose() { + return verbose; } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java similarity index 89% rename from hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java rename to hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java index 69c01b5d1027..5f729ad0aa39 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java @@ -15,8 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +package org.apache.hadoop.ozone.common; -package org.apache.hadoop.ozone.debug; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FileUtils; @@ -25,24 +25,24 @@ import org.apache.hadoop.hdds.utils.db.DBStore; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions; +import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; -import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.WithObjectID; -import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.hadoop.ozone.repair.om.FSORepairTool; import org.apache.ratis.util.Preconditions; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.Holder; -import org.rocksdb.Options; -import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,11 +59,9 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; /** - * Tool to identify and repair disconnected FSO trees in all buckets. - * The tool can be run in debug mode, where it will just log information - * about unreachable files or directories, or in repair mode to additionally - * move those files and directories to the deleted tables. If deletes are - * still in progress (the deleted directory table is not empty), the tool may + * Base Tool to identify disconnected FSO trees in all buckets. + * The tool will log information about unreachable files or directories. + * If deletes are still in progress (the deleted directory table is not empty), the tool may * report that the tree is disconnected, even though pending deletes would * fix the issue. * @@ -82,7 +80,7 @@ * when the tool finishes, in case users want to manually inspect it. It can * be safely deleted once the tool finishes. */ -public class FSORepairTool { +public class FSOBaseTool { public static final Logger LOG = LoggerFactory.getLogger(FSORepairTool.class); @@ -102,9 +100,7 @@ public class FSORepairTool { private static final byte[] REACHABLE_TABLE = "reachable".getBytes(StandardCharsets.UTF_8); private ColumnFamilyHandle reachableCF; - private RocksDB reachableDB; - - private boolean readModeOnly; + private ManagedRocksDB reachableDB; private long reachableBytes; private long reachableFiles; @@ -112,10 +108,10 @@ public class FSORepairTool { private long unreachableBytes; private long unreachableFiles; private long unreachableDirs; + private boolean dryRun; - - public FSORepairTool(String dbPath, boolean readModeOnly) throws IOException { - this(getStoreFromPath(dbPath), dbPath, readModeOnly); + public FSOBaseTool(String dbPath, boolean dryRun) throws IOException { + this(getStoreFromPath(dbPath), dbPath, dryRun); } /** @@ -123,8 +119,8 @@ public FSORepairTool(String dbPath, boolean readModeOnly) throws IOException { * class for testing. 
*/ @VisibleForTesting - public FSORepairTool(DBStore dbStore, String dbPath, boolean readModeOnly) throws IOException { - this.readModeOnly = readModeOnly; + public FSOBaseTool(DBStore dbStore, String dbPath, boolean isDryRun) throws IOException { + dryRun = isDryRun; // Counters to track as we walk the tree. reachableBytes = 0; reachableFiles = 0; @@ -157,7 +153,7 @@ public FSORepairTool(DBStore dbStore, String dbPath, boolean readModeOnly) throw RepeatedOmKeyInfo.class); } - private static DBStore getStoreFromPath(String dbPath) throws IOException { + protected static DBStore getStoreFromPath(String dbPath) throws IOException { File omDBFile = new File(dbPath); if (!omDBFile.exists() || !omDBFile.isDirectory()) { throw new IOException(String.format("Specified OM DB instance %s does " + @@ -168,10 +164,10 @@ private static DBStore getStoreFromPath(String dbPath) throws IOException { new File(dbPath).getParentFile()); } - public Report run() throws IOException { + public FSORepairTool.Report run() throws IOException { // Iterate all volumes. try (TableIterator> - volumeIterator = volumeTable.iterator()) { + volumeIterator = volumeTable.iterator()) { openReachableDB(); while (volumeIterator.hasNext()) { @@ -218,8 +214,8 @@ public Report run() throws IOException { return buildReportAndLog(); } - private Report buildReportAndLog() { - Report report = new Report.Builder() + private FSORepairTool.Report buildReportAndLog() { + FSORepairTool.Report report = new FSORepairTool.Report.Builder() .setReachableDirs(reachableDirs) .setReachableFiles(reachableFiles) .setReachableBytes(reachableBytes) @@ -290,7 +286,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) LOG.debug("Found unreachable directory: {}", dirKey); unreachableDirs++; - if (!readModeOnly) { + if (dryRun) { LOG.debug("Marking unreachable directory {} for deletion.", dirKey); OmDirectoryInfo dirInfo = dirEntry.getValue(); markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), @@ -318,7 +314,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) unreachableBytes += fileInfo.getDataSize(); unreachableFiles++; - if (!readModeOnly) { + if (dryRun) { LOG.debug("Marking unreachable file {} for deletion.", fileKey); markFileForDeletion(fileKey, fileInfo); @@ -334,7 +330,7 @@ private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) } } - private void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { + protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { try (BatchOperation batch = store.initBatchOperation()) { fileTable.deleteWithBatch(batch, fileKey); @@ -355,7 +351,7 @@ private void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOEx } } - private void markDirectoryForDeletion(String volumeName, String bucketName, + protected void markDirectoryForDeletion(String volumeName, String bucketName, String dirKeyName, OmDirectoryInfo dirInfo) throws IOException { try (BatchOperation batch = store.initBatchOperation()) { directoryTable.deleteWithBatch(batch, dirKeyName); @@ -379,7 +375,7 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo Collection childDirs = new ArrayList<>(); try (TableIterator> - dirIterator = directoryTable.iterator()) { + dirIterator = directoryTable.iterator()) { String dirPrefix = buildReachableKey(volume, bucket, currentDir); // Start searching the directory table at the current directory's // prefix to get its immediate 
children. @@ -438,7 +434,7 @@ private static String buildReachableKey(OmVolumeArgs volume, * @param fileOrDirKey The key of a file or directory in RocksDB. * @return true if the entry's parent is in the reachable table. */ - private boolean isReachable(String fileOrDirKey) throws IOException { + protected boolean isReachable(String fileOrDirKey) throws IOException { byte[] reachableParentKey = buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); try { @@ -485,7 +481,7 @@ private void openReachableDB() throws IOException { FileUtils.deleteDirectory(reachableDBFile); } reachableDBPath = reachableDBFile.toString(); - reachableDB = RocksDB.open(reachableDBPath); + reachableDB = ManagedRocksDB.open(reachableDBPath); } catch (RocksDBException ex) { if (reachableDB != null) { reachableDB.close(); @@ -502,7 +498,8 @@ private void closeReachableDB() { private void dropReachableTableIfExists() throws IOException { try { - List availableCFs = RocksDB.listColumnFamilies(new Options(), + List + availableCFs = ManagedRocksDB.listColumnFamilies(new ManagedOptions(), reachableDBPath); boolean cfFound = false; for (byte[] cfNameBytes: availableCFs) { @@ -550,7 +547,7 @@ public static class Report { /** * Builds one report that is the aggregate of multiple others. */ - public Report(Report... reports) { + public Report(FSORepairTool.Report... reports) { reachableBytes = 0; reachableFiles = 0; reachableDirs = 0; @@ -558,7 +555,7 @@ public Report(Report... reports) { unreachableFiles = 0; unreachableDirs = 0; - for (Report report: reports) { + for (FSORepairTool.Report report: reports) { reachableBytes += report.reachableBytes; reachableFiles += report.reachableFiles; reachableDirs += report.reachableDirs; @@ -568,7 +565,7 @@ public Report(Report... reports) { } } - private Report(Builder builder) { + private Report(FSORepairTool.Report.Builder builder) { reachableBytes = builder.reachableBytes; reachableFiles = builder.reachableFiles; reachableDirs = builder.reachableDirs; @@ -621,7 +618,7 @@ public boolean equals(Object other) { if (other == null || getClass() != other.getClass()) { return false; } - Report report = (Report) other; + FSORepairTool.Report report = (FSORepairTool.Report) other; // Useful for testing. 
LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); @@ -658,38 +655,44 @@ public static final class Builder { public Builder() { } - public Builder setReachableBytes(long reachableBytes) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setReachableBytes(long reachableBytes) { this.reachableBytes = reachableBytes; return this; } - public Builder setReachableFiles(long reachableFiles) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setReachableFiles(long reachableFiles) { this.reachableFiles = reachableFiles; return this; } - public Builder setReachableDirs(long reachableDirs) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setReachableDirs(long reachableDirs) { this.reachableDirs = reachableDirs; return this; } - public Builder setUnreachableBytes(long unreachableBytes) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setUnreachableBytes(long unreachableBytes) { this.unreachableBytes = unreachableBytes; return this; } - public Builder setUnreachableFiles(long unreachableFiles) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setUnreachableFiles(long unreachableFiles) { this.unreachableFiles = unreachableFiles; return this; } - public Builder setUnreachableDirs(long unreachableDirs) { + @SuppressWarnings("checkstyle:hiddenfield") + public FSOBaseTool.Report.Builder setUnreachableDirs(long unreachableDirs) { this.unreachableDirs = unreachableDirs; return this; } - public Report build() { - return new Report(this); + public FSOBaseTool.Report build() { + return new FSOBaseTool.Report(this); } } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java new file mode 100644 index 000000000000..537abfad32fa --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *

+ * Shared utility classes for the Ozone debug and repair FSO tree tools. + */ +package org.apache.hadoop.ozone.common; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java new file mode 100644 index 000000000000..26ec8614aa21 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug; + +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.apache.hadoop.ozone.common.FSOBaseCLI; +import org.apache.hadoop.ozone.common.FSOBaseTool; +import org.kohsuke.MetaInfServices; +import picocli.CommandLine; + +/** + * Read-only CLI that inspects the FSO tree from the Ozone debug command. + */ +@CommandLine.Command( + name = "fso-tree", + description = "Identify a disconnected FSO tree and report " + + "unreachable files or directories without modifying them. OM should be " + + "stopped while this tool is run. Information will be logged at " + + "INFO and DEBUG levels." +) +@MetaInfServices(SubcommandWithParent.class) +public class FSODebugCLI extends FSOBaseCLI { + + @CommandLine.ParentCommand + private OzoneDebug parent; + + @Override + public Void call() throws Exception { + + try { + // TODO case insensitive enum options. + FSOBaseTool + baseTool = new FSOBaseTool(getDbPath(), true); + baseTool.run(); + } catch (Exception ex) { + throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); + } + + if (getVerbose()) { + System.out.println("FSO inspection finished. See client logs for results."); + } + + return null; + } + + @Override + public Class getParentType() { + return OzoneDebug.class; + } +} + diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java new file mode 100644 index 000000000000..c95a60394ad8 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import org.apache.hadoop.hdds.cli.SubcommandWithParent; +import org.apache.hadoop.ozone.common.FSOBaseCLI; +import org.apache.hadoop.ozone.repair.OzoneRepair; +import org.kohsuke.MetaInfServices; +import picocli.CommandLine; + +/** + * CLI that repairs a disconnected FSO tree from the Ozone repair command. + */ +@CommandLine.Command( + name = "fso-tree-repair", + description = "Identify and repair a disconnected FSO tree, and mark " + + "unreachable entries for deletion. OM should be " + + "stopped while this tool is run. Information will be logged at " + + "INFO and DEBUG levels." +) +@MetaInfServices(SubcommandWithParent.class) +public class FSORepairCLI extends FSOBaseCLI { + + @CommandLine.ParentCommand + private OzoneRepair parent; + + @Override + public Void call() throws Exception { + + try { + // TODO case insensitive enum options. + FSORepairTool + repairTool = new FSORepairTool(getDbPath(), false); + repairTool.run(); + } catch (Exception ex) { + throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); + } + + if (getVerbose()) { + System.out.println("FSO repair finished. See client logs for results."); + } + + return null; + } + + @Override + public Class getParentType() { + return OzoneRepair.class; + } +} + diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java new file mode 100644 index 000000000000..ea5bf8625123 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.repair.om; + +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.ozone.common.FSOBaseTool; + +import java.io.IOException; + +/** + * Tool to identify and repair disconnected FSO trees in all buckets.
+ */ +public class FSORepairTool extends FSOBaseTool { + + public FSORepairTool(String dbPath, boolean dryRun) throws IOException { + this(getStoreFromPath(dbPath), dbPath, dryRun); + } + + public FSORepairTool(DBStore dbStore, String dbPath, boolean dryRun) throws IOException { + super(dbStore, dbPath, dryRun); + } + +} From 3b1c0fff6f680bcd54f4c12bee634e311620ff4c Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Thu, 2 May 2024 17:26:14 -0700 Subject: [PATCH 4/6] removed unused test --- .../hadoop/fs/ozone/TestFSORepairTool.java | 35 ------------------- 1 file changed, 35 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index 0f3d7c7c2891..c9c8c226b49e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -506,41 +506,6 @@ private void assertDisconnectedObjectsMarkedForDelete(int numWrites) Assertions.assertEquals(numWrites, pendingDeleteFileCounts.get("file3")); } -// @Test -// public void testOnSavedDB() throws Exception { -// /* -// Path dir1 = new Path("/vol1/bucket1/dir1"); -// Path file1 = new Path(dir1, "file1"); -// Path file2 = new Path(dir1, "file2"); -// -// Path dir2 = new Path("/vol1/bucket1/dir1/dir2"); -// Path file3 = new Path(dir2, "file3"); -// -// Path dir3 = new Path("/vol1/bucket1/dir3"); -// Path file4 = new Path("/vol1/bucket1/file4"); -// -// ContractTestUtils.touch(fs, file1); -// ContractTestUtils.touch(fs, file2); -// ContractTestUtils.touch(fs, file3); -// ContractTestUtils.touch(fs, file4); -// fs.mkdirs(dir3); -// */ -// FsoRepair repair = new FsoRepair("/Users/erose/Temp/omNode-1/om.db", -// FsoRepair.Mode.DEBUG); -// repair.run(); -// -// /* -// Original: -// Expected: -// 3 reachable dirs, 4 reachable files. -// -// After remove dir1: -// 3 unreachable files, 1 unreachable dir. -// 1 reachable file, 1 reachable dir. -// -// */ -// } - private void assertDeleteTablesEmpty() throws IOException { OzoneManager leader = cluster.getOMLeader(); Assertions.assertTrue(leader.getMetadataManager().getDeletedDirTable().isEmpty()); From 6d3b43a3ad5d8dbc3aeb17de9054c53b98c5e9f1 Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Thu, 2 May 2024 17:39:20 -0700 Subject: [PATCH 5/6] remove accidentally added character --- .../src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java index d4857e92f547..4c5f3fdc872f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/cli/GenericCli.java @@ -39,10 +39,10 @@ public class GenericCli implements Callable, GenericParentCommand { public static final int EXECUTION_ERROR_EXIT_CODE = -1; -` @Option(names = {"--verbose"}, + @Option(names = {"--verbose"}, description = "More verbose output.
Show the stack trace of the errors.") private boolean verbose; -` + @Option(names = {"-D", "--set"}) private Map configurationOverrides = new HashMap<>(); From 1b90307f11bcff2f9452c6dcbd3641f18686e9dc Mon Sep 17 00:00:00 2001 From: DaveTeng0 Date: Mon, 3 Jun 2024 11:10:19 -0700 Subject: [PATCH 6/6] remove FSOBaseCLI, switch to use RocksDatabase in FSORepairTool, which wraps ManagedRocksDB --- .../hadoop/hdds/utils/db/RocksDatabase.java | 45 +- .../hdds/utils/db/managed/ManagedRocksDB.java | 41 - .../hadoop/fs/ozone/TestFSORepairTool.java | 60 +- .../hadoop/ozone/common/FSOBaseCLI.java | 83 --- .../hadoop/ozone/common/FSOBaseTool.java | 699 ------------------ .../hadoop/ozone/common/package-info.java | 27 - .../hadoop/ozone/debug/FSODebugCLI.java | 67 -- .../hadoop/ozone/repair/om/FSORepairCLI.java | 25 +- .../hadoop/ozone/repair/om/FSORepairTool.java | 661 ++++++++++++++++- 9 files changed, 745 insertions(+), 963 deletions(-) delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java delete mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java index 19f60d914f32..b4731984d802 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java @@ -140,7 +140,7 @@ public static List listColumnFamiliesEmptyOptions(final String path) } } - static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, + public static RocksDatabase open(File dbFile, ManagedDBOptions dbOptions, ManagedWriteOptions writeOptions, Set families, boolean readOnly) throws IOException { List descriptors = null; @@ -461,8 +461,13 @@ public void ingestExternalFile(ColumnFamily family, List files, public void put(ColumnFamily family, byte[] key, byte[] value) throws IOException { + put(family.getHandle(), key, value); + } + + public void put(ColumnFamilyHandle handle, byte[] key, byte[] value) + throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { - db.get().put(family.getHandle(), writeOptions, key, value); + db.get().put(handle, writeOptions, key, value); } catch (RocksDBException e) { closeOnError(e); throw toIOException(this, "put " + bytes2String(key), e); @@ -622,9 +627,14 @@ RocksCheckpoint createCheckpoint() { */ Supplier keyMayExist(ColumnFamily family, byte[] key) throws IOException { + return keyMayExist(family.getHandle(), key); + } + + public Supplier keyMayExist(ColumnFamilyHandle handle, byte[] key) + throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { final Holder out = new Holder<>(); - return db.get().keyMayExist(family.getHandle(), key, out) ? + return db.get().keyMayExist(handle, key, out) ? 
out::getValue : null; } } @@ -653,16 +663,39 @@ public Collection getExtraColumnFamilies() { return Collections.unmodifiableCollection(columnFamilies.values()); } - byte[] get(ColumnFamily family, byte[] key) throws IOException { + public void dropColumnFamily(ColumnFamilyHandle handle) throws IOException { try (UncheckedAutoCloseable ignored = acquire()) { - return db.get().get(family.getHandle(), key); + db.get().dropColumnFamily(handle); } catch (RocksDBException e) { closeOnError(e); - final String message = "get " + bytes2String(key) + " from " + family; + throw toIOException(this, "dropColumnFamily", e); + } + } + + public ColumnFamilyHandle createColumnFamily(ColumnFamilyDescriptor descriptor) throws IOException { + try (UncheckedAutoCloseable ignored = acquire()) { + return db.get().createColumnFamily(descriptor); + } catch (RocksDBException e) { + closeOnError(e); + throw toIOException(this, "createColumnFamily", e); + } + } + + public byte[] get(ColumnFamily family, byte[] key) throws IOException { + return get(family.getHandle(), key, family.getName()); + } + + public byte[] get(ColumnFamilyHandle handle, byte[] key, String familyName) throws IOException { + try (UncheckedAutoCloseable ignored = acquire()) { + return db.get().get(handle, key); + } catch (RocksDBException e) { + closeOnError(e); + final String message = "get " + bytes2String(key) + " from " + familyName; throw toIOException(this, message, e); } } + /** * Get the value mapped to the given key. * diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java index 8357a3173525..6248dfba321c 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksDB.java @@ -21,9 +21,7 @@ import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.DBOptions; -import org.rocksdb.Holder; import org.rocksdb.LiveFileMetaData; -import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.slf4j.Logger; @@ -89,11 +87,6 @@ public static ManagedRocksDB open( ); } - public static ManagedRocksDB open(final String path) throws RocksDBException { - return new ManagedRocksDB(RocksDB.open(path)); - } - - /** * Delete liveMetaDataFile from rocks db using RocksDB#deleteFile Api. 
* This function makes the RocksDB#deleteFile Api synchronized by waiting @@ -110,38 +103,4 @@ public void deleteFile(LiveFileMetaData fileToBeDeleted) ManagedRocksObjectUtils.waitForFileDelete(file, Duration.ofSeconds(60)); } - public void put(ColumnFamilyHandle columnFamilyHandle, - byte[] key, byte[] value) throws RocksDBException { - this.get().put(columnFamilyHandle, key, value); - } - - public byte[] get(ColumnFamilyHandle columnFamilyHandle, - byte[] key) throws RocksDBException { - return this.get().get(columnFamilyHandle, key); - } - - public ColumnFamilyHandle createColumnFamily( - ColumnFamilyDescriptor columnFamilyDescriptor) - throws RocksDBException { - return this.get().createColumnFamily(columnFamilyDescriptor); - } - - public void dropColumnFamily(ColumnFamilyHandle columnFamilyHandle) - throws RocksDBException { - this.get().dropColumnFamily(columnFamilyHandle); - } - - public boolean keyMayExist(ColumnFamilyHandle columnFamilyHandle, byte[] key, Holder valueHolder) { - return this.get().keyMayExist(columnFamilyHandle, key, valueHolder); - } - - public void close() { - this.get().close(); - } - - public static List listColumnFamilies(Options options, - String path) throws RocksDBException { - return RocksDB.listColumnFamilies(options, path); - } - } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java index c9c8c226b49e..430a931d0547 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestFSORepairTool.java @@ -34,7 +34,6 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.common.FSOBaseTool; import org.apache.hadoop.ozone.om.OMConfigKeys; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; @@ -126,12 +125,12 @@ public static void teardown() { @Test public void testConnectedTreeOneBucket() throws Exception { - FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report expectedReport = buildConnectedTree("vol1", "bucket1"); // Test the connected tree in debug mode. - FSOBaseTool fsoTool = new FSOBaseTool(getOmDB(), + FSORepairTool fsoTool = new FSORepairTool(getOmDB(), getOmDBLocation(), true); - FSOBaseTool.Report debugReport = fsoTool.run(); + FSORepairTool.Report debugReport = fsoTool.run(); Assertions.assertEquals(expectedReport, debugReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -139,9 +138,9 @@ public void testConnectedTreeOneBucket() throws Exception { // Running again in repair mode should give same results since the tree // is connected. 
- fsoTool = new FSORepairTool(getOmDB(), + fsoTool = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report repairReport = fsoTool.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report repairReport = fsoTool.run(); Assertions.assertEquals(expectedReport, repairReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -154,7 +153,8 @@ public void testReportedDataSize() throws Exception { FSORepairTool.Report report2 = buildConnectedTree("vol1", "bucket2", 10); FSORepairTool.Report expectedReport = new FSORepairTool.Report(report1, report2); - FSORepairTool repair = new FSORepairTool(getOmDB(), + FSORepairTool + repair = new FSORepairTool(getOmDB(), getOmDBLocation(), false); FSORepairTool.Report debugReport = repair.run(); Assertions.assertEquals(expectedReport, debugReport); @@ -164,12 +164,13 @@ public void testReportedDataSize() throws Exception { public void testMultipleBucketsAndVolumes() throws Exception { FSORepairTool.Report report1 = buildConnectedTree("vol1", "bucket1"); FSORepairTool.Report report2 = buildDisconnectedTree("vol2", "bucket2"); - FSORepairTool.Report expectedAggregateReport = new FSORepairTool.Report( + FSORepairTool.Report expectedAggregateReport = new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report( report1, report2); - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(generatedReport, expectedAggregateReport); assertConnectedTreeReadable("vol1", "bucket1"); @@ -201,9 +202,10 @@ public void testDeleteOverwrite() throws Exception { ContractTestUtils.touch(fs, new Path("/vol1/bucket1/dir1/file2")); disconnectDirectory("dir1"); - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(1, generatedReport.getUnreachableDirs()); Assertions.assertEquals(3, generatedReport.getUnreachableFiles()); @@ -214,10 +216,11 @@ public void testDeleteOverwrite() throws Exception { @Test public void testEmptyFileTrees() throws Exception { // Run when there are no file trees. - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); - Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); + Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); assertDeleteTablesEmpty(); // Create an empty volume and bucket. @@ -225,10 +228,10 @@ public void testEmptyFileTrees() throws Exception { fs.mkdirs(new Path("/vol2/bucket1")); // Run on an empty volume and bucket. 
- repair = new FSORepairTool(getOmDB(), + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); generatedReport = repair.run(); - Assertions.assertEquals(generatedReport, new FSORepairTool.Report()); + Assertions.assertEquals(generatedReport, new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report()); assertDeleteTablesEmpty(); } @@ -259,14 +262,15 @@ public void testNonFSOBucketsSkipped() throws Exception { legacyStream.close(); // Add an FSO bucket with data. - FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso" + + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report connectReport = buildConnectedTree("vol1", "fso" + "-bucket"); // Even in repair mode there should be no action. legacy and obs buckets // will be skipped and FSO tree is connected. - FSORepairTool repair = new FSORepairTool(getOmDB(), + org.apache.hadoop.ozone.repair.om.FSORepairTool + repair = new org.apache.hadoop.ozone.repair.om.FSORepairTool(getOmDB(), getOmDBLocation(), false); - FSORepairTool.Report generatedReport = repair.run(); + org.apache.hadoop.ozone.repair.om.FSORepairTool.Report generatedReport = repair.run(); Assertions.assertEquals(connectReport, generatedReport); assertConnectedTreeReadable("vol1", "fso-bucket"); @@ -281,7 +285,7 @@ public void testNonFSOBucketsSkipped() throws Exception { } - private FSORepairTool.Report buildConnectedTree(String volume, String bucket) + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildConnectedTree(String volume, String bucket) throws Exception { return buildConnectedTree(volume, bucket, 0); } @@ -289,8 +293,8 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket) /** * Creates a tree with 3 reachable directories and 4 reachable files. */ - private FSORepairTool.Report buildConnectedTree(String volume, String bucket, - int fileSize) + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildConnectedTree(String volume, String bucket, + int fileSize) throws Exception { Path bucketPath = new Path("/" + volume + "/" + bucket); Path dir1 = new Path(bucketPath, "dir1"); @@ -325,7 +329,7 @@ private FSORepairTool.Report buildConnectedTree(String volume, String bucket, assertConnectedTreeReadable(volume, bucket); - return new FSORepairTool.Report.Builder() + return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() .setReachableDirs(3) .setReachableFiles(4) .setReachableBytes(fileSize * 4L) @@ -354,7 +358,7 @@ private void assertConnectedTreeReadable(String volume, String bucket) Assertions.assertTrue(fs.exists(file4)); } - private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) throws Exception { return buildDisconnectedTree(volume, bucket, 0); } @@ -363,8 +367,8 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket) * Creates a tree with 2 reachable directories, 1 reachable file, 1 * unreachable directory, and 3 unreachable files. */ - private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, - int fileSize) throws Exception { + private org.apache.hadoop.ozone.repair.om.FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, + int fileSize) throws Exception { buildConnectedTree(volume, bucket, fileSize); // Manually remove dir1. 
This should disconnect 3 of the files and 1 of @@ -373,7 +377,7 @@ private FSORepairTool.Report buildDisconnectedTree(String volume, String bucket, assertDisconnectedTreePartiallyReadable(volume, bucket); - return new FSORepairTool.Report.Builder() + return new org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder() .setReachableDirs(1) .setReachableFiles(1) .setReachableBytes(fileSize) diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java deleted file mode 100644 index 809fc74b7892..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseCLI.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.common; - -import org.apache.hadoop.hdds.cli.SubcommandWithParent; -import org.kohsuke.MetaInfServices; -import picocli.CommandLine; - -import java.util.concurrent.Callable; - -/** - * Parser for scm.db file. - */ -@CommandLine.Command( - name = "fso-tree", - description = "Identify a disconnected FSO tree, and optionally mark " + - "unreachable entries for deletion. OM should be " + - "stopped while this tool is run. Information will be logged at " + - "INFO and DEBUG levels." -) -@MetaInfServices(SubcommandWithParent.class) -public class FSOBaseCLI implements Callable, SubcommandWithParent { - - @CommandLine.Option(names = {"--db"}, - required = true, - description = "Path to OM RocksDB") - private String dbPath; - - @CommandLine.Option(names = {"--verbose"}, - description = "More verbose output. ") - private boolean verbose; - - - @Override - public Void call() throws Exception { - - try { - // TODO case insensitive enum options. - FSOBaseTool - baseTool = new FSOBaseTool(dbPath, true); - baseTool.run(); - } catch (Exception ex) { - throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); - } - - if (verbose) { - System.out.println("FSO inspection finished. 
See client logs for results."); - } - - return null; - } - - @Override - public Class getParentType() { - throw new UnsupportedOperationException("Should not be called from " + - "FSOBaseCLI directly."); - } - - public String getDbPath() { - return dbPath; - } - - public boolean getVerbose() { - return verbose; - } -} - diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java deleted file mode 100644 index 5f729ad0aa39..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/FSOBaseTool.java +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.ozone.common; - - -import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.utils.db.BatchOperation; -import org.apache.hadoop.hdds.utils.db.DBStore; -import org.apache.hadoop.hdds.utils.db.Table; -import org.apache.hadoop.hdds.utils.db.TableIterator; -import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions; -import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB; -import org.apache.hadoop.ozone.OmUtils; -import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; -import org.apache.hadoop.ozone.om.helpers.BucketLayout; -import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; -import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; -import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; -import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; -import org.apache.hadoop.ozone.om.helpers.WithObjectID; -import org.apache.hadoop.ozone.om.request.file.OMFileRequest; -import org.apache.hadoop.ozone.repair.om.FSORepairTool; -import org.apache.ratis.util.Preconditions; -import org.rocksdb.ColumnFamilyDescriptor; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.Holder; -import org.rocksdb.RocksDBException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Objects; -import java.util.Stack; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; - -/** - * Base Tool to identify disconnected FSO trees in all buckets. - * The tool will log information about unreachable files or directories. 
- * If deletes are still in progress (the deleted directory table is not empty), the tool may - * report that the tree is disconnected, even though pending deletes would - * fix the issue. - * - * Before using the tool, make sure all OMs are stopped, - * and that all Ratis logs have been flushed to the OM DB. This can be - * done using `ozone admin prepare` before running the tool, and `ozone admin - * cancelprepare` when done. - * - * The tool will run a DFS from each bucket, and save all reachable - * directories as keys in a new temporary RocksDB instance called "reachable.db" - * In the same directory as om.db. - * will then scan the entire file and directory tables for each bucket to see - * if each object's parent is in the reachable table of reachable.db. The - * reachable table will be dropped and recreated for each bucket. - * The tool is idempotent. reachable.db will not be deleted automatically - * when the tool finishes, in case users want to manually inspect it. It can - * be safely deleted once the tool finishes. - */ -public class FSOBaseTool { - public static final Logger LOG = - LoggerFactory.getLogger(FSORepairTool.class); - - private final String omDBPath; - - private final DBStore store; - private final Table volumeTable; - private final Table bucketTable; - private final Table directoryTable; - private final Table fileTable; - private final Table deletedDirectoryTable; - private final Table deletedTable; - // The temporary DB is used to track which items have been seen. - // Since usage of this DB is simple, use it directly from - // RocksDB. - private String reachableDBPath; - private static final byte[] REACHABLE_TABLE = - "reachable".getBytes(StandardCharsets.UTF_8); - private ColumnFamilyHandle reachableCF; - private ManagedRocksDB reachableDB; - - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; - private boolean dryRun; - - public FSOBaseTool(String dbPath, boolean dryRun) throws IOException { - this(getStoreFromPath(dbPath), dbPath, dryRun); - } - - /** - * Allows passing RocksDB instance from a MiniOzoneCluster directly to this - * class for testing. - */ - @VisibleForTesting - public FSOBaseTool(DBStore dbStore, String dbPath, boolean isDryRun) throws IOException { - dryRun = isDryRun; - // Counters to track as we walk the tree. 
- reachableBytes = 0; - reachableFiles = 0; - reachableDirs = 0; - unreachableBytes = 0; - unreachableFiles = 0; - unreachableDirs = 0; - - this.store = dbStore; - this.omDBPath = dbPath; - volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, - String.class, - OmVolumeArgs.class); - bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE, - String.class, - OmBucketInfo.class); - directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE, - String.class, - OmDirectoryInfo.class); - fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, - String.class, - OmKeyInfo.class); - deletedDirectoryTable = store.getTable( - OmMetadataManagerImpl.DELETED_DIR_TABLE, - String.class, - OmKeyInfo.class); - deletedTable = store.getTable( - OmMetadataManagerImpl.DELETED_TABLE, - String.class, - RepeatedOmKeyInfo.class); - } - - protected static DBStore getStoreFromPath(String dbPath) throws IOException { - File omDBFile = new File(dbPath); - if (!omDBFile.exists() || !omDBFile.isDirectory()) { - throw new IOException(String.format("Specified OM DB instance %s does " + - "not exist or is not a RocksDB directory.", dbPath)); - } - // Load RocksDB and tables needed. - return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), - new File(dbPath).getParentFile()); - } - - public FSORepairTool.Report run() throws IOException { - // Iterate all volumes. - try (TableIterator> - volumeIterator = volumeTable.iterator()) { - openReachableDB(); - - while (volumeIterator.hasNext()) { - Table.KeyValue volumeEntry = - volumeIterator.next(); - String volumeKey = volumeEntry.getKey(); - - // Iterate all buckets in the volume. - try (TableIterator> - bucketIterator = bucketTable.iterator()) { - bucketIterator.seek(volumeKey); - while (bucketIterator.hasNext()) { - Table.KeyValue bucketEntry = - bucketIterator.next(); - String bucketKey = bucketEntry.getKey(); - OmBucketInfo bucketInfo = bucketEntry.getValue(); - - if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { - LOG.debug("Skipping non-FSO bucket {}", bucketKey); - continue; - } - - // Stop this loop once we have seen all buckets in the current - // volume. - if (!bucketKey.startsWith(volumeKey)) { - break; - } - - // Start with a fresh list of reachable files for this bucket. - // Also clears partial state if the tool failed on a previous run. - dropReachableTableIfExists(); - createReachableTable(); - // Process one bucket's FSO tree at a time. - markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo); - handleUnreachableObjects(volumeEntry.getValue(), bucketInfo); - dropReachableTableIfExists(); - } - } - } - } finally { - closeReachableDB(); - } - - return buildReportAndLog(); - } - - private FSORepairTool.Report buildReportAndLog() { - FSORepairTool.Report report = new FSORepairTool.Report.Builder() - .setReachableDirs(reachableDirs) - .setReachableFiles(reachableFiles) - .setReachableBytes(reachableBytes) - .setUnreachableDirs(unreachableDirs) - .setUnreachableFiles(unreachableFiles) - .setUnreachableBytes(unreachableBytes) - .build(); - - LOG.info("\n{}", report); - return report; - } - - private void markReachableObjectsInBucket(OmVolumeArgs volume, - OmBucketInfo bucket) throws IOException { - LOG.info("Processing bucket {}", bucket.getBucketName()); - // Only put directories in the stack. - // Directory keys should have the form /volumeID/bucketID/parentID/name. 
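For illustration, a minimal standalone sketch (hypothetical object IDs, not part of the patch) of the key layout described in the comment above and of the parent-prefix check it feeds into; it mirrors the buildReachableParentKey() helper further down in this class:

// Hypothetical FSO directory table key: /volumeID/bucketID/parentID/name
public final class ReachableKeySketch {
  public static void main(String[] args) {
    String dirKey = "/-101/-202/-303/dir2";              // made-up IDs
    String[] parts = dirKey.split("/");                  // ["", "-101", "-202", "-303", "dir2"]
    String parentKey = "/" + parts[1] + "/" + parts[2] + "/" + parts[3];
    System.out.println(parentKey);                       // prints /-101/-202/-303
    // The entry is treated as reachable only if this parent key was written to
    // reachable.db when the DFS visited the directory whose objectID is -303.
  }
}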
- Stack dirKeyStack = new Stack<>(); - - // Since the tool uses parent directories to check for reachability, add - // a reachable entry for the bucket as well. - addReachableEntry(volume, bucket, bucket); - // Initialize the stack with all immediate child directories of the - // bucket, and mark them all as reachable. - Collection childDirs = - getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); - dirKeyStack.addAll(childDirs); - - while (!dirKeyStack.isEmpty()) { - // Get one directory and process its immediate children. - String currentDirKey = dirKeyStack.pop(); - OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); - if (currentDir == null) { - LOG.error("Directory key {} to be processed was not found in the " + - "directory table", currentDirKey); - continue; - } - - // TODO revisit this for a more memory efficient implementation, - // possibly making better use of RocksDB iterators. - childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, - currentDir); - dirKeyStack.addAll(childDirs); - } - } - - private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { - // Check for unreachable directories in the bucket. - String bucketPrefix = OM_KEY_PREFIX + - volume.getObjectID() + - OM_KEY_PREFIX + - bucket.getObjectID(); - - try (TableIterator> dirIterator = - directoryTable.iterator()) { - dirIterator.seek(bucketPrefix); - while (dirIterator.hasNext()) { - Table.KeyValue dirEntry = dirIterator.next(); - String dirKey = dirEntry.getKey(); - - // Only search directories in this bucket. - if (!dirKey.startsWith(bucketPrefix)) { - break; - } - - if (!isReachable(dirKey)) { - LOG.debug("Found unreachable directory: {}", dirKey); - unreachableDirs++; - - if (dryRun) { - LOG.debug("Marking unreachable directory {} for deletion.", dirKey); - OmDirectoryInfo dirInfo = dirEntry.getValue(); - markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), - dirKey, dirInfo); - } - } - } - } - - // Check for unreachable files - try (TableIterator> - fileIterator = fileTable.iterator()) { - fileIterator.seek(bucketPrefix); - while (fileIterator.hasNext()) { - Table.KeyValue fileEntry = fileIterator.next(); - String fileKey = fileEntry.getKey(); - // Only search files in this bucket. - if (!fileKey.startsWith(bucketPrefix)) { - break; - } - - OmKeyInfo fileInfo = fileEntry.getValue(); - if (!isReachable(fileKey)) { - LOG.debug("Found unreachable file: {}", fileKey); - unreachableBytes += fileInfo.getDataSize(); - unreachableFiles++; - - if (dryRun) { - LOG.debug("Marking unreachable file {} for deletion.", - fileKey); - markFileForDeletion(fileKey, fileInfo); - } - } else { - // NOTE: We are deserializing the proto of every reachable file - // just to log it's size. If we don't need this information we could - // save time by skipping this step. - reachableBytes += fileInfo.getDataSize(); - reachableFiles++; - } - } - } - } - - protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { - try (BatchOperation batch = store.initBatchOperation()) { - fileTable.deleteWithBatch(batch, fileKey); - - RepeatedOmKeyInfo originalRepeatedKeyInfo = deletedTable.get(fileKey); - RepeatedOmKeyInfo updatedRepeatedOmKeyInfo = OmUtils.prepareKeyForDelete( - fileInfo, fileInfo.getUpdateID(), true); - // NOTE: The FSO code seems to write the open key entry with the whole - // path, using the object's names instead of their ID. 
This would onyl - // be possible when the file is deleted explicitly, and not part of a - // directory delete. It is also not possible here if the file's parent - // is gone. The name of the key does not matter so just use IDs. - deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo); - - LOG.debug("Added entry {} to open key table: {}", - fileKey, updatedRepeatedOmKeyInfo); - - store.commitBatchOperation(batch); - } - } - - protected void markDirectoryForDeletion(String volumeName, String bucketName, - String dirKeyName, OmDirectoryInfo dirInfo) throws IOException { - try (BatchOperation batch = store.initBatchOperation()) { - directoryTable.deleteWithBatch(batch, dirKeyName); - // HDDS-7592: Make directory entries in deleted dir table unique. - String deleteDirKeyName = - dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID(); - - // Convert the directory to OmKeyInfo for deletion. - OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo( - volumeName, bucketName, dirInfo, dirInfo.getName()); - deletedDirectoryTable.putWithBatch(batch, deleteDirKeyName, dirAsKeyInfo); - - store.commitBatchOperation(batch); - } - } - - private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume, - OmBucketInfo bucket, - WithObjectID currentDir) throws IOException { - - Collection childDirs = new ArrayList<>(); - - try (TableIterator> - dirIterator = directoryTable.iterator()) { - String dirPrefix = buildReachableKey(volume, bucket, currentDir); - // Start searching the directory table at the current directory's - // prefix to get its immediate children. - dirIterator.seek(dirPrefix); - while (dirIterator.hasNext()) { - Table.KeyValue childDirEntry = - dirIterator.next(); - String childDirKey = childDirEntry.getKey(); - // Stop processing once we have seen all immediate children of this - // directory. - if (!childDirKey.startsWith(dirPrefix)) { - break; - } - // This directory was reached by search. - addReachableEntry(volume, bucket, childDirEntry.getValue()); - childDirs.add(childDirKey); - reachableDirs++; - } - } - - return childDirs; - } - - /** - * Add the specified object to the reachable table, indicating it is part - * of the connected FSO tree. - */ - private void addReachableEntry(OmVolumeArgs volume, - OmBucketInfo bucket, WithObjectID object) throws IOException { - byte[] reachableKey = buildReachableKey(volume, bucket, object) - .getBytes(StandardCharsets.UTF_8); - try { - // No value is needed for this table. - reachableDB.put(reachableCF, reachableKey, new byte[]{}); - } catch (RocksDBException ex) { - throw new IOException(ex.getMessage(), ex); - } - } - - /** - * Build an entry in the reachable table for the current object, which - * could be a bucket, file or directory. - */ - private static String buildReachableKey(OmVolumeArgs volume, - OmBucketInfo bucket, WithObjectID object) { - return OM_KEY_PREFIX + - volume.getObjectID() + - OM_KEY_PREFIX + - bucket.getObjectID() + - OM_KEY_PREFIX + - object.getObjectID(); - } - - /** - * - * @param fileOrDirKey The key of a file or directory in RocksDB. - * @return true if the entry's parent is in the reachable table. 
- */ - protected boolean isReachable(String fileOrDirKey) throws IOException { - byte[] reachableParentKey = - buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); - try { - if (reachableDB.keyMayExist( - reachableCF, reachableParentKey, new Holder<>())) { - return reachableDB.get(reachableCF, reachableParentKey) != null; - } else { - return false; - } - } catch (RocksDBException ex) { - throw new IOException(ex.getMessage(), ex); - } - } - - /** - * Build an entry in the reachable table for the current object's parent - * object. The object could be a file or directory. - */ - private static String buildReachableParentKey(String fileOrDirKey) { - String[] keyParts = fileOrDirKey.split(OM_KEY_PREFIX); - // Should be /volID/bucketID/parentID/name - // The first part will be blank since key begins with a slash. - Preconditions.assertTrue(keyParts.length >= 4); - String volumeID = keyParts[1]; - String bucketID = keyParts[2]; - String parentID = keyParts[3]; - - return OM_KEY_PREFIX + - volumeID + - OM_KEY_PREFIX + - bucketID + - OM_KEY_PREFIX + - parentID; - } - - private void openReachableDB() throws IOException { - File reachableDBFile = new File(new File(omDBPath).getParentFile(), - "reachable.db"); - LOG.info("Creating database of reachable directories at {}", - reachableDBFile); - try { - // Delete the DB from the last run if it exists. - if (reachableDBFile.exists()) { - FileUtils.deleteDirectory(reachableDBFile); - } - reachableDBPath = reachableDBFile.toString(); - reachableDB = ManagedRocksDB.open(reachableDBPath); - } catch (RocksDBException ex) { - if (reachableDB != null) { - reachableDB.close(); - } - throw new IOException(ex.getMessage(), ex); - } - } - - private void closeReachableDB() { - if (reachableDB != null) { - reachableDB.close(); - } - } - - private void dropReachableTableIfExists() throws IOException { - try { - List - availableCFs = ManagedRocksDB.listColumnFamilies(new ManagedOptions(), - reachableDBPath); - boolean cfFound = false; - for (byte[] cfNameBytes: availableCFs) { - if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE, UTF_8))) { - cfFound = true; - break; - } - } - - if (cfFound) { - reachableDB.dropColumnFamily(reachableCF); - } - } catch (RocksDBException ex) { - throw new IOException(ex.getMessage(), ex); - } finally { - if (reachableCF != null) { - reachableCF.close(); - } - } - } - - private void createReachableTable() throws IOException { - try { - reachableCF = reachableDB.createColumnFamily( - new ColumnFamilyDescriptor(REACHABLE_TABLE)); - } catch (RocksDBException ex) { - if (reachableCF != null) { - reachableCF.close(); - } - throw new IOException(ex.getMessage(), ex); - } - } - - /** - * Define a Report to be created. - */ - public static class Report { - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; - - /** - * Builds one report that is the aggregate of multiple others. - */ - public Report(FSORepairTool.Report... 
reports) { - reachableBytes = 0; - reachableFiles = 0; - reachableDirs = 0; - unreachableBytes = 0; - unreachableFiles = 0; - unreachableDirs = 0; - - for (FSORepairTool.Report report: reports) { - reachableBytes += report.reachableBytes; - reachableFiles += report.reachableFiles; - reachableDirs += report.reachableDirs; - unreachableBytes += report.unreachableBytes; - unreachableFiles += report.unreachableFiles; - unreachableDirs += report.unreachableDirs; - } - } - - private Report(FSORepairTool.Report.Builder builder) { - reachableBytes = builder.reachableBytes; - reachableFiles = builder.reachableFiles; - reachableDirs = builder.reachableDirs; - unreachableBytes = builder.unreachableBytes; - unreachableFiles = builder.unreachableFiles; - unreachableDirs = builder.unreachableDirs; - } - - public long getReachableBytes() { - return reachableBytes; - } - - public long getReachableFiles() { - return reachableFiles; - } - - public long getReachableDirs() { - return reachableDirs; - } - - public long getUnreachableBytes() { - return unreachableBytes; - } - - public long getUnreachableFiles() { - return unreachableFiles; - } - - public long getUnreachableDirs() { - return unreachableDirs; - } - - @Override - public String toString() { - return "Reachable:" + - "\n\tDirectories: " + reachableDirs + - "\n\tFiles: " + reachableFiles + - "\n\tBytes: " + reachableBytes + - "\nUnreachable:" + - "\n\tDirectories: " + unreachableDirs + - "\n\tFiles: " + unreachableFiles + - "\n\tBytes: " + unreachableBytes; - } - - @Override - public boolean equals(Object other) { - if (other == this) { - return true; - } - if (other == null || getClass() != other.getClass()) { - return false; - } - FSORepairTool.Report report = (FSORepairTool.Report) other; - - // Useful for testing. - LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); - - return reachableBytes == report.reachableBytes && - reachableFiles == report.reachableFiles && - reachableDirs == report.reachableDirs && - unreachableBytes == report.unreachableBytes && - unreachableFiles == report.unreachableFiles && - unreachableDirs == report.unreachableDirs; - } - - @Override - public int hashCode() { - return Objects.hash(reachableBytes, - reachableFiles, - reachableDirs, - unreachableBytes, - unreachableFiles, - unreachableDirs); - } - - /** - * Builder class for a Report. 
- */ - public static final class Builder { - private long reachableBytes; - private long reachableFiles; - private long reachableDirs; - private long unreachableBytes; - private long unreachableFiles; - private long unreachableDirs; - - public Builder() { - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setReachableBytes(long reachableBytes) { - this.reachableBytes = reachableBytes; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setReachableFiles(long reachableFiles) { - this.reachableFiles = reachableFiles; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setReachableDirs(long reachableDirs) { - this.reachableDirs = reachableDirs; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setUnreachableBytes(long unreachableBytes) { - this.unreachableBytes = unreachableBytes; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setUnreachableFiles(long unreachableFiles) { - this.unreachableFiles = unreachableFiles; - return this; - } - - @SuppressWarnings("checkstyle:hiddenfield") - public FSOBaseTool.Report.Builder setUnreachableDirs(long unreachableDirs) { - this.unreachableDirs = unreachableDirs; - return this; - } - - public FSOBaseTool.Report build() { - return new FSOBaseTool.Report(this); - } - } - } -} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java deleted file mode 100644 index 537abfad32fa..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/common/package-info.java +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *
- * SCM related cli tools. - */ - -/** - * Ozone Admin tools. - */ -/** - * Ozone debug/repair tools uility class. - */ -package org.apache.hadoop.ozone.common; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java deleted file mode 100644 index 26ec8614aa21..000000000000 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/FSODebugCLI.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.ozone.debug; - -import org.apache.hadoop.hdds.cli.SubcommandWithParent; -import org.apache.hadoop.ozone.common.FSOBaseCLI; -import org.apache.hadoop.ozone.common.FSOBaseTool; -import org.kohsuke.MetaInfServices; -import picocli.CommandLine; - -/** - * Parser for scm.db file. - */ -@CommandLine.Command( - name = "fso-tree", - description = "Identify a disconnected FSO tree, and optionally mark " + - "unreachable entries for deletion. OM should be " + - "stopped while this tool is run. Information will be logged at " + - "INFO and DEBUG levels." -) -@MetaInfServices(SubcommandWithParent.class) -public class FSODebugCLI extends FSOBaseCLI { - - @CommandLine.ParentCommand - private OzoneDebug parent; - - @Override - public Void call() throws Exception { - - try { - // TODO case insensitive enum options. - FSOBaseTool - baseTool = new FSOBaseTool(getDbPath(), true); - baseTool.run(); - } catch (Exception ex) { - throw new IllegalArgumentException("FSO inspection failed: " + ex.getMessage()); - } - - if (getVerbose()) { - System.out.println("FSO inspection finished. See client logs for results."); - } - - return null; - } - - @Override - public Class getParentType() { - return OzoneDebug.class; - } -} - diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java index c95a60394ad8..35e3bd5936a2 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairCLI.java @@ -19,11 +19,12 @@ package org.apache.hadoop.ozone.repair.om; import org.apache.hadoop.hdds.cli.SubcommandWithParent; -import org.apache.hadoop.ozone.common.FSOBaseCLI; import org.apache.hadoop.ozone.repair.OzoneRepair; import org.kohsuke.MetaInfServices; import picocli.CommandLine; +import java.util.concurrent.Callable; + /** * Parser for scm.db file. */ @@ -35,24 +36,36 @@ "INFO and DEBUG levels." 
) @MetaInfServices(SubcommandWithParent.class) -public class FSORepairCLI extends FSOBaseCLI { +public class FSORepairCLI implements Callable, SubcommandWithParent { @CommandLine.ParentCommand private OzoneRepair parent; + @CommandLine.Option(names = {"--db"}, + required = true, + description = "Path to OM RocksDB") + private String dbPath; + + @CommandLine.Option(names = {"--dry-run"}, + description = "Run in dry-run mode.") + private boolean dryRun; + + @CommandLine.Option(names = {"--verbose"}, + description = "More verbose output.") + private boolean verbose; + + @Override public Void call() throws Exception { - try { - // TODO case insensitive enum options. FSORepairTool - repairTool = new FSORepairTool(getDbPath(), false); + repairTool = new FSORepairTool(dbPath, dryRun); repairTool.run(); } catch (Exception ex) { throw new IllegalArgumentException("FSO repair failed: " + ex.getMessage()); } - if (getVerbose()) { + if (verbose) { System.out.println("FSO repair finished. See client logs for results."); } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index ea5bf8625123..6ee551a6580d 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -15,25 +15,674 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.hadoop.ozone.repair.om; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.RocksDatabase; +import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.DBStore; -import org.apache.hadoop.ozone.common.FSOBaseTool; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.hdds.utils.db.TableConfig; +import org.apache.hadoop.hdds.utils.db.DBProfile; +import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.WithObjectID; +import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.ratis.util.Preconditions; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.Stack; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; +import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE; +import static 
org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; /** - * Tool to identify and repair disconnected FSO trees in all buckets. + * Tool to identify disconnected FSO trees in all buckets, and optionally mark unreachable objects for deletion. + * The tool will log information about unreachable files or directories. + * If deletes are still in progress (the deleted directory table is not empty), the tool may + * report that the tree is disconnected, even though pending deletes would + * fix the issue. + * + * Before using the tool, make sure all OMs are stopped, + * and that all Ratis logs have been flushed to the OM DB. This can be + * done using `ozone admin prepare` before running the tool, and `ozone admin + * cancelprepare` when done. + * + * The tool will run a DFS from each bucket, and save all reachable + * directories as keys in a new temporary RocksDB instance called "reachable.db" + * in the same directory as om.db. It + * will then scan the entire file and directory tables for each bucket to see + * if each object's parent is in the reachable table of reachable.db. The + * reachable table will be dropped and recreated for each bucket. + * The tool is idempotent. reachable.db will not be deleted automatically + * when the tool finishes, in case users want to manually inspect it. It can + * be safely deleted once the tool finishes. */ -public class FSORepairTool extends FSOBaseTool { +public class FSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(org.apache.hadoop.ozone.repair.om.FSORepairTool.class); + + private final String omDBPath; + + private final DBStore store; + private final Table volumeTable; + private final Table bucketTable; + private final Table directoryTable; + private final Table fileTable; + private final Table deletedDirectoryTable; + private final Table deletedTable; + // The temporary DB is used to track which items have been seen. + // Since usage of this DB is simple, use it directly from + // RocksDB. + private String reachableDBPath; + private static final String REACHABLE_TABLE = "reachable"; + private static final byte[] REACHABLE_TABLE_BYTES = + REACHABLE_TABLE.getBytes(StandardCharsets.UTF_8); + private ColumnFamilyHandle reachableCFHandle; + private RocksDatabase reachableDB; + + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + private boolean dryRun; public FSORepairTool(String dbPath, boolean dryRun) throws IOException { this(getStoreFromPath(dbPath), dbPath, dryRun); } - public FSORepairTool(DBStore dbStore, String dbPath, boolean dryRun) throws IOException { - super(dbStore, dbPath, dryRun); + /** + * Allows passing a RocksDB instance from a MiniOzoneCluster directly to this + * class for testing. + */ + @VisibleForTesting + public FSORepairTool(DBStore dbStore, String dbPath, boolean isDryRun) throws IOException { + dryRun = isDryRun; + // Counters to track as we walk the tree. 
+ reachableBytes = 0; + reachableFiles = 0; + reachableDirs = 0; + unreachableBytes = 0; + unreachableFiles = 0; + unreachableDirs = 0; + + this.store = dbStore; + this.omDBPath = dbPath; + volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, + String.class, + OmVolumeArgs.class); + bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE, + String.class, + OmBucketInfo.class); + directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE, + String.class, + OmDirectoryInfo.class); + fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, + String.class, + OmKeyInfo.class); + deletedDirectoryTable = store.getTable( + OmMetadataManagerImpl.DELETED_DIR_TABLE, + String.class, + OmKeyInfo.class); + deletedTable = store.getTable( + OmMetadataManagerImpl.DELETED_TABLE, + String.class, + RepeatedOmKeyInfo.class); + } + + protected static DBStore getStoreFromPath(String dbPath) throws IOException { + File omDBFile = new File(dbPath); + if (!omDBFile.exists() || !omDBFile.isDirectory()) { + throw new IOException(String.format("Specified OM DB instance %s does " + + "not exist or is not a RocksDB directory.", dbPath)); + } + // Load RocksDB and tables needed. + return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), + new File(dbPath).getParentFile()); + } + + public org.apache.hadoop.ozone.repair.om.FSORepairTool.Report run() throws IOException { + // Iterate all volumes. + try (TableIterator> + volumeIterator = volumeTable.iterator()) { + openReachableDB(); + + while (volumeIterator.hasNext()) { + Table.KeyValue volumeEntry = + volumeIterator.next(); + String volumeKey = volumeEntry.getKey(); + + // Iterate all buckets in the volume. + try (TableIterator> + bucketIterator = bucketTable.iterator()) { + bucketIterator.seek(volumeKey); + while (bucketIterator.hasNext()) { + Table.KeyValue bucketEntry = + bucketIterator.next(); + String bucketKey = bucketEntry.getKey(); + OmBucketInfo bucketInfo = bucketEntry.getValue(); + + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + LOG.debug("Skipping non-FSO bucket {}", bucketKey); + continue; + } + + // Stop this loop once we have seen all buckets in the current + // volume. + if (!bucketKey.startsWith(volumeKey)) { + break; + } + + // Start with a fresh list of reachable files for this bucket. + // Also clears partial state if the tool failed on a previous run. + dropReachableTableIfExists(); + createReachableTable(); + // Process one bucket's FSO tree at a time. + markReachableObjectsInBucket(volumeEntry.getValue(), bucketInfo); + handleUnreachableObjects(volumeEntry.getValue(), bucketInfo); + dropReachableTableIfExists(); + } + } + } + } finally { + closeReachableDB(); + } + + return buildReportAndLog(); + } + + private Report buildReportAndLog() { + Report report = new Report.Builder() + .setReachableDirs(reachableDirs) + .setReachableFiles(reachableFiles) + .setReachableBytes(reachableBytes) + .setUnreachableDirs(unreachableDirs) + .setUnreachableFiles(unreachableFiles) + .setUnreachableBytes(unreachableBytes) + .build(); + + LOG.info("\n{}", report); + return report; + } + + private void markReachableObjectsInBucket(OmVolumeArgs volume, + OmBucketInfo bucket) throws IOException { + LOG.info("Processing bucket {}", bucket.getBucketName()); + // Only put directories in the stack. + // Directory keys should have the form /volumeID/bucketID/parentID/name. 
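+ // For example (illustrative object IDs only, not taken from a real
+ // cluster): a top-level directory named "dir1" in a bucket whose object ID
+ // is 2, inside a volume whose object ID is 1, would be stored under the
+ // key "/1/2/2/dir1"; the third component is the parent's object ID, which
+ // for a top-level directory is the bucket's own ID.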
+ Stack dirKeyStack = new Stack<>(); + + // Since the tool uses parent directories to check for reachability, add + // a reachable entry for the bucket as well. + addReachableEntry(volume, bucket, bucket); + // Initialize the stack with all immediate child directories of the + // bucket, and mark them all as reachable. + Collection childDirs = + getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); + dirKeyStack.addAll(childDirs); + + while (!dirKeyStack.isEmpty()) { + // Get one directory and process its immediate children. + String currentDirKey = dirKeyStack.pop(); + OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); + if (currentDir == null) { + LOG.error("Directory key {} to be processed was not found in the " + + "directory table", currentDirKey); + continue; + } + + // TODO revisit this for a more memory efficient implementation, + // possibly making better use of RocksDB iterators. + childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, + currentDir); + dirKeyStack.addAll(childDirs); + } + } + + private void handleUnreachableObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { + // Check for unreachable directories in the bucket. + String bucketPrefix = OM_KEY_PREFIX + + volume.getObjectID() + + OM_KEY_PREFIX + + bucket.getObjectID(); + + try (TableIterator> dirIterator = + directoryTable.iterator()) { + dirIterator.seek(bucketPrefix); + while (dirIterator.hasNext()) { + Table.KeyValue dirEntry = dirIterator.next(); + String dirKey = dirEntry.getKey(); + + // Only search directories in this bucket. + if (!dirKey.startsWith(bucketPrefix)) { + break; + } + + if (!isReachable(dirKey)) { + LOG.debug("Found unreachable directory: {}", dirKey); + unreachableDirs++; + + if (dryRun) { + LOG.debug("Marking unreachable directory {} for deletion.", dirKey); + OmDirectoryInfo dirInfo = dirEntry.getValue(); + markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), + dirKey, dirInfo); + } + } + } + } + + // Check for unreachable files + try (TableIterator> + fileIterator = fileTable.iterator()) { + fileIterator.seek(bucketPrefix); + while (fileIterator.hasNext()) { + Table.KeyValue fileEntry = fileIterator.next(); + String fileKey = fileEntry.getKey(); + // Only search files in this bucket. + if (!fileKey.startsWith(bucketPrefix)) { + break; + } + + OmKeyInfo fileInfo = fileEntry.getValue(); + if (!isReachable(fileKey)) { + LOG.debug("Found unreachable file: {}", fileKey); + unreachableBytes += fileInfo.getDataSize(); + unreachableFiles++; + + if (dryRun) { + LOG.debug("Marking unreachable file {} for deletion.", + fileKey); + markFileForDeletion(fileKey, fileInfo); + } + } else { + // NOTE: We are deserializing the proto of every reachable file + // just to log it's size. If we don't need this information we could + // save time by skipping this step. + reachableBytes += fileInfo.getDataSize(); + reachableFiles++; + } + } + } + } + + protected void markFileForDeletion(String fileKey, OmKeyInfo fileInfo) throws IOException { + try (BatchOperation batch = store.initBatchOperation()) { + fileTable.deleteWithBatch(batch, fileKey); + + RepeatedOmKeyInfo originalRepeatedKeyInfo = deletedTable.get(fileKey); + RepeatedOmKeyInfo updatedRepeatedOmKeyInfo = OmUtils.prepareKeyForDelete( + fileInfo, fileInfo.getUpdateID(), true); + // NOTE: The FSO code seems to write the open key entry with the whole + // path, using the object's names instead of their ID. 
This would onyl + // be possible when the file is deleted explicitly, and not part of a + // directory delete. It is also not possible here if the file's parent + // is gone. The name of the key does not matter so just use IDs. + deletedTable.putWithBatch(batch, fileKey, updatedRepeatedOmKeyInfo); + + LOG.debug("Added entry {} to open key table: {}", + fileKey, updatedRepeatedOmKeyInfo); + + store.commitBatchOperation(batch); + } + } + + protected void markDirectoryForDeletion(String volumeName, String bucketName, + String dirKeyName, OmDirectoryInfo dirInfo) throws IOException { + try (BatchOperation batch = store.initBatchOperation()) { + directoryTable.deleteWithBatch(batch, dirKeyName); + // HDDS-7592: Make directory entries in deleted dir table unique. + String deleteDirKeyName = + dirKeyName + OM_KEY_PREFIX + dirInfo.getObjectID(); + + // Convert the directory to OmKeyInfo for deletion. + OmKeyInfo dirAsKeyInfo = OMFileRequest.getOmKeyInfo( + volumeName, bucketName, dirInfo, dirInfo.getName()); + deletedDirectoryTable.putWithBatch(batch, deleteDirKeyName, dirAsKeyInfo); + + store.commitBatchOperation(batch); + } + } + + private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs volume, + OmBucketInfo bucket, + WithObjectID currentDir) throws IOException { + + Collection childDirs = new ArrayList<>(); + + try (TableIterator> + dirIterator = directoryTable.iterator()) { + String dirPrefix = buildReachableKey(volume, bucket, currentDir); + // Start searching the directory table at the current directory's + // prefix to get its immediate children. + dirIterator.seek(dirPrefix); + while (dirIterator.hasNext()) { + Table.KeyValue childDirEntry = + dirIterator.next(); + String childDirKey = childDirEntry.getKey(); + // Stop processing once we have seen all immediate children of this + // directory. + if (!childDirKey.startsWith(dirPrefix)) { + break; + } + // This directory was reached by search. + addReachableEntry(volume, bucket, childDirEntry.getValue()); + childDirs.add(childDirKey); + reachableDirs++; + } + } + + return childDirs; + } + + /** + * Add the specified object to the reachable table, indicating it is part + * of the connected FSO tree. + */ + private void addReachableEntry(OmVolumeArgs volume, + OmBucketInfo bucket, WithObjectID object) throws IOException { + byte[] reachableKey = buildReachableKey(volume, bucket, object) + .getBytes(StandardCharsets.UTF_8); + // No value is needed for this table. + reachableDB.put(reachableCFHandle, reachableKey, new byte[]{}); + } + + /** + * Build an entry in the reachable table for the current object, which + * could be a bucket, file or directory. + */ + private static String buildReachableKey(OmVolumeArgs volume, + OmBucketInfo bucket, WithObjectID object) { + return OM_KEY_PREFIX + + volume.getObjectID() + + OM_KEY_PREFIX + + bucket.getObjectID() + + OM_KEY_PREFIX + + object.getObjectID(); + } + + /** + * + * @param fileOrDirKey The key of a file or directory in RocksDB. + * @return true if the entry's parent is in the reachable table. + */ + protected boolean isReachable(String fileOrDirKey) throws IOException { + byte[] reachableParentKey = + buildReachableParentKey(fileOrDirKey).getBytes(StandardCharsets.UTF_8); + + return reachableDB.get(reachableCFHandle, reachableParentKey, REACHABLE_TABLE) != null; + } + + /** + * Build an entry in the reachable table for the current object's parent + * object. The object could be a file or directory. 
+ */ + private static String buildReachableParentKey(String fileOrDirKey) { + String[] keyParts = fileOrDirKey.split(OM_KEY_PREFIX); + // Should be /volID/bucketID/parentID/name + // The first part will be blank since key begins with a slash. + Preconditions.assertTrue(keyParts.length >= 4); + String volumeID = keyParts[1]; + String bucketID = keyParts[2]; + String parentID = keyParts[3]; + + return OM_KEY_PREFIX + + volumeID + + OM_KEY_PREFIX + + bucketID + + OM_KEY_PREFIX + + parentID; + } + + private void openReachableDB() throws IOException { + File reachableDBFile = new File(new File(omDBPath).getParentFile(), + "reachable.db"); + LOG.info("Creating database of reachable directories at {}", + reachableDBFile); + // Delete the DB from the last run if it exists. + if (reachableDBFile.exists()) { + FileUtils.deleteDirectory(reachableDBFile); + } + reachableDBPath = reachableDBFile.toString(); + reachableDB = buildReachableRocksDB(reachableDBFile); + } + + private RocksDatabase buildReachableRocksDB(File reachableDBFile) throws IOException { + DBProfile profile = new OzoneConfiguration().getEnum(HDDS_DB_PROFILE, HDDS_DEFAULT_DB_PROFILE); + Set tableConfigs = new HashSet<>(); + tableConfigs.add(new TableConfig("default", profile.getColumnFamilyOptions())); + + return RocksDatabase.open(reachableDBFile, + profile.getDBOptions(), + new ManagedWriteOptions(), + tableConfigs, false); + } + + private void closeReachableDB() { + if (reachableDB != null) { + reachableDB.close(); + } + } + + private void dropReachableTableIfExists() throws IOException { + try { + List + availableCFs = reachableDB.listColumnFamiliesEmptyOptions(reachableDBPath); + boolean cfFound = false; + for (byte[] cfNameBytes: availableCFs) { + if (new String(cfNameBytes, UTF_8).equals(new String(REACHABLE_TABLE_BYTES, UTF_8))) { + cfFound = true; + break; + } + } + + if (cfFound) { + reachableDB.dropColumnFamily(reachableCFHandle); + } + } catch (RocksDBException ex) { + throw new IOException(ex.getMessage(), ex); + } finally { + if (reachableCFHandle != null) { + reachableCFHandle.close(); + } + } + } + + private void createReachableTable() throws IOException { + reachableCFHandle = reachableDB.createColumnFamily( + new ColumnFamilyDescriptor(REACHABLE_TABLE_BYTES)); } + /** + * Define a Report to be created. + */ + public static class Report { + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + /** + * Builds one report that is the aggregate of multiple others. + */ + public Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report... 
reports) { + reachableBytes = 0; + reachableFiles = 0; + reachableDirs = 0; + unreachableBytes = 0; + unreachableFiles = 0; + unreachableDirs = 0; + + for (org.apache.hadoop.ozone.repair.om.FSORepairTool.Report report: reports) { + reachableBytes += report.reachableBytes; + reachableFiles += report.reachableFiles; + reachableDirs += report.reachableDirs; + unreachableBytes += report.unreachableBytes; + unreachableFiles += report.unreachableFiles; + unreachableDirs += report.unreachableDirs; + } + } + + private Report(org.apache.hadoop.ozone.repair.om.FSORepairTool.Report.Builder builder) { + reachableBytes = builder.reachableBytes; + reachableFiles = builder.reachableFiles; + reachableDirs = builder.reachableDirs; + unreachableBytes = builder.unreachableBytes; + unreachableFiles = builder.unreachableFiles; + unreachableDirs = builder.unreachableDirs; + } + + public long getReachableBytes() { + return reachableBytes; + } + + public long getReachableFiles() { + return reachableFiles; + } + + public long getReachableDirs() { + return reachableDirs; + } + + public long getUnreachableBytes() { + return unreachableBytes; + } + + public long getUnreachableFiles() { + return unreachableFiles; + } + + public long getUnreachableDirs() { + return unreachableDirs; + } + + @Override + public String toString() { + return "Reachable:" + + "\n\tDirectories: " + reachableDirs + + "\n\tFiles: " + reachableFiles + + "\n\tBytes: " + reachableBytes + + "\nUnreachable:" + + "\n\tDirectories: " + unreachableDirs + + "\n\tFiles: " + unreachableFiles + + "\n\tBytes: " + unreachableBytes; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + FSORepairTool.Report report = (FSORepairTool.Report) other; + + // Useful for testing. + LOG.debug("Comparing reports\nExpect:\n{}\nActual:\n{}", this, report); + + return reachableBytes == report.reachableBytes && + reachableFiles == report.reachableFiles && + reachableDirs == report.reachableDirs && + unreachableBytes == report.unreachableBytes && + unreachableFiles == report.unreachableFiles && + unreachableDirs == report.unreachableDirs; + } + + @Override + public int hashCode() { + return Objects.hash(reachableBytes, + reachableFiles, + reachableDirs, + unreachableBytes, + unreachableFiles, + unreachableDirs); + } + + /** + * Builder class for a Report. 
+ */ + public static final class Builder { + private long reachableBytes; + private long reachableFiles; + private long reachableDirs; + private long unreachableBytes; + private long unreachableFiles; + private long unreachableDirs; + + public Builder() { + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setReachableBytes(long reachableBytes) { + this.reachableBytes = reachableBytes; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setReachableFiles(long reachableFiles) { + this.reachableFiles = reachableFiles; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setReachableDirs(long reachableDirs) { + this.reachableDirs = reachableDirs; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setUnreachableBytes(long unreachableBytes) { + this.unreachableBytes = unreachableBytes; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setUnreachableFiles(long unreachableFiles) { + this.unreachableFiles = unreachableFiles; + return this; + } + + @SuppressWarnings("checkstyle:hiddenfield") + public Builder setUnreachableDirs(long unreachableDirs) { + this.unreachableDirs = unreachableDirs; + return this; + } + + public Report build() { + return new Report(this); + } + } + } }
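For reference, a minimal sketch of how the new tool and its Report are expected to be driven, for example from a test or a follow-up CLI change. The helper method name (inspectFsoTree) and the OM DB path are placeholders, the printed counters depend entirely on the DB being inspected, and the snippet assumes it sits next to FSORepairTool in the same package; only the constructor, run(), and the Report getters introduced in this patch are used.

    // Minimal sketch (not part of the patch): run the tool in dry-run mode
    // (second constructor argument) against a stopped OM's RocksDB directory
    // and print a few of the Report counters.
    static void inspectFsoTree(String omDbPath) throws IOException {
      FSORepairTool tool = new FSORepairTool(omDbPath, true);
      FSORepairTool.Report report = tool.run();
      System.out.println("Unreachable directories: " + report.getUnreachableDirs());
      System.out.println("Unreachable files: " + report.getUnreachableFiles());
      System.out.println("Unreachable bytes: " + report.getUnreachableBytes());
    }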