From 27de7460d7865105b894fc46ac28d652fc197f29 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Wed, 1 Mar 2023 19:42:57 -0800 Subject: [PATCH 1/7] HDDS-8039. Allow container inspector to run from ozone debug. --- .../container/common/impl/ContainerSet.java | 7 +- .../common/volume/MutableVolumeSet.java | 2 + .../KeyValueContainerMetadataInspector.java | 135 +++++++++++++++--- .../ozoneimpl/ContainerController.java | 4 + .../debug/container/ContainerCommands.java | 5 + .../debug/container/InspectSubcommand.java | 76 ++++++++++ 6 files changed, 209 insertions(+), 20 deletions(-) create mode 100644 hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index 0c5ae6aeebc7..2f4eaf5ec754 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -48,7 +48,7 @@ /** * Class that manages Containers created on the datanode. */ -public class ContainerSet { +public class ContainerSet implements Iterable> { private static final Logger LOG = LoggerFactory.getLogger(ContainerSet.class); @@ -201,6 +201,11 @@ public void handleVolumeFailures() { * @return {@literal Iterator>} */ public Iterator> getContainerIterator() { + return iterator(); + } + + @Override + public Iterator> iterator() { return containerMap.values().iterator(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java index 22571685df32..f0ae6863c97b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java @@ -165,6 +165,8 @@ private void initializeVolumeSet() throws IOException { } for (String locationString : rawLocations) { + LOG.info("Start initializing raw location {}", locationString); + try { StorageLocation location = StorageLocation.parse(locationString); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java index 595aa925a4fc..9d0c86f652e7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java @@ -43,7 +43,9 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.EnumMap; import java.util.List; +import java.util.Optional; import java.util.function.BooleanSupplier; import java.util.stream.Stream; @@ -100,6 +102,9 @@ public String toString() { public static final String SYSTEM_PROPERTY = "ozone.datanode.container" + ".metadata.inspector"; + static final String DELETE_COUNT_DELTA_KEY + = "dbDeleteCount_minus_aggregatedDeleteCount"; + 
private Mode mode; public KeyValueContainerMetadataInspector() { @@ -185,8 +190,8 @@ public void process(ContainerData containerData, DatanodeStore store) { } } - private JsonObject inspectContainer(KeyValueContainerData containerData, - DatanodeStore store) { + public static JsonObject inspectContainer( + KeyValueContainerData containerData, DatanodeStore store) { JsonObject containerJson = new JsonObject(); @@ -216,6 +221,8 @@ private JsonObject inspectContainer(KeyValueContainerData containerData, JsonObject chunksDirectory = getChunksDirectoryJson(new File(containerData.getChunksPath())); containerJson.add("chunksDirectory", chunksDirectory); + + checkDeleteCounts(dBMetadata, aggregates, containerJson); } catch (IOException ex) { LOG.error("Inspecting container {} failed", containerData.getContainerID(), ex); @@ -224,7 +231,29 @@ private JsonObject inspectContainer(KeyValueContainerData containerData, return containerJson; } - private JsonObject getDBMetadataJson(Table metadataTable, + static long getLong(String key, JsonObject json) { + final JsonElement e = json.get(key); + return e == null || e.isJsonNull()? 0 : e.getAsLong(); + } + + static void checkDeleteCounts(JsonObject dBMetadata, JsonObject aggregates, + JsonObject container) { + try { + final long dbDeleteCount = getLong( + OzoneConsts.PENDING_DELETE_BLOCK_COUNT, dBMetadata); + final long aggregatedDeleteCount = getLong( + PendingDeleteType.COUNT.getJsonKey(), aggregates); + final long delta = dbDeleteCount - aggregatedDeleteCount; + if (delta != 0) { + container.addProperty(DELETE_COUNT_DELTA_KEY, delta); + } + } catch (Throwable t) { + LOG.error("Failed to check delete counts for container " + + container.get("containerID"), t); + } + } + + static JsonObject getDBMetadataJson(Table metadataTable, KeyValueContainerData containerData) throws IOException { JsonObject dBMetadata = new JsonObject(); @@ -242,14 +271,13 @@ private JsonObject getDBMetadataJson(Table metadataTable, return dBMetadata; } - private JsonObject getAggregateValues(DatanodeStore store, + static JsonObject getAggregateValues(DatanodeStore store, KeyValueContainerData containerData, String schemaVersion) throws IOException { JsonObject aggregates = new JsonObject(); long usedBytesTotal = 0; long blockCountTotal = 0; - long pendingDeleteBlockCountTotal = 0; // Count normal blocks. try (BlockIterator blockIter = store.getBlockIterator(containerData.getContainerID(), @@ -262,7 +290,10 @@ private JsonObject getAggregateValues(DatanodeStore store, } // Count pending delete blocks. 
+ final EnumMap pendingDeletes; if (schemaVersion.equals(OzoneConsts.SCHEMA_V1)) { + long pendingDeleteBlockCountTotal = 0; + long pendingDeleteBytes = 0; try (BlockIterator blockIter = store.getBlockIterator(containerData.getContainerID(), containerData.getDeletingBlockKeyFilter())) { @@ -270,18 +301,22 @@ private JsonObject getAggregateValues(DatanodeStore store, while (blockIter.hasNext()) { blockCountTotal++; pendingDeleteBlockCountTotal++; - usedBytesTotal += getBlockLength(blockIter.nextBlock()); + final long bytes = getBlockLength(blockIter.nextBlock()); + usedBytesTotal += bytes; + pendingDeleteBytes += bytes; } } + pendingDeletes = PendingDeleteType.newMap( + pendingDeleteBlockCountTotal, pendingDeleteBytes); } else if (schemaVersion.equals(OzoneConsts.SCHEMA_V2)) { DatanodeStoreSchemaTwoImpl schemaTwoStore = (DatanodeStoreSchemaTwoImpl) store; - pendingDeleteBlockCountTotal = - countPendingDeletesSchemaV2(schemaTwoStore); + pendingDeletes = + countPendingDeletesSchemaV2(schemaTwoStore, containerData); } else if (schemaVersion.equals(OzoneConsts.SCHEMA_V3)) { DatanodeStoreSchemaThreeImpl schemaThreeStore = (DatanodeStoreSchemaThreeImpl) store; - pendingDeleteBlockCountTotal = + pendingDeletes = countPendingDeletesSchemaV3(schemaThreeStore, containerData); } else { throw new IOException("Failed to process deleted blocks for unknown " + @@ -290,13 +325,13 @@ private JsonObject getAggregateValues(DatanodeStore store, aggregates.addProperty("blockCount", blockCountTotal); aggregates.addProperty("usedBytes", usedBytesTotal); - aggregates.addProperty("pendingDeleteBlocks", - pendingDeleteBlockCountTotal); + PendingDeleteType.COUNT.addProperty(pendingDeletes, aggregates); + PendingDeleteType.BYTES.addProperty(pendingDeletes, aggregates); return aggregates; } - private JsonObject getChunksDirectoryJson(File chunksDir) throws IOException { + static JsonObject getChunksDirectoryJson(File chunksDir) throws IOException { JsonObject chunksDirectory = new JsonObject(); chunksDirectory.addProperty("path", chunksDir.getAbsolutePath()); @@ -437,30 +472,88 @@ private JsonObject buildErrorAndRepair(String property, JsonElement expected, return error; } - private long countPendingDeletesSchemaV2(DatanodeStoreSchemaTwoImpl - schemaTwoStore) throws IOException { + enum PendingDeleteType { + COUNT("pendingDeleteBlocks"), + BYTES("pendingDeleteBytes"); + + private final String jsonKey; + + PendingDeleteType(String jsonKey) { + this.jsonKey = jsonKey; + } + + String getJsonKey() { + return jsonKey; + } + + void addProperty(EnumMap map, + JsonObject json) { + Optional.of(map.get(this)) + .ifPresent(bytes -> json.addProperty(getJsonKey(), bytes)); + } + + static EnumMap newMap(long count, long bytes) { + final EnumMap map + = new EnumMap<>(PendingDeleteType.class); + map.put(PendingDeleteType.COUNT, count); + map.put(PendingDeleteType.BYTES, bytes); + return map; + } + } + + static EnumMap countPendingDeletesSchemaV2( + DatanodeStoreSchemaTwoImpl schemaTwoStore, + KeyValueContainerData containerData) throws IOException { long pendingDeleteBlockCountTotal = 0; + long pendingDeleteBytes = 0; + Table delTxTable = schemaTwoStore.getDeleteTransactionTable(); + final Table blockDataTable + = schemaTwoStore.getBlockDataTable(); + try (TableIterator> iterator = delTxTable.iterator()) { while (iterator.hasNext()) { DeletedBlocksTransaction txn = iterator.next().getValue(); + final List localIDs = txn.getLocalIDList(); // In schema 2, pending delete blocks are stored in the // transaction object. 
Since the actual blocks still exist in the // block data table with no prefix, they have already been // counted towards bytes used and total block count above. - pendingDeleteBlockCountTotal += txn.getLocalIDList().size(); + pendingDeleteBlockCountTotal += localIDs.size(); + pendingDeleteBytes += computePendingDeleteBytes( + localIDs, containerData, blockDataTable); } } - return pendingDeleteBlockCountTotal; + return PendingDeleteType.newMap(pendingDeleteBlockCountTotal, + pendingDeleteBytes); + } + + static long computePendingDeleteBytes(List localIDs, + KeyValueContainerData containerData, + Table blockDataTable) { + long pendingDeleteBytes = 0; + for (long id : localIDs) { + try { + final String blockKey = containerData.getBlockKey(id); + final BlockData blockData = blockDataTable.get(blockKey); + pendingDeleteBytes += blockData == null ? 0 : blockData.getSize(); + } catch (Throwable t) { + LOG.error("Failed to get block data size for local id #" + id, t); + } + } + return pendingDeleteBytes; } - private long countPendingDeletesSchemaV3( + static EnumMap countPendingDeletesSchemaV3( DatanodeStoreSchemaThreeImpl schemaThreeStore, KeyValueContainerData containerData) throws IOException { long pendingDeleteBlockCountTotal = 0; + long pendingDeleteBytes = 0; + final Table blockDataTable + = schemaThreeStore.getBlockDataTable(); try ( TableIterator> @@ -468,10 +561,14 @@ private long countPendingDeletesSchemaV3( .iterator(containerData.containerPrefix())) { while (iter.hasNext()) { DeletedBlocksTransaction delTx = iter.next().getValue(); - pendingDeleteBlockCountTotal += delTx.getLocalIDList().size(); + final List localIDs = delTx.getLocalIDList(); + pendingDeleteBlockCountTotal += localIDs.size(); + pendingDeleteBytes += computePendingDeleteBytes( + localIDs, containerData, blockDataTable); } - return pendingDeleteBlockCountTotal; } + return PendingDeleteType.newMap(pendingDeleteBlockCountTotal, + pendingDeleteBytes); } private static long getBlockLength(BlockData block) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java index 4087483d723f..683aa0b0531d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java @@ -196,6 +196,10 @@ public Iterator> getContainers() { return containerSet.getContainerIterator(); } + public Iterable> getContainerSet() { + return containerSet; + } + /** * Return an iterator of containers which are associated with the specified * volume. 
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java index ea7b10216c05..5592926bf883 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/ContainerCommands.java @@ -79,6 +79,7 @@ ListSubcommand.class, InfoSubcommand.class, ExportSubcommand.class, + InspectSubcommand.class }) @MetaInfServices(SubcommandWithParent.class) public class ContainerCommands implements Callable, SubcommandWithParent { @@ -107,6 +108,10 @@ public Class getParentType() { return OzoneDebug.class; } + OzoneConfiguration getOzoneConf() { + return parent.getOzoneConf(); + } + public void loadContainersFromVolumes() throws IOException { OzoneConfiguration conf = parent.getOzoneConf(); diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java new file mode 100644 index 000000000000..36d7ebf7166d --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.debug.container; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonObject; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerMetadataInspector; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; +import org.apache.hadoop.ozone.container.metadata.DatanodeStore; +import picocli.CommandLine; +import picocli.CommandLine.Command; + +import java.util.Iterator; +import java.util.concurrent.Callable; + +/** + * {@code ozone debug container inspect}, + * a command to run {@link KeyValueContainerMetadataInspector}. 
+ */ +@Command( + name = "inspect", + description = "Check the deletion information for all the containers.") +public class InspectSubcommand implements Callable { + + @CommandLine.ParentCommand + private ContainerCommands parent; + + @Override + public Void call() throws Exception { + final OzoneConfiguration conf = parent.getOzoneConf(); + parent.loadContainersFromVolumes(); + + final Gson gson = new GsonBuilder() + .setPrettyPrinting() + .serializeNulls() + .create(); + + for(Container container : parent.getController().getContainerSet()) { + final ContainerData data = container.getContainerData(); + if (!(data instanceof KeyValueContainerData)) { + continue; + } + final KeyValueContainerData kvData = (KeyValueContainerData)data; + try(DatanodeStore store = BlockUtils.getUncachedDatanodeStore( + kvData, conf, true)) { + final JsonObject json = KeyValueContainerMetadataInspector + .inspectContainer(kvData, store); + System.out.println(gson.toJson(json)); + } + } + + return null; + } +} From b65783374720bd03540fae51ab5c324a7ab5e261 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Thu, 2 Mar 2023 01:40:05 -0800 Subject: [PATCH 2/7] fix checkstyle --- .../keyvalue/KeyValueContainerMetadataInspector.java | 2 +- .../hadoop/ozone/debug/container/InspectSubcommand.java | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java index 9d0c86f652e7..3b73c36f6052 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java @@ -233,7 +233,7 @@ public static JsonObject inspectContainer( static long getLong(String key, JsonObject json) { final JsonElement e = json.get(key); - return e == null || e.isJsonNull()? 0 : e.getAsLong(); + return e == null || e.isJsonNull() ? 
0 : e.getAsLong(); } static void checkDeleteCounts(JsonObject dBMetadata, JsonObject aggregates, diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java index 36d7ebf7166d..82fabf596102 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java @@ -22,7 +22,6 @@ import com.google.gson.GsonBuilder; import com.google.gson.JsonObject; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -32,7 +31,6 @@ import picocli.CommandLine; import picocli.CommandLine.Command; -import java.util.Iterator; import java.util.concurrent.Callable; /** @@ -57,13 +55,13 @@ public Void call() throws Exception { .serializeNulls() .create(); - for(Container container : parent.getController().getContainerSet()) { + for (Container container : parent.getController().getContainerSet()) { final ContainerData data = container.getContainerData(); if (!(data instanceof KeyValueContainerData)) { continue; } - final KeyValueContainerData kvData = (KeyValueContainerData)data; - try(DatanodeStore store = BlockUtils.getUncachedDatanodeStore( + final KeyValueContainerData kvData = (KeyValueContainerData) data; + try (DatanodeStore store = BlockUtils.getUncachedDatanodeStore( kvData, conf, true)) { final JsonObject json = KeyValueContainerMetadataInspector .inspectContainer(kvData, store); From 53c44dc68fe117ee0f2cdef6ded68fc340762800 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Fri, 3 Mar 2023 13:23:49 -0800 Subject: [PATCH 3/7] Support repair mode --- .../KeyValueContainerMetadataInspector.java | 103 +++++++++++------- .../debug/container/InspectSubcommand.java | 23 ++-- 2 files changed, 77 insertions(+), 49 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java index 3b73c36f6052..b0a6f3f5e1f7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java @@ -102,11 +102,12 @@ public String toString() { public static final String SYSTEM_PROPERTY = "ozone.datanode.container" + ".metadata.inspector"; - static final String DELETE_COUNT_DELTA_KEY - = "dbDeleteCount_minus_aggregatedDeleteCount"; - private Mode mode; + public KeyValueContainerMetadataInspector(Mode mode) { + this.mode = mode; + } + public KeyValueContainerMetadataInspector() { mode = Mode.OFF; } @@ -160,10 +161,15 @@ public boolean isReadOnly() { @Override public void process(ContainerData containerData, DatanodeStore store) { + process(containerData, store, REPORT_LOG); + } + + public String process(ContainerData containerData, DatanodeStore store, + Logger log) { // If the system property to process container metadata was not // specified, or the inspector is unloaded, 
this method is a no-op. if (mode == Mode.OFF) { - return; + return null; } KeyValueContainerData kvData = null; @@ -172,7 +178,7 @@ public void process(ContainerData containerData, DatanodeStore store) { } else { LOG.error("This inspector only works on KeyValueContainers. Inspection " + "will not be run for container {}", containerData.getContainerID()); - return; + return null; } JsonObject containerJson = inspectContainer(kvData, store); @@ -183,15 +189,18 @@ public void process(ContainerData containerData, DatanodeStore store) { .serializeNulls() .create(); String jsonReport = gson.toJson(containerJson); - if (correct) { - REPORT_LOG.trace(jsonReport); - } else { - REPORT_LOG.error(jsonReport); + if (log != null) { + if (correct) { + log.trace(jsonReport); + } else { + log.error(jsonReport); + } } + return jsonReport; } - public static JsonObject inspectContainer( - KeyValueContainerData containerData, DatanodeStore store) { + static JsonObject inspectContainer(KeyValueContainerData containerData, + DatanodeStore store) { JsonObject containerJson = new JsonObject(); @@ -221,8 +230,6 @@ public static JsonObject inspectContainer( JsonObject chunksDirectory = getChunksDirectoryJson(new File(containerData.getChunksPath())); containerJson.add("chunksDirectory", chunksDirectory); - - checkDeleteCounts(dBMetadata, aggregates, containerJson); } catch (IOException ex) { LOG.error("Inspecting container {} failed", containerData.getContainerID(), ex); @@ -231,28 +238,6 @@ public static JsonObject inspectContainer( return containerJson; } - static long getLong(String key, JsonObject json) { - final JsonElement e = json.get(key); - return e == null || e.isJsonNull() ? 0 : e.getAsLong(); - } - - static void checkDeleteCounts(JsonObject dBMetadata, JsonObject aggregates, - JsonObject container) { - try { - final long dbDeleteCount = getLong( - OzoneConsts.PENDING_DELETE_BLOCK_COUNT, dBMetadata); - final long aggregatedDeleteCount = getLong( - PendingDeleteType.COUNT.getJsonKey(), aggregates); - final long delta = dbDeleteCount - aggregatedDeleteCount; - if (delta != 0) { - container.addProperty(DELETE_COUNT_DELTA_KEY, delta); - } - } catch (Throwable t) { - LOG.error("Failed to check delete counts for container " - + container.get("containerID"), t); - } - } - static JsonObject getDBMetadataJson(Table metadataTable, KeyValueContainerData containerData) throws IOException { JsonObject dBMetadata = new JsonObject(); @@ -356,6 +341,9 @@ private boolean checkAndRepair(JsonObject parent, Table metadataTable = store.getMetadataTable(); + final JsonObject dBMetadata = parent.getAsJsonObject("dBMetadata"); + final JsonObject aggregates = parent.getAsJsonObject("aggregates"); + // Check and repair block count. JsonElement blockCountDB = parent.getAsJsonObject("dBMetadata") .get(OzoneConsts.BLOCK_COUNT); @@ -427,6 +415,39 @@ private boolean checkAndRepair(JsonObject parent, errors.add(usedBytesError); } + // check and repair if db delete count mismatches delete transaction count. 
+ final JsonElement dbDeleteCountJson = dBMetadata.get( + OzoneConsts.PENDING_DELETE_BLOCK_COUNT); + final long dbDeleteCount = jsonToLong(dbDeleteCountJson); + final JsonElement deleteTransactionCountJson = aggregates.get( + PendingDeleteType.COUNT.getJsonKey()); + final long deleteTransactionCount = jsonToLong(deleteTransactionCountJson); + if (dbDeleteCount != deleteTransactionCount) { + passed = false; + + final BooleanSupplier deleteCountRepairAction = () -> { + if (deleteTransactionCount == 0) { + // repair only when delete transaction count is 0 + final String key = containerData.getPendingDeleteBlockCountKey(); + try { + // reset delete block count to 0 in metadata table + metadataTable.put(key, 0L); + return true; + } catch (IOException ex) { + LOG.error("Failed to reset {} for container {}.", + key, containerData.getContainerID(), ex); + } + } + return false; + }; + + final JsonObject deleteCountError = buildErrorAndRepair( + "dBMetadata." + OzoneConsts.PENDING_DELETE_BLOCK_COUNT, + dbDeleteCountJson, deleteTransactionCountJson, + deleteCountRepairAction); + errors.add(deleteCountError); + } + // check and repair chunks dir. JsonElement chunksDirPresent = parent.getAsJsonObject("chunksDirectory") .get("present"); @@ -456,6 +477,10 @@ private boolean checkAndRepair(JsonObject parent, return passed; } + static long jsonToLong(JsonElement e) { + return e == null || e.isJsonNull() ? 0 : e.getAsLong(); + } + private JsonObject buildErrorAndRepair(String property, JsonElement expected, JsonElement actual, BooleanSupplier repairAction) { JsonObject error = new JsonObject(); @@ -539,9 +564,13 @@ static long computePendingDeleteBytes(List localIDs, try { final String blockKey = containerData.getBlockKey(id); final BlockData blockData = blockDataTable.get(blockKey); - pendingDeleteBytes += blockData == null ? 
0 : blockData.getSize(); + if (blockData != null) { + pendingDeleteBytes += blockData.getSize(); + } } catch (Throwable t) { - LOG.error("Failed to get block data size for local id #" + id, t); + LOG.error("Failed to get block " + id + + " in container " + containerData.getContainerID() + + " from blockDataTable", t); } } return pendingDeleteBytes; diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java index 82fabf596102..5d848f80aca4 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java @@ -18,9 +18,6 @@ package org.apache.hadoop.ozone.debug.container; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonObject; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; @@ -31,6 +28,7 @@ import picocli.CommandLine; import picocli.CommandLine.Command; +import java.io.IOException; import java.util.concurrent.Callable; /** @@ -46,15 +44,13 @@ public class InspectSubcommand implements Callable { private ContainerCommands parent; @Override - public Void call() throws Exception { + public Void call() throws IOException { final OzoneConfiguration conf = parent.getOzoneConf(); parent.loadContainersFromVolumes(); - final Gson gson = new GsonBuilder() - .setPrettyPrinting() - .serializeNulls() - .create(); - + final KeyValueContainerMetadataInspector inspector + = new KeyValueContainerMetadataInspector( + KeyValueContainerMetadataInspector.Mode.INSPECT); for (Container container : parent.getController().getContainerSet()) { final ContainerData data = container.getContainerData(); if (!(data instanceof KeyValueContainerData)) { @@ -63,9 +59,12 @@ public Void call() throws Exception { final KeyValueContainerData kvData = (KeyValueContainerData) data; try (DatanodeStore store = BlockUtils.getUncachedDatanodeStore( kvData, conf, true)) { - final JsonObject json = KeyValueContainerMetadataInspector - .inspectContainer(kvData, store); - System.out.println(gson.toJson(json)); + final String json = inspector.process(kvData, store, null); + System.out.println(json); + } catch (IOException e) { + System.err.print("Failed to inspect container " + + kvData.getContainerID() + ": "); + e.printStackTrace(); } } From e13488f7d274a874ae46a3fb74fac0c1dd5093ce Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Fri, 3 Mar 2023 18:39:45 -0800 Subject: [PATCH 4/7] Added new tests and addressed review comments. 
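
The three-argument process(ContainerData, DatanodeStore, Logger) overload added
earlier in this series returns the JSON report as a String and, when the Logger
is null (which is what the debug subcommand passes), skips logging it. For
illustration only, not part of this patch and with a hypothetical helper class
name, a minimal sketch of how a caller could parse that report and detect a
pending-delete mismatch; the key names "#PENDINGDELETEBLOCKCOUNT" and
"pendingDeleteBlocks" are the ones exercised by the tests added below:

    import com.google.gson.JsonElement;
    import com.google.gson.JsonObject;
    import com.google.gson.JsonParser;

    /** Hypothetical helper, for illustration only. */
    final class ReportCheck {
      // Mirrors KeyValueContainerMetadataInspector.jsonToLong: a missing or
      // JSON-null value counts as zero.
      private static long asLong(JsonElement e) {
        return e == null || e.isJsonNull() ? 0 : e.getAsLong();
      }

      /** Returns true when the DB pending-delete count matches the aggregate. */
      static boolean pendingDeleteCountsMatch(String jsonReport) {
        // Assumes a non-null report produced in INSPECT or REPAIR mode that
        // contains the "dBMetadata" and "aggregates" sections.
        JsonObject report = JsonParser.parseString(jsonReport).getAsJsonObject();
        long db = asLong(report.getAsJsonObject("dBMetadata")
            .get("#PENDINGDELETEBLOCKCOUNT"));
        long aggregated = asLong(report.getAsJsonObject("aggregates")
            .get("pendingDeleteBlocks"));
        return db == aggregated;
      }
    }

The same report, pretty-printed, is what "ozone debug container inspect" writes
to stdout, one JSON document per KeyValue container replica found on the local
volumes.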
--- .../common/volume/MutableVolumeSet.java | 2 - .../KeyValueContainerMetadataInspector.java | 65 +++---- .../keyvalue/ContainerTestVersionInfo.java | 5 + .../TestKeyValueContainerIntegrityChecks.java | 3 +- ...estKeyValueContainerMetadataInspector.java | 180 ++++++++++++++++-- .../debug/container/InspectSubcommand.java | 3 +- 6 files changed, 201 insertions(+), 57 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java index f0ae6863c97b..22571685df32 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java @@ -165,8 +165,6 @@ private void initializeVolumeSet() throws IOException { } for (String locationString : rawLocations) { - LOG.info("Start initializing raw location {}", locationString); - try { StorageLocation location = StorageLocation.parse(locationString); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java index b0a6f3f5e1f7..e4718845c5c9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java @@ -43,9 +43,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.EnumMap; import java.util.List; -import java.util.Optional; import java.util.function.BooleanSupplier; import java.util.stream.Stream; @@ -275,7 +273,7 @@ static JsonObject getAggregateValues(DatanodeStore store, } // Count pending delete blocks. 
- final EnumMap pendingDeletes; + final PendingDelete pendingDelete; if (schemaVersion.equals(OzoneConsts.SCHEMA_V1)) { long pendingDeleteBlockCountTotal = 0; long pendingDeleteBytes = 0; @@ -291,17 +289,17 @@ static JsonObject getAggregateValues(DatanodeStore store, pendingDeleteBytes += bytes; } } - pendingDeletes = PendingDeleteType.newMap( + pendingDelete = new PendingDelete( pendingDeleteBlockCountTotal, pendingDeleteBytes); } else if (schemaVersion.equals(OzoneConsts.SCHEMA_V2)) { DatanodeStoreSchemaTwoImpl schemaTwoStore = (DatanodeStoreSchemaTwoImpl) store; - pendingDeletes = + pendingDelete = countPendingDeletesSchemaV2(schemaTwoStore, containerData); } else if (schemaVersion.equals(OzoneConsts.SCHEMA_V3)) { DatanodeStoreSchemaThreeImpl schemaThreeStore = (DatanodeStoreSchemaThreeImpl) store; - pendingDeletes = + pendingDelete = countPendingDeletesSchemaV3(schemaThreeStore, containerData); } else { throw new IOException("Failed to process deleted blocks for unknown " + @@ -310,8 +308,7 @@ static JsonObject getAggregateValues(DatanodeStore store, aggregates.addProperty("blockCount", blockCountTotal); aggregates.addProperty("usedBytes", usedBytesTotal); - PendingDeleteType.COUNT.addProperty(pendingDeletes, aggregates); - PendingDeleteType.BYTES.addProperty(pendingDeletes, aggregates); + pendingDelete.addToJson(aggregates); return aggregates; } @@ -419,9 +416,8 @@ private boolean checkAndRepair(JsonObject parent, final JsonElement dbDeleteCountJson = dBMetadata.get( OzoneConsts.PENDING_DELETE_BLOCK_COUNT); final long dbDeleteCount = jsonToLong(dbDeleteCountJson); - final JsonElement deleteTransactionCountJson = aggregates.get( - PendingDeleteType.COUNT.getJsonKey()); - final long deleteTransactionCount = jsonToLong(deleteTransactionCountJson); + final JsonElement deleteTxCountJson = aggregates.get(PendingDelete.COUNT); + final long deleteTransactionCount = jsonToLong(deleteTxCountJson); if (dbDeleteCount != deleteTransactionCount) { passed = false; @@ -443,7 +439,7 @@ private boolean checkAndRepair(JsonObject parent, final JsonObject deleteCountError = buildErrorAndRepair( "dBMetadata." 
+ OzoneConsts.PENDING_DELETE_BLOCK_COUNT, - dbDeleteCountJson, deleteTransactionCountJson, + dbDeleteCountJson, deleteTxCountJson, deleteCountRepairAction); errors.add(deleteCountError); } @@ -497,36 +493,25 @@ private JsonObject buildErrorAndRepair(String property, JsonElement expected, return error; } - enum PendingDeleteType { - COUNT("pendingDeleteBlocks"), - BYTES("pendingDeleteBytes"); + static class PendingDelete { + static final String COUNT = "pendingDeleteBlocks"; + static final String BYTES = "pendingDeleteBytes"; - private final String jsonKey; + private final long count; + private final long bytes; - PendingDeleteType(String jsonKey) { - this.jsonKey = jsonKey; + PendingDelete(long count, long bytes) { + this.count = count; + this.bytes = bytes; } - String getJsonKey() { - return jsonKey; - } - - void addProperty(EnumMap map, - JsonObject json) { - Optional.of(map.get(this)) - .ifPresent(bytes -> json.addProperty(getJsonKey(), bytes)); - } - - static EnumMap newMap(long count, long bytes) { - final EnumMap map - = new EnumMap<>(PendingDeleteType.class); - map.put(PendingDeleteType.COUNT, count); - map.put(PendingDeleteType.BYTES, bytes); - return map; + void addToJson(JsonObject json) { + json.addProperty(COUNT, count); + json.addProperty(BYTES, bytes); } } - static EnumMap countPendingDeletesSchemaV2( + static PendingDelete countPendingDeletesSchemaV2( DatanodeStoreSchemaTwoImpl schemaTwoStore, KeyValueContainerData containerData) throws IOException { long pendingDeleteBlockCountTotal = 0; @@ -552,7 +537,7 @@ static EnumMap countPendingDeletesSchemaV2( } } - return PendingDeleteType.newMap(pendingDeleteBlockCountTotal, + return new PendingDelete(pendingDeleteBlockCountTotal, pendingDeleteBytes); } @@ -567,16 +552,16 @@ static long computePendingDeleteBytes(List localIDs, if (blockData != null) { pendingDeleteBytes += blockData.getSize(); } - } catch (Throwable t) { + } catch (IOException e) { LOG.error("Failed to get block " + id + " in container " + containerData.getContainerID() - + " from blockDataTable", t); + + " from blockDataTable", e); } } return pendingDeleteBytes; } - static EnumMap countPendingDeletesSchemaV3( + static PendingDelete countPendingDeletesSchemaV3( DatanodeStoreSchemaThreeImpl schemaThreeStore, KeyValueContainerData containerData) throws IOException { long pendingDeleteBlockCountTotal = 0; @@ -596,7 +581,7 @@ static EnumMap countPendingDeletesSchemaV3( localIDs, containerData, blockDataTable); } } - return PendingDeleteType.newMap(pendingDeleteBlockCountTotal, + return new PendingDelete(pendingDeleteBlockCountTotal, pendingDeleteBytes); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/ContainerTestVersionInfo.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/ContainerTestVersionInfo.java index c1292ea46c3e..61487f18b91c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/ContainerTestVersionInfo.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/ContainerTestVersionInfo.java @@ -65,6 +65,11 @@ public static Iterable versionParameters() { .collect(toList()); } + @Override + public String toString() { + return "schema=" + schemaVersion + ", layout=" + layout; + } + public static List getLayoutList() { return layoutList; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java index cf18fa8948db..49dc2e9554d2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerIntegrityChecks.java @@ -58,7 +58,7 @@ */ public class TestKeyValueContainerIntegrityChecks { - private static final Logger LOG = + static final Logger LOG = LoggerFactory.getLogger(TestKeyValueContainerIntegrityChecks.class); private final ContainerLayoutTestInfo containerLayoutTestInfo; @@ -75,6 +75,7 @@ public class TestKeyValueContainerIntegrityChecks { public TestKeyValueContainerIntegrityChecks( ContainerTestVersionInfo versionInfo) { + LOG.info("new {} for {}", getClass().getSimpleName(), versionInfo); this.conf = new OzoneConfiguration(); ContainerTestVersionInfo.setTestSchemaVersion( versionInfo.getSchemaVersion(), conf); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java index aea451bc3a0e..52253aa63ef5 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java @@ -22,6 +22,8 @@ import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonPrimitive; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction; +import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.container.common.interfaces.ContainerInspector; @@ -29,6 +31,9 @@ import org.apache.hadoop.ozone.container.common.utils.ContainerInspectorUtil; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; +import org.apache.hadoop.ozone.container.metadata.DatanodeStore; +import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl; +import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaTwoImpl; import org.apache.log4j.PatternLayout; import org.apache.ozone.test.GenericTestUtils; import org.junit.Assert; @@ -36,6 +41,11 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + /** * Tests for {@link KeyValueContainerMetadataInspector}. 
*/ @@ -113,7 +123,7 @@ public void testIncorrectTotalsNoData() throws Exception { KeyValueContainer container = createClosedContainer(createBlocks); setDBBlockAndByteCounts(container.getContainerData(), setBlocks, setBytes); inspectThenRepairOnIncorrectContainer(container.getContainerData(), - createBlocks, setBlocks, setBytes); + createBlocks, setBlocks, setBytes, 0, 0, true); } @Test @@ -126,7 +136,7 @@ public void testIncorrectTotalsWithData() throws Exception { KeyValueContainer container = createOpenContainer(createBlocks); setDBBlockAndByteCounts(container.getContainerData(), setBlocks, setBytes); inspectThenRepairOnIncorrectContainer(container.getContainerData(), - createBlocks, setBlocks, setBytes); + createBlocks, setBlocks, setBytes, 0, 0, true); } @Test @@ -151,6 +161,93 @@ public void testCorrectTotalsWithData() throws Exception { inspectThenRepairOnCorrectContainer(container.getContainerData()); } + static class DeletedBlocksTransactionGeneratorForTesting { + private long txId = 100; + private long localId = 2000; + + DeletedBlocksTransaction next(long containerId, int numBlocks) { + final DeletedBlocksTransaction.Builder b + = DeletedBlocksTransaction.newBuilder() + .setContainerID(containerId) + .setTxID(txId++) + .setCount(0); + for (int i = 0; i < numBlocks; i++) { + b.addLocalID(localId++); + } + return b.build(); + } + + List generate( + long containerId, List numBlocks) { + final List transactions = new ArrayList<>(); + for (int n : numBlocks) { + transactions.add(next(containerId, n)); + } + return transactions; + } + } + + static final DeletedBlocksTransactionGeneratorForTesting GENERATOR + = new DeletedBlocksTransactionGeneratorForTesting(); + + @Test + public void testCorrectDeleteWithTransaction() throws Exception { + final int createBlocks = 4; + final int setBytes = CHUNK_LEN * CHUNKS_PER_BLOCK * createBlocks; + final int deleteCount = 10; + + final KeyValueContainer container = createClosedContainer(createBlocks); + final List deleteTransactions + = GENERATOR.generate(container.getContainerData().getContainerID(), + Arrays.asList(1, 6, 3)); + LOG.info("deleteTransactions = {}", deleteTransactions); + + setDBBlockAndByteCounts(container.getContainerData(), createBlocks, + setBytes, deleteCount, deleteTransactions); + inspectThenRepairOnCorrectContainer(container.getContainerData()); + } + + @Test + public void testIncorrectDeleteWithTransaction() throws Exception { + final int createBlocks = 4; + final int setBytes = CHUNK_LEN * CHUNKS_PER_BLOCK * createBlocks; + final int deleteCount = 10; + + final KeyValueContainer container = createClosedContainer(createBlocks); + final List deleteTransactions + = GENERATOR.generate(container.getContainerData().getContainerID(), + Arrays.asList(1, 3)); + final long numDeletedLocalIds = deleteTransactions.stream() + .mapToLong(DeletedBlocksTransaction::getLocalIDCount).sum(); + LOG.info("deleteTransactions = {}", deleteTransactions); + + setDBBlockAndByteCounts(container.getContainerData(), createBlocks, + setBytes, deleteCount, deleteTransactions); + inspectThenRepairOnIncorrectContainer(container.getContainerData(), + createBlocks, createBlocks, setBytes, + deleteCount, numDeletedLocalIds, numDeletedLocalIds == 0); + } + + @Test + public void testIncorrectDeleteWithoutTransaction() throws Exception { + final int createBlocks = 4; + final int setBytes = CHUNK_LEN * CHUNKS_PER_BLOCK * createBlocks; + final int deleteCount = 10; + + final KeyValueContainer container = createClosedContainer(createBlocks); + final List 
deleteTransactions + = Collections.emptyList(); + final long numDeletedLocalIds = deleteTransactions.stream() + .mapToLong(DeletedBlocksTransaction::getLocalIDCount).sum(); + LOG.info("deleteTransactions = {}", deleteTransactions); + + setDBBlockAndByteCounts(container.getContainerData(), createBlocks, + setBytes, deleteCount, deleteTransactions); + inspectThenRepairOnIncorrectContainer(container.getContainerData(), + createBlocks, createBlocks, setBytes, + deleteCount, numDeletedLocalIds, numDeletedLocalIds == 0); + } + public void inspectThenRepairOnCorrectContainer( KeyValueContainerData containerData) throws Exception { // No output for correct containers. @@ -172,7 +269,8 @@ public void inspectThenRepairOnCorrectContainer( */ public void inspectThenRepairOnIncorrectContainer( KeyValueContainerData containerData, int createdBlocks, int setBlocks, - int setBytes) throws Exception { + int setBytes, int deleteCount, long numDeletedLocalIds, + boolean shouldRepair) throws Exception { int createdBytes = CHUNK_LEN * CHUNKS_PER_BLOCK * createdBlocks; int createdFiles = 0; switch (getChunkLayout()) { @@ -194,7 +292,7 @@ public void inspectThenRepairOnIncorrectContainer( checkJsonReportForIncorrectContainer(inspectJson, containerState, createdBlocks, setBlocks, createdBytes, setBytes, - createdFiles, false); + createdFiles, deleteCount, numDeletedLocalIds, false); // Container should not have been modified in inspect mode. checkDBBlockAndByteCounts(containerData, setBlocks, setBytes); @@ -203,7 +301,7 @@ public void inspectThenRepairOnIncorrectContainer( KeyValueContainerMetadataInspector.Mode.REPAIR); checkJsonReportForIncorrectContainer(repairJson, containerState, createdBlocks, setBlocks, createdBytes, setBytes, - createdFiles, true); + createdFiles, deleteCount, numDeletedLocalIds, shouldRepair); // Metadata keys should have been fixed. checkDBBlockAndByteCounts(containerData, createdBlocks, createdBytes); } @@ -212,6 +310,7 @@ public void inspectThenRepairOnIncorrectContainer( private void checkJsonReportForIncorrectContainer(JsonObject inspectJson, String expectedContainerState, long createdBlocks, long setBlocks, long createdBytes, long setBytes, long createdFiles, + long deleteCount, long deleteTransactions, boolean shouldRepair) { // Check main container properties. Assert.assertEquals(inspectJson.get("containerID").getAsLong(), @@ -232,7 +331,7 @@ private void checkJsonReportForIncorrectContainer(JsonObject inspectJson, jsonAggregates.get("blockCount").getAsLong()); Assert.assertEquals(createdBytes, jsonAggregates.get("usedBytes").getAsLong()); - Assert.assertEquals(0, + Assert.assertEquals(deleteTransactions, jsonAggregates.get("pendingDeleteBlocks").getAsLong()); // Check chunks directory. @@ -243,17 +342,30 @@ private void checkJsonReportForIncorrectContainer(JsonObject inspectJson, // Check errors. 
checkJsonErrorsReport(inspectJson, "dBMetadata.#BLOCKCOUNT", - new JsonPrimitive(createdBlocks), new JsonPrimitive(setBlocks), - shouldRepair); + createdBlocks, setBlocks, shouldRepair); checkJsonErrorsReport(inspectJson, "dBMetadata.#BYTESUSED", - new JsonPrimitive(createdBytes), new JsonPrimitive(setBytes), - shouldRepair); + createdBytes, setBytes, shouldRepair); + checkJsonErrorsReport(inspectJson, "dBMetadata.#PENDINGDELETEBLOCKCOUNT", + deleteCount, deleteTransactions, shouldRepair); + } + + private void checkJsonErrorsReport( + JsonObject jsonReport, String propertyValue, + long correctExpected, long correctActual, + boolean correctRepair) { + if (correctExpected == correctActual) { + return; + } + checkJsonErrorsReport(jsonReport, propertyValue, + new JsonPrimitive(correctExpected), + new JsonPrimitive(correctActual), + correctRepair); } /** - * Checks the erorr list in the provided JsonReport for an error matching - * the template passed in with the parameters. - */ + * Checks the erorr list in the provided JsonReport for an error matching + * the template passed in with the parameters. + */ private void checkJsonErrorsReport(JsonObject jsonReport, String propertyValue, JsonPrimitive correctExpected, JsonPrimitive correctActual, boolean correctRepair) { @@ -290,11 +402,53 @@ private void checkJsonErrorsReport(JsonObject jsonReport, public void setDBBlockAndByteCounts(KeyValueContainerData containerData, long blockCount, long byteCount) throws Exception { + setDBBlockAndByteCounts(containerData, blockCount, byteCount, + 0, Collections.emptyList()); + } + + public void setDBBlockAndByteCounts(KeyValueContainerData containerData, + long blockCount, long byteCount, + long dbDeleteCount, List deleteTransactions) + throws Exception { try (DBHandle db = BlockUtils.getDB(containerData, getConf())) { Table metadataTable = db.getStore().getMetadataTable(); // Don't care about in memory state. Just change the DB values. 
metadataTable.put(containerData.getBlockCountKey(), blockCount); metadataTable.put(containerData.getBytesUsedKey(), byteCount); + metadataTable.put(containerData.getPendingDeleteBlockCountKey(), + dbDeleteCount); + + final DatanodeStore store = db.getStore(); + LOG.info("store {}", store.getClass().getSimpleName()); + if (store instanceof DatanodeStoreSchemaTwoImpl) { + final DatanodeStoreSchemaTwoImpl s2store + = (DatanodeStoreSchemaTwoImpl)store; + final Table delTxTable + = s2store.getDeleteTransactionTable(); + try (BatchOperation batch = store.getBatchHandler() + .initBatchOperation()) { + for (DeletedBlocksTransaction t : deleteTransactions) { + delTxTable.putWithBatch(batch, t.getTxID(), t); + } + store.getBatchHandler().commitBatchOperation(batch); + } + } else if (store instanceof DatanodeStoreSchemaThreeImpl) { + final DatanodeStoreSchemaThreeImpl s3store + = (DatanodeStoreSchemaThreeImpl)store; + final Table delTxTable + = s3store.getDeleteTransactionTable(); + try (BatchOperation batch = store.getBatchHandler() + .initBatchOperation()) { + for (DeletedBlocksTransaction t : deleteTransactions) { + final String key = containerData.getDeleteTxnKey(t.getTxID()); + delTxTable.putWithBatch(batch, key, t); + } + store.getBatchHandler().commitBatchOperation(batch); + } + } else { + throw new UnsupportedOperationException( + "Unsupported store class " + store.getClass().getSimpleName()); + } } } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java index 5d848f80aca4..86f5d54e202c 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/container/InspectSubcommand.java @@ -37,7 +37,8 @@ */ @Command( name = "inspect", - description = "Check the deletion information for all the containers.") + description + = "Check the metadata of all container replicas on this datanode.") public class InspectSubcommand implements Callable { @CommandLine.ParentCommand From a794642bd15b122bebdb69d52c7bd84751c3b43b Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Fri, 3 Mar 2023 18:49:05 -0800 Subject: [PATCH 5/7] Some minor changes. 
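
For background on the numbers this series verifies (illustration only, not part
of this change, with hypothetical class and parameter names): in schema V2 and
V3 the blocks referenced by a delete transaction still live in the regular
block data table, so the aggregated pendingDeleteBlocks is the sum of the
transactions' local ID counts, and pendingDeleteBytes is obtained by resolving
each local ID back to its block data entry, treating entries that are already
gone as zero bytes. A minimal sketch of that aggregation, with the
RocksDB-backed tables abstracted behind a lookup function:

    import java.util.List;
    import java.util.function.LongFunction;

    /** Hypothetical sketch of the pending-delete aggregation, illustration only. */
    final class PendingDeleteSketch {
      /**
       * @param localIdsPerTxn  local block IDs recorded by each delete transaction
       * @param blockSizeLookup block size for a local ID, or null when the block
       *                        is no longer in the block data table
       * @return {count, bytes}
       */
      static long[] aggregate(List<List<Long>> localIdsPerTxn,
          LongFunction<Long> blockSizeLookup) {
        long count = 0;
        long bytes = 0;
        for (List<Long> localIds : localIdsPerTxn) {
          count += localIds.size();
          for (long id : localIds) {
            Long size = blockSizeLookup.apply(id);
            if (size != null) {
              bytes += size;   // missing entries contribute zero bytes
            }
          }
        }
        return new long[] {count, bytes};
      }
    }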
--- .../TestKeyValueContainerMetadataInspector.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java index 52253aa63ef5..76e6470a3385 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java @@ -202,7 +202,7 @@ public void testCorrectDeleteWithTransaction() throws Exception { Arrays.asList(1, 6, 3)); LOG.info("deleteTransactions = {}", deleteTransactions); - setDBBlockAndByteCounts(container.getContainerData(), createBlocks, + setDB(container.getContainerData(), createBlocks, setBytes, deleteCount, deleteTransactions); inspectThenRepairOnCorrectContainer(container.getContainerData()); } @@ -221,7 +221,7 @@ public void testIncorrectDeleteWithTransaction() throws Exception { .mapToLong(DeletedBlocksTransaction::getLocalIDCount).sum(); LOG.info("deleteTransactions = {}", deleteTransactions); - setDBBlockAndByteCounts(container.getContainerData(), createBlocks, + setDB(container.getContainerData(), createBlocks, setBytes, deleteCount, deleteTransactions); inspectThenRepairOnIncorrectContainer(container.getContainerData(), createBlocks, createBlocks, setBytes, @@ -241,7 +241,7 @@ public void testIncorrectDeleteWithoutTransaction() throws Exception { .mapToLong(DeletedBlocksTransaction::getLocalIDCount).sum(); LOG.info("deleteTransactions = {}", deleteTransactions); - setDBBlockAndByteCounts(container.getContainerData(), createBlocks, + setDB(container.getContainerData(), createBlocks, setBytes, deleteCount, deleteTransactions); inspectThenRepairOnIncorrectContainer(container.getContainerData(), createBlocks, createBlocks, setBytes, @@ -363,9 +363,9 @@ private void checkJsonErrorsReport( } /** - * Checks the erorr list in the provided JsonReport for an error matching - * the template passed in with the parameters. - */ + * Checks the erorr list in the provided JsonReport for an error matching + * the template passed in with the parameters. + */ private void checkJsonErrorsReport(JsonObject jsonReport, String propertyValue, JsonPrimitive correctExpected, JsonPrimitive correctActual, boolean correctRepair) { @@ -402,11 +402,11 @@ private void checkJsonErrorsReport(JsonObject jsonReport, public void setDBBlockAndByteCounts(KeyValueContainerData containerData, long blockCount, long byteCount) throws Exception { - setDBBlockAndByteCounts(containerData, blockCount, byteCount, + setDB(containerData, blockCount, byteCount, 0, Collections.emptyList()); } - public void setDBBlockAndByteCounts(KeyValueContainerData containerData, + public void setDB(KeyValueContainerData containerData, long blockCount, long byteCount, long dbDeleteCount, List deleteTransactions) throws Exception { From 448e20d292e5670c773373e3b9f255895e336bc6 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Tue, 14 Mar 2023 17:24:24 +0800 Subject: [PATCH 6/7] Further addressed review comments. 
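
For context (illustration only, not part of this change): every consistency
check in checkAndRepair follows the same pattern, compare a value stored in the
container's metadata table with the value recomputed from the container
contents, record an error entry when they differ, and only run the supplied
repair action when the inspector is in REPAIR mode. A minimal sketch of that
pattern with hypothetical names; the real error entries are produced by
buildErrorAndRepair:

    import com.google.gson.JsonArray;
    import com.google.gson.JsonObject;
    import java.util.function.BooleanSupplier;

    /** Hypothetical sketch of the check/repair pattern, for illustration only. */
    final class CheckAndRepairSketch {
      /** Returns true when the stored and recomputed values agree. */
      static boolean check(String property, long storedValue,
          long recomputedValue, BooleanSupplier repairAction,
          boolean repairMode, JsonArray errors) {
        if (storedValue == recomputedValue) {
          return true;                        // consistent, nothing to report
        }
        JsonObject error = new JsonObject();  // field names are illustrative
        error.addProperty("property", property);
        error.addProperty("expected", recomputedValue);
        error.addProperty("actual", storedValue);
        boolean repaired = repairMode && repairAction.getAsBoolean();
        error.addProperty("repaired", repaired);
        errors.add(error);
        return false;
      }
    }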
--- .../KeyValueContainerMetadataInspector.java | 30 +++++++++---------- ...estKeyValueContainerMetadataInspector.java | 20 ++++++------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java index e4718845c5c9..a7125a8cf954 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java @@ -413,33 +413,31 @@ private boolean checkAndRepair(JsonObject parent, } // check and repair if db delete count mismatches delete transaction count. - final JsonElement dbDeleteCountJson = dBMetadata.get( + final JsonElement pendingDeleteCountDB = dBMetadata.get( OzoneConsts.PENDING_DELETE_BLOCK_COUNT); - final long dbDeleteCount = jsonToLong(dbDeleteCountJson); - final JsonElement deleteTxCountJson = aggregates.get(PendingDelete.COUNT); - final long deleteTransactionCount = jsonToLong(deleteTxCountJson); + final long dbDeleteCount = jsonToLong(pendingDeleteCountDB); + final JsonElement pendingDeleteCountAggregate + = aggregates.get(PendingDelete.COUNT); + final long deleteTransactionCount = jsonToLong(pendingDeleteCountAggregate); if (dbDeleteCount != deleteTransactionCount) { passed = false; final BooleanSupplier deleteCountRepairAction = () -> { - if (deleteTransactionCount == 0) { - // repair only when delete transaction count is 0 - final String key = containerData.getPendingDeleteBlockCountKey(); - try { - // reset delete block count to 0 in metadata table - metadataTable.put(key, 0L); - return true; - } catch (IOException ex) { - LOG.error("Failed to reset {} for container {}.", - key, containerData.getContainerID(), ex); - } + final String key = containerData.getPendingDeleteBlockCountKey(); + try { + // reset delete block count to 0 in metadata table + metadataTable.put(key, 0L); + return true; + } catch (IOException ex) { + LOG.error("Failed to reset {} for container {}.", + key, containerData.getContainerID(), ex); } return false; }; final JsonObject deleteCountError = buildErrorAndRepair( "dBMetadata." 
+ OzoneConsts.PENDING_DELETE_BLOCK_COUNT, - dbDeleteCountJson, deleteTxCountJson, + pendingDeleteCountAggregate, pendingDeleteCountDB, deleteCountRepairAction); errors.add(deleteCountError); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java index 76e6470a3385..bb2a71c58292 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java @@ -123,7 +123,7 @@ public void testIncorrectTotalsNoData() throws Exception { KeyValueContainer container = createClosedContainer(createBlocks); setDBBlockAndByteCounts(container.getContainerData(), setBlocks, setBytes); inspectThenRepairOnIncorrectContainer(container.getContainerData(), - createBlocks, setBlocks, setBytes, 0, 0, true); + createBlocks, setBlocks, setBytes, 0, 0); } @Test @@ -136,7 +136,7 @@ public void testIncorrectTotalsWithData() throws Exception { KeyValueContainer container = createOpenContainer(createBlocks); setDBBlockAndByteCounts(container.getContainerData(), setBlocks, setBytes); inspectThenRepairOnIncorrectContainer(container.getContainerData(), - createBlocks, setBlocks, setBytes, 0, 0, true); + createBlocks, setBlocks, setBytes, 0, 0); } @Test @@ -225,7 +225,7 @@ public void testIncorrectDeleteWithTransaction() throws Exception { setBytes, deleteCount, deleteTransactions); inspectThenRepairOnIncorrectContainer(container.getContainerData(), createBlocks, createBlocks, setBytes, - deleteCount, numDeletedLocalIds, numDeletedLocalIds == 0); + deleteCount, numDeletedLocalIds); } @Test @@ -245,7 +245,7 @@ public void testIncorrectDeleteWithoutTransaction() throws Exception { setBytes, deleteCount, deleteTransactions); inspectThenRepairOnIncorrectContainer(container.getContainerData(), createBlocks, createBlocks, setBytes, - deleteCount, numDeletedLocalIds, numDeletedLocalIds == 0); + deleteCount, numDeletedLocalIds); } public void inspectThenRepairOnCorrectContainer( @@ -269,8 +269,8 @@ public void inspectThenRepairOnCorrectContainer( */ public void inspectThenRepairOnIncorrectContainer( KeyValueContainerData containerData, int createdBlocks, int setBlocks, - int setBytes, int deleteCount, long numDeletedLocalIds, - boolean shouldRepair) throws Exception { + int setBytes, int deleteCount, long numDeletedLocalIds) + throws Exception { int createdBytes = CHUNK_LEN * CHUNKS_PER_BLOCK * createdBlocks; int createdFiles = 0; switch (getChunkLayout()) { @@ -301,7 +301,7 @@ public void inspectThenRepairOnIncorrectContainer( KeyValueContainerMetadataInspector.Mode.REPAIR); checkJsonReportForIncorrectContainer(repairJson, containerState, createdBlocks, setBlocks, createdBytes, setBytes, - createdFiles, deleteCount, numDeletedLocalIds, shouldRepair); + createdFiles, deleteCount, numDeletedLocalIds, true); // Metadata keys should have been fixed. 
     checkDBBlockAndByteCounts(containerData, createdBlocks, createdBytes);
   }
@@ -310,7 +310,7 @@ public void inspectThenRepairOnIncorrectContainer(
   private void checkJsonReportForIncorrectContainer(JsonObject inspectJson,
       String expectedContainerState, long createdBlocks, long setBlocks,
       long createdBytes, long setBytes, long createdFiles,
-      long deleteCount, long deleteTransactions,
+      long setPendingDeleteCount, long createdPendingDeleteCount,
       boolean shouldRepair) {
     // Check main container properties.
     Assert.assertEquals(inspectJson.get("containerID").getAsLong(),
@@ -331,7 +331,7 @@ private void checkJsonReportForIncorrectContainer(JsonObject inspectJson,
         jsonAggregates.get("blockCount").getAsLong());
     Assert.assertEquals(createdBytes,
         jsonAggregates.get("usedBytes").getAsLong());
-    Assert.assertEquals(deleteTransactions,
+    Assert.assertEquals(createdPendingDeleteCount,
         jsonAggregates.get("pendingDeleteBlocks").getAsLong());
 
     // Check chunks directory.
@@ -346,7 +346,7 @@ private void checkJsonReportForIncorrectContainer(JsonObject inspectJson,
     checkJsonErrorsReport(inspectJson, "dBMetadata.#BYTESUSED",
         createdBytes, setBytes, shouldRepair);
     checkJsonErrorsReport(inspectJson, "dBMetadata.#PENDINGDELETEBLOCKCOUNT",
-        deleteCount, deleteTransactions, shouldRepair);
+        createdPendingDeleteCount, setPendingDeleteCount, shouldRepair);
   }
 
   private void checkJsonErrorsReport(

From 6550acb0e23139c44cf4a6db0e450167e162315a Mon Sep 17 00:00:00 2001
From: Tsz-Wo Nicholas Sze
Date: Sun, 26 Mar 2023 14:17:56 +0800
Subject: [PATCH 7/7] Fix deleteCountRepairAction and checkDBBlockAndByteCounts.

---
 .../KeyValueContainerMetadataInspector.java   |  4 ++--
 ...estKeyValueContainerMetadataInspector.java | 23 +++++++++++++++----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java
index a7125a8cf954..6a41416bbaa0 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerMetadataInspector.java
@@ -425,8 +425,8 @@ private boolean checkAndRepair(JsonObject parent,
       final BooleanSupplier deleteCountRepairAction = () -> {
         final String key = containerData.getPendingDeleteBlockCountKey();
         try {
-          // reset delete block count to 0 in metadata table
-          metadataTable.put(key, 0L);
+          // set delete block count in metadata table to the transaction count
+          metadataTable.put(key, deleteTransactionCount);
           return true;
         } catch (IOException ex) {
           LOG.error("Failed to reset {} for container {}.",
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java
index bb2a71c58292..f201bf5b4827 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerMetadataInspector.java
@@ -200,7 +200,11 @@ public void testCorrectDeleteWithTransaction() throws Exception {
     final List<DeletedBlocksTransaction> deleteTransactions
         = GENERATOR.generate(container.getContainerData().getContainerID(),
             Arrays.asList(1, 6, 3));
+    final long numDeletedLocalIds = deleteTransactions.stream()
+        .mapToLong(DeletedBlocksTransaction::getLocalIDCount).sum();
     LOG.info("deleteTransactions = {}", deleteTransactions);
+    LOG.info("numDeletedLocalIds = {}", numDeletedLocalIds);
+    Assert.assertEquals(deleteCount, numDeletedLocalIds);
 
     setDB(container.getContainerData(), createBlocks,
         setBytes, deleteCount, deleteTransactions);
@@ -220,6 +224,7 @@ public void testIncorrectDeleteWithTransaction() throws Exception {
     final long numDeletedLocalIds = deleteTransactions.stream()
         .mapToLong(DeletedBlocksTransaction::getLocalIDCount).sum();
     LOG.info("deleteTransactions = {}", deleteTransactions);
+    LOG.info("numDeletedLocalIds = {}", numDeletedLocalIds);
 
     setDB(container.getContainerData(), createBlocks,
         setBytes, deleteCount, deleteTransactions);
@@ -240,6 +245,7 @@ public void testIncorrectDeleteWithoutTransaction() throws Exception {
     final long numDeletedLocalIds = deleteTransactions.stream()
         .mapToLong(DeletedBlocksTransaction::getLocalIDCount).sum();
     LOG.info("deleteTransactions = {}", deleteTransactions);
+    LOG.info("numDeletedLocalIds = {}", numDeletedLocalIds);
 
     setDB(container.getContainerData(), createBlocks,
         setBytes, deleteCount, deleteTransactions);
@@ -266,6 +272,9 @@ public void inspectThenRepairOnCorrectContainer(
    * @param createdBlocks Number of blocks to create in the container.
    * @param setBlocks total block count value set in the database.
    * @param setBytes total used bytes value set in the database.
+   * @param deleteCount total deleted block count value set in the database.
+   * @param numDeletedLocalIds total number of deleted block local ids
+   *                           in the transactions
    */
   public void inspectThenRepairOnIncorrectContainer(
       KeyValueContainerData containerData, int createdBlocks, int setBlocks,
@@ -294,7 +303,7 @@ public void inspectThenRepairOnIncorrectContainer(
         containerState, createdBlocks, setBlocks, createdBytes, setBytes,
         createdFiles, deleteCount, numDeletedLocalIds, false);
     // Container should not have been modified in inspect mode.
-    checkDBBlockAndByteCounts(containerData, setBlocks, setBytes);
+    checkDbCounts(containerData, setBlocks, setBytes, deleteCount);
 
     // Now repair the container.
     JsonObject repairJson = runInspectorAndGetReport(containerData,
@@ -303,7 +312,8 @@ public void inspectThenRepairOnIncorrectContainer(
         containerState, createdBlocks, setBlocks, createdBytes, setBytes,
         createdFiles, deleteCount, numDeletedLocalIds, true);
     // Metadata keys should have been fixed.
-    checkDBBlockAndByteCounts(containerData, createdBlocks, createdBytes);
+    checkDbCounts(containerData, createdBlocks, createdBytes,
+        numDeletedLocalIds);
   }
 
   @SuppressWarnings("checkstyle:ParameterNumber")
@@ -452,8 +462,9 @@ public void setDB(KeyValueContainerData containerData,
     }
   }
 
-  public void checkDBBlockAndByteCounts(KeyValueContainerData containerData,
-      long expectedBlockCount, long expectedBytesUsed) throws Exception {
+  void checkDbCounts(KeyValueContainerData containerData,
+      long expectedBlockCount, long expectedBytesUsed,
+      long expectedDeletedCount) throws Exception {
     try (DBHandle db = BlockUtils.getDB(containerData, getConf())) {
       Table<String, Long> metadataTable = db.getStore().getMetadataTable();
 
@@ -462,6 +473,10 @@ public void checkDBBlockAndByteCounts(KeyValueContainerData containerData,
 
       long blockCount = metadataTable.get(containerData.getBlockCountKey());
       Assert.assertEquals(expectedBlockCount, blockCount);
+
+      final long deleteCount = metadataTable.get(
+          containerData.getPendingDeleteBlockCountKey());
+      Assert.assertEquals(expectedDeletedCount, deleteCount);
     }
   }
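
For reviewers who want to see the PATCH 7 change in isolation: the repair action now writes the pending-delete count aggregated from the delete transactions instead of resetting the counter to 0, and the updated test helper checkDbCounts asserts that value after repair. The short Java sketch below mirrors only that reconciliation step; the class name and the java.util.Map standing in for the container's RocksDB metadata table are illustrative assumptions for this example, not part of the Ozone code touched by this series.

import java.util.HashMap;
import java.util.Map;
import java.util.function.BooleanSupplier;

// Standalone sketch (not Ozone code): reconcile a stored pending-delete block
// count with the count aggregated from delete transactions, as PATCH 7 does.
public final class PendingDeleteCountRepairSketch {
  public static void main(String[] args) {
    // Hypothetical in-memory stand-in for the container metadata table.
    final Map<String, Long> metadataTable = new HashMap<>();
    metadataTable.put("#PENDINGDELETEBLOCKCOUNT", 5L);

    // Count aggregated from the delete transactions (arbitrary sample value).
    final long deleteTransactionCount = 10L;

    final long dbDeleteCount = metadataTable.get("#PENDINGDELETEBLOCKCOUNT");
    if (dbDeleteCount != deleteTransactionCount) {
      // PATCH 7 behaviour: align the stored count with the aggregated count
      // (the pre-fix behaviour reset it to 0 instead).
      final BooleanSupplier repairAction = () -> {
        metadataTable.put("#PENDINGDELETEBLOCKCOUNT", deleteTransactionCount);
        return true;
      };
      System.out.println("repaired = " + repairAction.getAsBoolean());
    }
    System.out.println("pendingDeleteBlockCount = "
        + metadataTable.get("#PENDINGDELETEBLOCKCOUNT"));
  }
}

Run as-is, this prints repaired = true and pendingDeleteBlockCount = 10, which matches the end state the updated test expects when checkDbCounts is called with numDeletedLocalIds after repair.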