From 9da30858adac9258dc1205599ced3c65fa810801 Mon Sep 17 00:00:00 2001
From: arafat
Date: Mon, 21 Jul 2025 13:49:00 +0530
Subject: [PATCH] HDDS-13479. Support Tracking of Deleted Files and
 Directories in Recon NSSummary.

---
 .../ozone/recon/api/OMDBInsightEndpoint.java  |  12 +-
 .../ozone/recon/api/types/NSSummary.java      |  77 ++-
 .../ozone/recon/codec/NSSummaryCodec.java     |  60 ++-
 .../tasks/NSSummaryTaskDbEventHandler.java    | 116 +++++
 .../recon/tasks/NSSummaryTaskWithFSO.java     | 123 ++++-
 .../TestNSSummaryDeletedTrackingWithFSO.java  | 452 ++++++++++++++++++
 6 files changed, 826 insertions(+), 14 deletions(-)
 create mode 100644 hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryDeletedTrackingWithFSO.java

diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java
index 20e0868351ac..009faa858212 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/OMDBInsightEndpoint.java
@@ -641,10 +641,20 @@ protected long fetchSizeForDeletedDirectory(long objectId)
     if (nsSummary == null) {
       return 0L;
     }
-    long totalSize = nsSummary.getSizeOfFiles();
+
+    // Include both active files and deleted files that haven't been physically deleted yet
+    long totalSize = nsSummary.getSizeOfFiles() + nsSummary.getSizeOfDeletedFiles();
+
+    // Add size from active child directories
     for (long childId : nsSummary.getChildDir()) {
       totalSize += fetchSizeForDeletedDirectory(childId);
     }
+
+    // Add size from deleted child directories
+    for (long deletedChildId : nsSummary.getDeletedChildDir()) {
+      totalSize += fetchSizeForDeletedDirectory(deletedChildId);
+    }
+
     return totalSize;
   }

diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NSSummary.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NSSummary.java
index f20fdc764af5..8d1b1cde1e70 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NSSummary.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/types/NSSummary.java
@@ -31,26 +31,41 @@ public class NSSummary {
   private int numOfFiles;
   private long sizeOfFiles;
+  private int numOfDeletedFiles; // New field
+  private long sizeOfDeletedFiles; // New field
+  private int numOfDeletedDirs; // New field
+  private long sizeOfDeletedDirs; // New field
   private int[] fileSizeBucket;
   private Set<Long> childDir;
+  private Set<Long> deletedChildDir; // New field to track deleted child directories
   private String dirName;
   private long parentId = 0;

   public NSSummary() {
-    this(0, 0L, new int[ReconConstants.NUM_OF_FILE_SIZE_BINS],
-        new HashSet<>(), "", 0);
+    this(0, 0L, 0, 0L, 0, 0L, new int[ReconConstants.NUM_OF_FILE_SIZE_BINS],
+        new HashSet<>(), new HashSet<>(), "", 0);
   }

   public NSSummary(int numOfFiles,
                    long sizeOfFiles,
+                   int numOfDeletedFiles,
+                   long sizeOfDeletedFiles,
+                   int numOfDeletedDirs,
+                   long sizeOfDeletedDirs,
                    int[] bucket,
                    Set<Long> childDir,
+                   Set<Long> deletedChildDir,
                    String dirName,
                    long parentId) {
     this.numOfFiles = numOfFiles;
     this.sizeOfFiles = sizeOfFiles;
+    this.numOfDeletedFiles = numOfDeletedFiles;
+    this.sizeOfDeletedFiles = sizeOfDeletedFiles;
+    this.numOfDeletedDirs = numOfDeletedDirs;
+    this.sizeOfDeletedDirs = sizeOfDeletedDirs;
     setFileSizeBucket(bucket);
     this.childDir = childDir;
+    this.deletedChildDir = deletedChildDir;
this.dirName = dirName; this.parentId = parentId; } @@ -109,6 +124,59 @@ public void removeChildDir(long childId) { } } + public int getNumOfDeletedFiles() { + return numOfDeletedFiles; + } + + public void setNumOfDeletedFiles(int numOfDeletedFiles) { + this.numOfDeletedFiles = numOfDeletedFiles; + } + + public long getSizeOfDeletedFiles() { + return sizeOfDeletedFiles; + } + + public void setSizeOfDeletedFiles(long sizeOfDeletedFiles) { + this.sizeOfDeletedFiles = sizeOfDeletedFiles; + } + + public int getNumOfDeletedDirs() { + return numOfDeletedDirs; + } + + public void setNumOfDeletedDirs(int numOfDeletedDirs) { + this.numOfDeletedDirs = numOfDeletedDirs; + } + + public long getSizeOfDeletedDirs() { + return sizeOfDeletedDirs; + } + + public void setSizeOfDeletedDirs(long sizeOfDeletedDirs) { + this.sizeOfDeletedDirs = sizeOfDeletedDirs; + } + + public Set getDeletedChildDir() { + return deletedChildDir; + } + + public void setDeletedChildDir(Set deletedChildDir) { + this.deletedChildDir = deletedChildDir; + } + + public void addDeletedChildDir(long childId) { + if (this.deletedChildDir.contains(childId)) { + return; + } + this.deletedChildDir.add(childId); + } + + public void removeDeletedChildDir(long childId) { + if (this.deletedChildDir.contains(childId)) { + this.deletedChildDir.remove(childId); + } + } + public long getParentId() { return parentId; } @@ -122,8 +190,13 @@ public String toString() { return "NSSummary{dirName='" + dirName + '\'' + ", parentId=" + parentId + ", childDir=" + childDir + + ", deletedChildDir=" + deletedChildDir + ", numOfFiles=" + numOfFiles + ", sizeOfFiles=" + sizeOfFiles + + ", numOfDeletedFiles=" + numOfDeletedFiles + + ", sizeOfDeletedFiles=" + sizeOfDeletedFiles + + ", numOfDeletedDirs=" + numOfDeletedDirs + + ", sizeOfDeletedDirs=" + sizeOfDeletedDirs + ", fileSizeBucket=" + Arrays.toString(fileSizeBucket) + '}'; } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/codec/NSSummaryCodec.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/codec/NSSummaryCodec.java index 92068988d76e..b6438a565798 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/codec/NSSummaryCodec.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/codec/NSSummaryCodec.java @@ -60,18 +60,23 @@ public Class getTypeClass() { public byte[] toPersistedFormatImpl(NSSummary object) throws IOException { final byte[] dirName = stringCodec.toPersistedFormat(object.getDirName()); Set childDirs = object.getChildDir(); + Set deletedChildDirs = object.getDeletedChildDir(); int numOfChildDirs = childDirs.size(); + int numOfDeletedChildDirs = deletedChildDirs.size(); - // int: 1 field (numOfFiles) + 2 sizes (childDirs, dirName) + NUM_OF_FILE_SIZE_BINS (fileSizeBucket) - final int resSize = (3 + ReconConstants.NUM_OF_FILE_SIZE_BINS) * Integer.BYTES - + (numOfChildDirs + 1) * Long.BYTES // 1 long field for parentId + list size - + Short.BYTES // 2 dummy shorts to track length - + dirName.length // directory name length - + Long.BYTES; // Added space for parentId serialization + // Calculate size: existing fields + new deleted fields + final int resSize = (6 + ReconConstants.NUM_OF_FILE_SIZE_BINS) * Integer.BYTES + + (numOfChildDirs + numOfDeletedChildDirs + 4) * Long.BYTES // sizes, parentId, and size fields + + Short.BYTES + + dirName.length; ByteArrayOutputStream out = new ByteArrayOutputStream(resSize); out.write(integerCodec.toPersistedFormat(object.getNumOfFiles())); 
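+    // Persisted field order from this point (matching the writes below):
+    //   sizeOfFiles (long), numOfDeletedFiles (int), sizeOfDeletedFiles (long),
+    //   numOfDeletedDirs (int), sizeOfDeletedDirs (long),
+    //   bin count (short), fileSizeBucket entries (int each),
+    //   childDir count (int) + child dir ids (long each),
+    //   deletedChildDir count (int) + deleted child dir ids (long each),
+    //   dirName length (int) + dirName bytes, parentId (long).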
out.write(longCodec.toPersistedFormat(object.getSizeOfFiles())); + out.write(integerCodec.toPersistedFormat(object.getNumOfDeletedFiles())); + out.write(longCodec.toPersistedFormat(object.getSizeOfDeletedFiles())); + out.write(integerCodec.toPersistedFormat(object.getNumOfDeletedDirs())); + out.write(longCodec.toPersistedFormat(object.getSizeOfDeletedDirs())); out.write(shortCodec.toPersistedFormat( (short) ReconConstants.NUM_OF_FILE_SIZE_BINS)); int[] fileSizeBucket = object.getFileSizeBucket(); @@ -82,6 +87,10 @@ public byte[] toPersistedFormatImpl(NSSummary object) throws IOException { for (long childDirId : childDirs) { out.write(longCodec.toPersistedFormat(childDirId)); } + out.write(integerCodec.toPersistedFormat(numOfDeletedChildDirs)); + for (long deletedChildDirId : deletedChildDirs) { + out.write(longCodec.toPersistedFormat(deletedChildDirId)); + } out.write(integerCodec.toPersistedFormat(dirName.length)); out.write(dirName); out.write(longCodec.toPersistedFormat(object.getParentId())); @@ -95,6 +104,25 @@ public NSSummary fromPersistedFormatImpl(byte[] rawData) throws IOException { NSSummary res = new NSSummary(); res.setNumOfFiles(in.readInt()); res.setSizeOfFiles(in.readLong()); + + // Try to read new fields, if they exist + try { + res.setNumOfDeletedFiles(in.readInt()); + res.setSizeOfDeletedFiles(in.readLong()); + res.setNumOfDeletedDirs(in.readInt()); + res.setSizeOfDeletedDirs(in.readLong()); + } catch (IOException e) { + // If reading fails, these are old format records + res.setNumOfDeletedFiles(0); + res.setSizeOfDeletedFiles(0); + res.setNumOfDeletedDirs(0); + res.setSizeOfDeletedDirs(0); + // Reset stream position + in = new DataInputStream(new ByteArrayInputStream(rawData)); + in.readInt(); // numOfFiles + in.readLong(); // sizeOfFiles + } + short len = in.readShort(); assert (len == (short) ReconConstants.NUM_OF_FILE_SIZE_BINS); int[] fileSizeBucket = new int[len]; @@ -110,6 +138,19 @@ public NSSummary fromPersistedFormatImpl(byte[] rawData) throws IOException { } res.setChildDir(childDir); + // Try to read deleted child directories + try { + int deletedListSize = in.readInt(); + Set deletedChildDir = new HashSet<>(); + for (int i = 0; i < deletedListSize; ++i) { + deletedChildDir.add(in.readLong()); + } + res.setDeletedChildDir(deletedChildDir); + } catch (IOException e) { + // If reading fails, this is old format + res.setDeletedChildDir(new HashSet<>()); + } + int strLen = in.readInt(); if (strLen == 0) { return res; @@ -136,8 +177,13 @@ public NSSummary copyObject(NSSummary object) { NSSummary copy = new NSSummary(); copy.setNumOfFiles(object.getNumOfFiles()); copy.setSizeOfFiles(object.getSizeOfFiles()); + copy.setNumOfDeletedFiles(object.getNumOfDeletedFiles()); + copy.setSizeOfDeletedFiles(object.getSizeOfDeletedFiles()); + copy.setNumOfDeletedDirs(object.getNumOfDeletedDirs()); + copy.setSizeOfDeletedDirs(object.getSizeOfDeletedDirs()); copy.setFileSizeBucket(object.getFileSizeBucket()); - copy.setChildDir(object.getChildDir()); + copy.setChildDir(new HashSet<>(object.getChildDir())); + copy.setDeletedChildDir(new HashSet<>(object.getDeletedChildDir())); copy.setDirName(object.getDirName()); copy.setParentId(object.getParentId()); return copy; diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java index 755d966b8328..f5d4f1bba039 100644 --- 
a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskDbEventHandler.java
@@ -28,6 +28,7 @@
 import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;

 /**
  * Class for holding all NSSummaryTask methods
@@ -199,4 +200,119 @@ protected boolean flushAndCommitNSToDB(Map<Long, NSSummary> nsSummaryMap) {
     }
     return true;
   }
+
+  /**
+   * Handle PUT event on deleted table - when a file is moved to deleted table
+   */
+  protected void handlePutDeletedKeyEvent(RepeatedOmKeyInfo repeatedOmKeyInfo,
+                                          Map<Long, NSSummary> nsSummaryMap)
+      throws IOException {
+    for (OmKeyInfo keyInfo : repeatedOmKeyInfo.getOmKeyInfoList()) {
+      long parentObjectId = keyInfo.getParentObjectID();
+      NSSummary nsSummary = nsSummaryMap.get(parentObjectId);
+      if (nsSummary == null) {
+        nsSummary = reconNamespaceSummaryManager.getNSSummary(parentObjectId);
+      }
+      if (nsSummary == null) {
+        LOG.error("The namespace table is not correctly populated for parent ID: {}", parentObjectId);
+        continue;
+      }
+
+      // Increment deleted files count and size
+      nsSummary.setNumOfDeletedFiles(nsSummary.getNumOfDeletedFiles() + 1);
+      nsSummary.setSizeOfDeletedFiles(nsSummary.getSizeOfDeletedFiles() + keyInfo.getDataSize());
+      nsSummaryMap.put(parentObjectId, nsSummary);
+    }
+  }
+
+  /**
+   * Handle DELETE event on deleted table - when a file is physically deleted from disk
+   */
+  protected void handleDeleteDeletedKeyEvent(RepeatedOmKeyInfo repeatedOmKeyInfo,
+                                             Map<Long, NSSummary> nsSummaryMap)
+      throws IOException {
+    for (OmKeyInfo keyInfo : repeatedOmKeyInfo.getOmKeyInfoList()) {
+      long parentObjectId = keyInfo.getParentObjectID();
+      NSSummary nsSummary = nsSummaryMap.get(parentObjectId);
+      if (nsSummary == null) {
+        nsSummary = reconNamespaceSummaryManager.getNSSummary(parentObjectId);
+      }
+      if (nsSummary == null) {
+        LOG.error("The namespace table is not correctly populated for parent ID: {}", parentObjectId);
+        continue;
+      }
+
+      // Decrement deleted files count and size
+      nsSummary.setNumOfDeletedFiles(Math.max(0, nsSummary.getNumOfDeletedFiles() - 1));
+      nsSummary.setSizeOfDeletedFiles(Math.max(0, nsSummary.getSizeOfDeletedFiles() - keyInfo.getDataSize()));
+      nsSummaryMap.put(parentObjectId, nsSummary);
+    }
+  }
+
+  /**
+   * Handle PUT event on deleted directory table - when a directory is moved to deleted directory table
+   */
+  protected void handlePutDeletedDirEvent(OmKeyInfo deletedDirInfo,
+                                          Map<Long, NSSummary> nsSummaryMap)
+      throws IOException {
+    long parentObjectId = deletedDirInfo.getParentObjectID();
+    long objectId = deletedDirInfo.getObjectID();
+
+    // Add to parent's deleted child directory list
+    NSSummary parentNsSummary = nsSummaryMap.get(parentObjectId);
+    if (parentNsSummary == null) {
+      parentNsSummary = reconNamespaceSummaryManager.getNSSummary(parentObjectId);
+    }
+    if (parentNsSummary == null) {
+      LOG.error("The namespace table is not correctly populated for parent ID: {}", parentObjectId);
+      return;
+    }
+
+    parentNsSummary.addDeletedChildDir(objectId);
+    parentNsSummary.setNumOfDeletedDirs(parentNsSummary.getNumOfDeletedDirs() + 1);
+    parentNsSummary.setSizeOfDeletedDirs(parentNsSummary.getSizeOfDeletedDirs() + deletedDirInfo.getDataSize());
+    nsSummaryMap.put(parentObjectId, parentNsSummary);
+
+    // Create or update the deleted directory's own NSSummary
+    NSSummary deletedDirNsSummary = nsSummaryMap.get(objectId);
+    if
(deletedDirNsSummary == null) { + deletedDirNsSummary = reconNamespaceSummaryManager.getNSSummary(objectId); + } + if (deletedDirNsSummary == null) { + deletedDirNsSummary = new NSSummary(); + } + + deletedDirNsSummary.setDirName(deletedDirInfo.getFileName()); + deletedDirNsSummary.setParentId(parentObjectId); + nsSummaryMap.put(objectId, deletedDirNsSummary); + } + + /** + * Handle DELETE event on deleted directory table - when a directory is physically deleted from disk + */ + protected void handleDeleteDeletedDirEvent(OmKeyInfo deletedDirInfo, + Map nsSummaryMap) + throws IOException { + long parentObjectId = deletedDirInfo.getParentObjectID(); + long objectId = deletedDirInfo.getObjectID(); + + // Remove from parent's deleted child directory list + NSSummary parentNsSummary = nsSummaryMap.get(parentObjectId); + if (parentNsSummary == null) { + parentNsSummary = reconNamespaceSummaryManager.getNSSummary(parentObjectId); + } + if (parentNsSummary == null) { + LOG.error("The namespace table is not correctly populated for parent ID: {}", parentObjectId); + return; + } + + parentNsSummary.removeDeletedChildDir(objectId); + parentNsSummary.setNumOfDeletedDirs(Math.max(0, parentNsSummary.getNumOfDeletedDirs() - 1)); + parentNsSummary.setSizeOfDeletedDirs(Math.max(0, parentNsSummary.getSizeOfDeletedDirs() - deletedDirInfo.getDataSize())); + nsSummaryMap.put(parentObjectId, parentNsSummary); + + // Remove the deleted directory's NSSummary + nsSummaryMap.remove(objectId); + reconNamespaceSummaryManager.deleteNSSummary(objectId); + } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java index b03820ad4dc4..83d3902e369d 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/NSSummaryTaskWithFSO.java @@ -19,6 +19,8 @@ import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DIRECTORY_TABLE; import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.FILE_TABLE; +import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DELETED_TABLE; +import static org.apache.hadoop.ozone.om.codec.OMDBDefinition.DELETED_DIR_TABLE; import java.io.IOException; import java.util.Arrays; @@ -34,6 +36,7 @@ import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.WithParentObjectId; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.recon.api.types.NSSummary; import org.apache.hadoop.ozone.recon.recovery.ReconOMMetadataManager; import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager; @@ -60,9 +63,9 @@ public NSSummaryTaskWithFSO(ReconNamespaceSummaryManager this.nsSummaryFlushToDBMaxThreshold = nsSummaryFlushToDBMaxThreshold; } - // We only listen to updates from FSO-enabled KeyTable(FileTable) and DirTable + // We only listen to updates from FSO-enabled KeyTable(FileTable), DirTable, DeletedTable, and DeletedDirTable public Collection getTaskTables() { - return Arrays.asList(FILE_TABLE, DIRECTORY_TABLE); + return Arrays.asList(FILE_TABLE, DIRECTORY_TABLE, DELETED_TABLE, DELETED_DIR_TABLE); } public Pair processWithFSO(OMUpdateEventBatch events, @@ -83,9 +86,11 @@ public Pair processWithFSO(OMUpdateEventBatch events, OMDBUpdateEvent.OMDBUpdateAction action = 
omdbUpdateEvent.getAction(); eventCounter++; - // we only process updates on OM's FileTable and Dirtable + // we only process updates on OM's FileTable, DirTable, DeletedTable, and DeletedDirTable String table = omdbUpdateEvent.getTable(); boolean updateOnFileTable = table.equals(FILE_TABLE); + boolean updateOnDeletedTable = table.equals(DELETED_TABLE); + boolean updateOnDeletedDirTable = table.equals(DELETED_DIR_TABLE); if (!taskTables.contains(table)) { continue; } @@ -125,6 +130,79 @@ public Pair processWithFSO(OMUpdateEventBatch events, omdbUpdateEvent.getAction()); } + } else if (updateOnDeletedTable) { + // deleted table update + Object value = omdbUpdateEvent.getValue(); + if (!(value instanceof RepeatedOmKeyInfo)) { + LOG.warn("Unexpected value type {} for key {}. Skipping processing.", + value.getClass().getName(), updatedKey); + continue; + } + RepeatedOmKeyInfo repeatedOmKeyInfo = (RepeatedOmKeyInfo) value; + Object oldValue = omdbUpdateEvent.getOldValue(); + RepeatedOmKeyInfo oldRepeatedOmKeyInfo = oldValue instanceof RepeatedOmKeyInfo ? + (RepeatedOmKeyInfo) oldValue : null; + + switch (action) { + case PUT: + handlePutDeletedKeyEvent(repeatedOmKeyInfo, nsSummaryMap); + break; + + case DELETE: + handleDeleteDeletedKeyEvent(repeatedOmKeyInfo, nsSummaryMap); + break; + + case UPDATE: + // For deleted table, treat UPDATE as DELETE + PUT + if (oldRepeatedOmKeyInfo != null) { + handleDeleteDeletedKeyEvent(oldRepeatedOmKeyInfo, nsSummaryMap); + } + handlePutDeletedKeyEvent(repeatedOmKeyInfo, nsSummaryMap); + break; + + default: + LOG.debug("Skipping DB update event : {}", + omdbUpdateEvent.getAction()); + } + + } else if (updateOnDeletedDirTable) { + // deleted directory table update + Object value = omdbUpdateEvent.getValue(); + if (!(value instanceof OmKeyInfo)) { + LOG.warn("Unexpected value type {} for key {}. Skipping processing.", + value.getClass().getName(), updatedKey); + continue; + } + OmKeyInfo deletedDirInfo = (OmKeyInfo) value; + Object oldValue = omdbUpdateEvent.getOldValue(); + OmKeyInfo oldDeletedDirInfo = oldValue instanceof OmKeyInfo ? 
+ (OmKeyInfo) oldValue : null; + + switch (action) { + case PUT: + handlePutDeletedDirEvent(deletedDirInfo, nsSummaryMap); + break; + + case DELETE: + handleDeleteDeletedDirEvent(deletedDirInfo, nsSummaryMap); + break; + + case UPDATE: + if (oldDeletedDirInfo != null) { + // delete first, then put + handleDeleteDeletedDirEvent(oldDeletedDirInfo, nsSummaryMap); + } else { + LOG.warn("Update event does not have the old deleted dir info for {}.", + updatedKey); + } + handlePutDeletedDirEvent(deletedDirInfo, nsSummaryMap); + break; + + default: + LOG.debug("Skipping DB update event : {}", + omdbUpdateEvent.getAction()); + } + } else { // directory update on DirTable OMDBUpdateEvent dirTableUpdateEvent = @@ -183,6 +261,7 @@ public boolean reprocessWithFSO(OMMetadataManager omMetadataManager) { Map nsSummaryMap = new HashMap<>(); try { + // First process directory table to establish the directory structure Table dirTable = omMetadataManager.getDirectoryTable(); try (TableIterator keyTable = omMetadataManager.getFileTable(); @@ -218,6 +297,42 @@ public boolean reprocessWithFSO(OMMetadataManager omMetadataManager) { } } + // Process deleted table + Table deletedTable = + omMetadataManager.getDeletedTable(); + + try (TableIterator> + deletedTableIter = deletedTable.iterator()) { + while (deletedTableIter.hasNext()) { + Table.KeyValue kv = deletedTableIter.next(); + RepeatedOmKeyInfo repeatedOmKeyInfo = kv.getValue(); + handlePutDeletedKeyEvent(repeatedOmKeyInfo, nsSummaryMap); + if (nsSummaryMap.size() >= nsSummaryFlushToDBMaxThreshold) { + if (!flushAndCommitNSToDB(nsSummaryMap)) { + return false; + } + } + } + } + + // Process deleted directory table + Table deletedDirTable = + omMetadataManager.getDeletedDirTable(); + + try (TableIterator> + deletedDirTableIter = deletedDirTable.iterator()) { + while (deletedDirTableIter.hasNext()) { + Table.KeyValue kv = deletedDirTableIter.next(); + OmKeyInfo deletedDirInfo = kv.getValue(); + handlePutDeletedDirEvent(deletedDirInfo, nsSummaryMap); + if (nsSummaryMap.size() >= nsSummaryFlushToDBMaxThreshold) { + if (!flushAndCommitNSToDB(nsSummaryMap)) { + return false; + } + } + } + } + } catch (IOException ioEx) { LOG.error("Unable to reprocess Namespace Summary data in Recon DB. ", ioEx); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryDeletedTrackingWithFSO.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryDeletedTrackingWithFSO.java new file mode 100644 index 000000000000..84034ba0a359 --- /dev/null +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestNSSummaryDeletedTrackingWithFSO.java @@ -0,0 +1,452 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.ozone.recon.tasks;
+
+import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.hadoop.ozone.om.helpers.BucketLayout;
+import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.recon.ReconConstants;
+import org.apache.hadoop.ozone.recon.api.types.NSSummary;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Test for NSSummary deleted tracking functionality with FSO layout.
+ * Tests tracking of deleted files and directories in namespace summaries.
+ *
+ * This test works with the default FSO structure:
+ * vol/bucket1/
+ * ├── file1
+ * └── dir1/
+ *     ├── dir2/
+ *     │   └── file3
+ *     └── dir3/
+ * vol/bucket2/
+ * ├── file2
+ * └── file4
+ */
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+public class TestNSSummaryDeletedTrackingWithFSO extends AbstractNSSummaryTaskTest {
+
+  private NSSummaryTaskWithFSO nSSummaryTaskWithFso;
+
+  // Answer Sets for tracking deleted items
+  private static Set<Long> deletedChildDirsBucket1 = new HashSet<>();
+  private static Set<Long> deletedChildDirsDir1 = new HashSet<>();
+
+  // Additional test constants for extra files
+  private static final String FILE_SIX = "file6";
+  private static final String FILE_SEVEN = "file7";
+
+  private static final long KEY_SIX_OBJECT_ID = 100L;
+  private static final long KEY_SEVEN_OBJECT_ID = 101L;
+
+  private static final long KEY_SIX_SIZE = 600L;
+  private static final long KEY_SEVEN_SIZE = 700L;
+
+  private static BucketLayout getBucketLayout() {
+    return BucketLayout.FILE_SYSTEM_OPTIMIZED;
+  }
+
+  @BeforeAll
+  void setUp(@TempDir File tmpDir) throws Exception {
+    commonSetup(tmpDir,
+        new OMConfigParameter(true,
+            false,
+            getBucketLayout(),
+            3,
+            true,
+            true,
+            false));
+
+    nSSummaryTaskWithFso = new NSSummaryTaskWithFSO(
+        getReconNamespaceSummaryManager(),
+        getReconOMMetadataManager(),
+        3);
+  }
+
+  /**
+   * Nested class for testing deleted tracking during reprocess.
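+   * Reprocess rebuilds the summaries from a full OM DB snapshot (including the
+   * deleted tables), so with nothing deleted yet every deleted-tracking counter
+   * is expected to start at zero.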
+ */ + @Nested + public class TestReprocessDeletedTracking { + + private NSSummary nsSummaryForBucket1; + private NSSummary nsSummaryForBucket2; + private NSSummary nsSummaryForDir1; + private NSSummary nsSummaryForDir2; + + @BeforeEach + public void setUp() throws IOException { + List result = commonSetUpTestReprocess( + () -> nSSummaryTaskWithFso.reprocessWithFSO(getReconOMMetadataManager()), + BUCKET_ONE_OBJECT_ID, BUCKET_TWO_OBJECT_ID); + + nsSummaryForBucket1 = result.get(0); + nsSummaryForBucket2 = result.get(1); + + // Get NSSummary for existing directories + nsSummaryForDir1 = getReconNamespaceSummaryManager() + .getNSSummary(DIR_ONE_OBJECT_ID); + nsSummaryForDir2 = getReconNamespaceSummaryManager() + .getNSSummary(DIR_TWO_OBJECT_ID); + } + + @Test + public void testReprocessDeletedTrackingInitialState() { + // Initially, no deleted files or directories should be tracked + assertEquals(0, nsSummaryForBucket1.getNumOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket1.getSizeOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket1.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForBucket1.getSizeOfDeletedDirs()); + assertEquals(0, nsSummaryForBucket1.getDeletedChildDir().size()); + + assertEquals(0, nsSummaryForBucket2.getNumOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket2.getSizeOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket2.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForBucket2.getSizeOfDeletedDirs()); + assertEquals(0, nsSummaryForBucket2.getDeletedChildDir().size()); + } + + @Test + public void testReprocessDeletedTrackingForDirectories() { + // Test that directories have proper deleted tracking initialized + assertNotNull(nsSummaryForDir1); + assertNotNull(nsSummaryForDir2); + + assertEquals(0, nsSummaryForDir1.getNumOfDeletedFiles()); + assertEquals(0, nsSummaryForDir1.getSizeOfDeletedFiles()); + assertEquals(0, nsSummaryForDir1.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForDir1.getSizeOfDeletedDirs()); + assertEquals(0, nsSummaryForDir1.getDeletedChildDir().size()); + + assertEquals(0, nsSummaryForDir2.getNumOfDeletedFiles()); + assertEquals(0, nsSummaryForDir2.getSizeOfDeletedFiles()); + assertEquals(0, nsSummaryForDir2.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForDir2.getSizeOfDeletedDirs()); + assertEquals(0, nsSummaryForDir2.getDeletedChildDir().size()); + } + + @Test + public void testReprocessFileCountInDirectories() { + // Test that file counts are correct in directories + // dir2 should have 1 file (file3) + assertEquals(1, nsSummaryForDir2.getNumOfFiles()); + assertEquals(KEY_THREE_SIZE, nsSummaryForDir2.getSizeOfFiles()); + + // dir1 should have 1 file from dir2 + assertEquals(1, nsSummaryForDir1.getNumOfFiles()); + assertEquals(KEY_THREE_SIZE, nsSummaryForDir1.getSizeOfFiles()); + + // bucket1 should have 2 files (file1 + file3 from dir2) + assertEquals(2, nsSummaryForBucket1.getNumOfFiles()); + assertEquals(KEY_ONE_SIZE + KEY_THREE_SIZE, nsSummaryForBucket1.getSizeOfFiles()); + } + } + + /** + * Nested class for testing deleted tracking during process events. 
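+   * These tests first reprocess from the OM snapshot and then apply an
+   * incremental OMUpdateEventBatch of delete events, verifying how the
+   * deleted-tracking counters are adjusted.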
+ */ + @Nested + public class TestProcessDeletedTracking { + + private NSSummary nsSummaryForBucket1; + private NSSummary nsSummaryForBucket2; + private NSSummary nsSummaryForDir1; + private NSSummary nsSummaryForDir2; + + @BeforeEach + public void setUp() throws IOException { + // Add additional files for testing + addAdditionalTestFiles(); + + nSSummaryTaskWithFso.reprocessWithFSO(getReconOMMetadataManager()); + nSSummaryTaskWithFso.processWithFSO(processDeletedTrackingEventBatch(), 0); + + nsSummaryForBucket1 = getReconNamespaceSummaryManager() + .getNSSummary(BUCKET_ONE_OBJECT_ID); + nsSummaryForBucket2 = getReconNamespaceSummaryManager() + .getNSSummary(BUCKET_TWO_OBJECT_ID); + nsSummaryForDir1 = getReconNamespaceSummaryManager() + .getNSSummary(DIR_ONE_OBJECT_ID); + nsSummaryForDir2 = getReconNamespaceSummaryManager() + .getNSSummary(DIR_TWO_OBJECT_ID); + } + + private void addAdditionalTestFiles() throws IOException { + // Add additional files under dir2 for testing + String omKey6Key = DIR_TWO_OBJECT_ID + OM_KEY_PREFIX + FILE_SIX; + OmKeyInfo omKey6Info = buildOmKeyInfo(VOL, BUCKET_ONE, "dir1/dir2/file6", + FILE_SIX, KEY_SIX_OBJECT_ID, DIR_TWO_OBJECT_ID, KEY_SIX_SIZE); + getOmMetadataManager().getKeyTable(getBucketLayout()).put(omKey6Key, omKey6Info); + + String omKey7Key = BUCKET_ONE_OBJECT_ID + OM_KEY_PREFIX + FILE_SEVEN; + OmKeyInfo omKey7Info = buildOmKeyInfo(VOL, BUCKET_ONE, FILE_SEVEN, + FILE_SEVEN, KEY_SEVEN_OBJECT_ID, BUCKET_ONE_OBJECT_ID, KEY_SEVEN_SIZE); + getOmMetadataManager().getKeyTable(getBucketLayout()).put(omKey7Key, omKey7Info); + } + + private OMUpdateEventBatch processDeletedTrackingEventBatch() throws IOException { + // Delete file 1 under bucket 1 + String omDeleteFileKey = BUCKET_ONE_OBJECT_ID + OM_KEY_PREFIX + FILE_ONE; + OmKeyInfo omDeleteFileInfo = buildOmKeyInfo( + VOL, BUCKET_ONE, KEY_ONE, FILE_ONE, + KEY_ONE_OBJECT_ID, BUCKET_ONE_OBJECT_ID, KEY_ONE_SIZE); + OMDBUpdateEvent deleteFileEvent = new OMDBUpdateEvent. + OMUpdateEventBuilder() + .setKey(omDeleteFileKey) + .setValue(omDeleteFileInfo) + .setTable(getOmMetadataManager().getKeyTable(getBucketLayout()) + .getName()) + .setAction(OMDBUpdateEvent.OMDBUpdateAction.DELETE) + .build(); + + // Delete file 6 under dir2 + String omDeleteFile6Key = DIR_TWO_OBJECT_ID + OM_KEY_PREFIX + FILE_SIX; + OmKeyInfo omDeleteFile6Info = buildOmKeyInfo( + VOL, BUCKET_ONE, "dir1/dir2/file6", FILE_SIX, + KEY_SIX_OBJECT_ID, DIR_TWO_OBJECT_ID, KEY_SIX_SIZE); + OMDBUpdateEvent deleteFile6Event = new OMDBUpdateEvent. + OMUpdateEventBuilder() + .setKey(omDeleteFile6Key) + .setValue(omDeleteFile6Info) + .setTable(getOmMetadataManager().getKeyTable(getBucketLayout()) + .getName()) + .setAction(OMDBUpdateEvent.OMDBUpdateAction.DELETE) + .build(); + + // Delete dir3 under dir1 + String omDeleteDir3Key = DIR_ONE_OBJECT_ID + OM_KEY_PREFIX + DIR_THREE; + OmDirectoryInfo omDeleteDir3Info = buildOmDirInfo(DIR_THREE, + DIR_THREE_OBJECT_ID, DIR_ONE_OBJECT_ID); + OMDBUpdateEvent deleteDir3Event = new OMDBUpdateEvent. 
+ OMUpdateEventBuilder() + .setKey(omDeleteDir3Key) + .setValue(omDeleteDir3Info) + .setAction(OMDBUpdateEvent.OMDBUpdateAction.DELETE) + .setTable(getOmMetadataManager().getDirectoryTable().getName()) + .build(); + + return new OMUpdateEventBatch(Arrays.asList( + deleteFileEvent, deleteFile6Event, deleteDir3Event + ), 0L); + } + + @Test + public void testProcessDeletedFileTracking() { + // After deleting file 1, bucket 1 should track the deleted file + assertEquals(1, nsSummaryForBucket1.getNumOfDeletedFiles()); + assertEquals(KEY_ONE_SIZE, nsSummaryForBucket1.getSizeOfDeletedFiles()); + + // After deleting file 6, dir2 should track the deleted file + assertEquals(1, nsSummaryForDir2.getNumOfDeletedFiles()); + assertEquals(KEY_SIX_SIZE, nsSummaryForDir2.getSizeOfDeletedFiles()); + + // Bucket 2 should have no deleted files + assertEquals(0, nsSummaryForBucket2.getNumOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket2.getSizeOfDeletedFiles()); + } + + @Test + public void testProcessDeletedDirectoryTracking() { + // After deleting dir3, dir1 should track the deleted directory + assertEquals(1, nsSummaryForDir1.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForDir1.getSizeOfDeletedDirs()); // Directories have 0 size + + // Check that deleted child directory is tracked + Set deletedChildDirs = nsSummaryForDir1.getDeletedChildDir(); + assertEquals(1, deletedChildDirs.size()); + assertTrue(deletedChildDirs.contains(DIR_THREE_OBJECT_ID)); + } + + @Test + public void testProcessDeletedTrackingForBuckets() { + // Bucket 1 should have deleted file tracking + assertEquals(1, nsSummaryForBucket1.getNumOfDeletedFiles()); + assertEquals(KEY_ONE_SIZE, nsSummaryForBucket1.getSizeOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket1.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForBucket1.getSizeOfDeletedDirs()); + + // Bucket 2 should have no deleted items + assertEquals(0, nsSummaryForBucket2.getNumOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket2.getSizeOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket2.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForBucket2.getSizeOfDeletedDirs()); + } + + @Test + public void testProcessDeletedChildDirectoryTracking() { + // Test that deleted child directories are properly tracked + Set deletedChildDirs = nsSummaryForDir1.getDeletedChildDir(); + assertEquals(1, deletedChildDirs.size()); + assertTrue(deletedChildDirs.contains(DIR_THREE_OBJECT_ID)); + + // Other directories should not have any deleted child directories + assertEquals(0, nsSummaryForDir2.getDeletedChildDir().size()); + assertEquals(0, nsSummaryForBucket1.getDeletedChildDir().size()); + assertEquals(0, nsSummaryForBucket2.getDeletedChildDir().size()); + } + + @Test + public void testProcessMultipleDeleteOperations() { + // Test that multiple delete operations are properly tracked + // Bucket 1 should have 1 deleted file + assertEquals(1, nsSummaryForBucket1.getNumOfDeletedFiles()); + assertEquals(KEY_ONE_SIZE, nsSummaryForBucket1.getSizeOfDeletedFiles()); + + // Dir2 should have 1 deleted file + assertEquals(1, nsSummaryForDir2.getNumOfDeletedFiles()); + assertEquals(KEY_SIX_SIZE, nsSummaryForDir2.getSizeOfDeletedFiles()); + + // Dir1 should have 1 deleted directory + assertEquals(1, nsSummaryForDir1.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForDir1.getSizeOfDeletedDirs()); + + // Check that deleted child directory is tracked + Set deletedChildDirs = nsSummaryForDir1.getDeletedChildDir(); + assertEquals(1, deletedChildDirs.size()); + 
assertTrue(deletedChildDirs.contains(DIR_THREE_OBJECT_ID)); + } + } + + /** + * Nested class for testing mixed add/delete operations. + */ + @Nested + public class TestMixedOperations { + + private NSSummary nsSummaryForBucket1; + private NSSummary nsSummaryForDir1; + + @BeforeEach + public void setUp() throws IOException { + nSSummaryTaskWithFso.reprocessWithFSO(getReconOMMetadataManager()); + nSSummaryTaskWithFso.processWithFSO(processMixedOperationsEventBatch(), 0); + + nsSummaryForBucket1 = getReconNamespaceSummaryManager() + .getNSSummary(BUCKET_ONE_OBJECT_ID); + nsSummaryForDir1 = getReconNamespaceSummaryManager() + .getNSSummary(DIR_ONE_OBJECT_ID); + } + + private OMUpdateEventBatch processMixedOperationsEventBatch() throws IOException { + // Add new file to bucket1 + String omAddFileKey = BUCKET_ONE_OBJECT_ID + OM_KEY_PREFIX + "newfile"; + OmKeyInfo omAddFileInfo = buildOmKeyInfo(VOL, BUCKET_ONE, "newfile", + "newfile", 200L, BUCKET_ONE_OBJECT_ID, 800L); + OMDBUpdateEvent addFileEvent = new OMDBUpdateEvent. + OMUpdateEventBuilder() + .setKey(omAddFileKey) + .setValue(omAddFileInfo) + .setTable(getOmMetadataManager().getKeyTable(getBucketLayout()) + .getName()) + .setAction(OMDBUpdateEvent.OMDBUpdateAction.PUT) + .build(); + + // Delete existing file from bucket1 + String omDeleteFileKey = BUCKET_ONE_OBJECT_ID + OM_KEY_PREFIX + FILE_ONE; + OmKeyInfo omDeleteFileInfo = buildOmKeyInfo( + VOL, BUCKET_ONE, KEY_ONE, FILE_ONE, + KEY_ONE_OBJECT_ID, BUCKET_ONE_OBJECT_ID, KEY_ONE_SIZE); + OMDBUpdateEvent deleteFileEvent = new OMDBUpdateEvent. + OMUpdateEventBuilder() + .setKey(omDeleteFileKey) + .setValue(omDeleteFileInfo) + .setTable(getOmMetadataManager().getKeyTable(getBucketLayout()) + .getName()) + .setAction(OMDBUpdateEvent.OMDBUpdateAction.DELETE) + .build(); + + // Add new directory under bucket1 + String omAddDirKey = BUCKET_ONE_OBJECT_ID + OM_KEY_PREFIX + "newdir"; + OmDirectoryInfo omAddDirInfo = buildOmDirInfo("newdir", + 300L, BUCKET_ONE_OBJECT_ID); + OMDBUpdateEvent addDirEvent = new OMDBUpdateEvent. + OMUpdateEventBuilder() + .setKey(omAddDirKey) + .setValue(omAddDirInfo) + .setAction(OMDBUpdateEvent.OMDBUpdateAction.PUT) + .setTable(getOmMetadataManager().getDirectoryTable().getName()) + .build(); + + // Delete existing directory + String omDeleteDirKey = DIR_ONE_OBJECT_ID + OM_KEY_PREFIX + DIR_THREE; + OmDirectoryInfo omDeleteDirInfo = buildOmDirInfo(DIR_THREE, + DIR_THREE_OBJECT_ID, DIR_ONE_OBJECT_ID); + OMDBUpdateEvent deleteDirEvent = new OMDBUpdateEvent. 
+ OMUpdateEventBuilder() + .setKey(omDeleteDirKey) + .setValue(omDeleteDirInfo) + .setAction(OMDBUpdateEvent.OMDBUpdateAction.DELETE) + .setTable(getOmMetadataManager().getDirectoryTable().getName()) + .build(); + + return new OMUpdateEventBatch(Arrays.asList( + addFileEvent, deleteFileEvent, addDirEvent, deleteDirEvent + ), 0L); + } + + @Test + public void testMixedAddDeleteFileOperations() { + // Test that both add and delete file operations work correctly + // Bucket1 should have 1 deleted file and 1 new file + assertEquals(1, nsSummaryForBucket1.getNumOfDeletedFiles()); + assertEquals(KEY_ONE_SIZE, nsSummaryForBucket1.getSizeOfDeletedFiles()); + } + + @Test + public void testMixedAddDeleteDirectoryOperations() { + // Test that both add and delete directory operations work correctly + // Dir1 should have 1 deleted directory + assertEquals(1, nsSummaryForDir1.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForDir1.getSizeOfDeletedDirs()); + + // Check that deleted child directory is tracked + Set deletedChildDirs = nsSummaryForDir1.getDeletedChildDir(); + assertEquals(1, deletedChildDirs.size()); + assertTrue(deletedChildDirs.contains(DIR_THREE_OBJECT_ID)); + } + + @Test + public void testMixedOperationsNoInterference() { + // Test that add and delete operations don't interfere with each other + // File operations should not affect directory tracking + assertEquals(1, nsSummaryForBucket1.getNumOfDeletedFiles()); + assertEquals(0, nsSummaryForBucket1.getNumOfDeletedDirs()); + + // Directory operations should not affect file tracking + assertEquals(1, nsSummaryForDir1.getNumOfDeletedDirs()); + assertEquals(0, nsSummaryForDir1.getNumOfDeletedFiles()); + } + } +}
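
Usage sketch (not part of this patch): a minimal illustration of how the new counters could be aggregated for a subtree once the change lands. It mirrors the recursive walk in fetchSizeForDeletedDirectory above; the PendingDeletionSummary class name and its wiring are assumptions made for the example, while getSizeOfDeletedFiles, getChildDir and getDeletedChildDir come from the NSSummary changes in this patch.

import java.io.IOException;

import org.apache.hadoop.ozone.recon.api.types.NSSummary;
import org.apache.hadoop.ozone.recon.spi.ReconNamespaceSummaryManager;

/** Illustrative only: sums bytes of deleted-but-unreclaimed files under a directory. */
public class PendingDeletionSummary {

  private final ReconNamespaceSummaryManager summaryManager;

  public PendingDeletionSummary(ReconNamespaceSummaryManager summaryManager) {
    this.summaryManager = summaryManager;
  }

  /** Bytes pending physical deletion in the subtree rooted at rootObjectId. */
  public long pendingDeletedBytes(long rootObjectId) throws IOException {
    NSSummary summary = summaryManager.getNSSummary(rootObjectId);
    if (summary == null) {
      return 0L;
    }
    long total = summary.getSizeOfDeletedFiles();
    // Both live and deleted child directories can still hold entries that are
    // tracked in the deleted tables, so recurse into both sets of children.
    for (long childId : summary.getChildDir()) {
      total += pendingDeletedBytes(childId);
    }
    for (long deletedChildId : summary.getDeletedChildDir()) {
      total += pendingDeletedBytes(deletedChildId);
    }
    return total;
  }
}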