Changes from 13 commits
Commits
56 commits
97a5a2c
Adding log statement + removing eviction on exception for debugging p…
zarna1parekh Jul 30, 2025
7443840
Formatting
zarna1parekh Jul 30, 2025
30ef18c
adding cache eviction back
zarna1parekh Jul 30, 2025
11de483
printing only relevant fields
zarna1parekh Jul 30, 2025
4294bc1
printing specific fields
zarna1parekh Jul 30, 2025
ac66b41
Checking Lucene Index status before upload
zarna1parekh Aug 12, 2025
f909840
post download check on cache nodes
zarna1parekh Aug 6, 2025
5c4b07a
Addressing directory locking issue
zarna1parekh Aug 12, 2025
c975fe3
Fixing test cases
zarna1parekh Aug 13, 2025
99ae402
Updating logging statement
zarna1parekh Aug 13, 2025
b09f9e0
lucene check after download on cache nodes
zarna1parekh Aug 13, 2025
4809591
log clean up
zarna1parekh Aug 13, 2025
808160a
Code Refactor
zarna1parekh Aug 13, 2025
fd4744c
S3 Exception logging
zarna1parekh Aug 16, 2025
31a054c
exposing the s3 exception on failure
zarna1parekh Aug 16, 2025
dbfdfe0
Check if download is failing from S3
zarna1parekh Aug 17, 2025
2c2ccec
adding more log statements
zarna1parekh Aug 17, 2025
5d41271
log lines
zarna1parekh Aug 17, 2025
6dc0c76
log line
zarna1parekh Aug 17, 2025
8acd25f
Increasing retries for S3
zarna1parekh Aug 17, 2025
e5abb7b
Checksum validation
zarna1parekh Aug 17, 2025
98dceab
Checksum only when required
zarna1parekh Aug 17, 2025
48c83b2
logging cache node id
zarna1parekh Aug 18, 2025
e171c05
logs
zarna1parekh Aug 18, 2025
f6cb74b
crc32 checksum validation
zarna1parekh Aug 19, 2025
52f394e
changing log level to info
zarna1parekh Aug 19, 2025
d2d5093
Enriching log statement
zarna1parekh Sep 10, 2025
e3e3b56
fmt
zarna1parekh Sep 10, 2025
cfadbdc
Directory cleanup
zarna1parekh Sep 11, 2025
17b0dfb
Do not delete the error dir
zarna1parekh Sep 12, 2025
4540e97
fmt
zarna1parekh Sep 12, 2025
6b72251
reverting unwanted changes
zarna1parekh Sep 12, 2025
961c8d5
Updating async client to have more Native Memory
zarna1parekh Sep 13, 2025
dd68d77
fmt
zarna1parekh Sep 13, 2025
f041267
Increasing Native memory for multipart upload
zarna1parekh Sep 14, 2025
bcd0970
fmt
zarna1parekh Sep 14, 2025
175de27
Conservative on Native memory
zarna1parekh Sep 15, 2025
b17a16c
Upload sequentially instead of dir upload
zarna1parekh Sep 16, 2025
2787a34
build failure
zarna1parekh Sep 16, 2025
11691ed
handling wildcard imports
zarna1parekh Sep 17, 2025
67d541d
fixing build issue
zarna1parekh Sep 17, 2025
7575911
Cleanup Chunk Manager
zarna1parekh Sep 17, 2025
b75d892
cleanup debug code
zarna1parekh Sep 19, 2025
5b879f8
Fixing test cases
zarna1parekh Sep 22, 2025
2db12ce
Verbose log line
zarna1parekh Sep 22, 2025
15e76f4
upload time taken
zarna1parekh Sep 26, 2025
d497e6f
Closing searcher and writer before upload begins
zarna1parekh Oct 2, 2025
414bd33
Close scheduled jobs
zarna1parekh Oct 3, 2025
6c14cbe
rollback searcher changes
zarna1parekh Oct 5, 2025
ca6fb0e
index writer close + lucene check after upload + local download
zarna1parekh Oct 11, 2025
e45a2de
closing searcher before snapshot upload
zarna1parekh Oct 14, 2025
4e658fd
Updating AWS SDK version
zarna1parekh Oct 14, 2025
3f0b897
Updating aws crt version
zarna1parekh Oct 14, 2025
4920c58
Reverting javac to 21
zarna1parekh Oct 14, 2025
f5270f4
Fixing test cases
zarna1parekh Oct 15, 2025
76809e2
Java version 21
zarna1parekh Oct 15, 2025
24 changes: 24 additions & 0 deletions astra/src/main/java/com/slack/astra/blobfs/BlobStore.java
Expand Up @@ -8,7 +8,9 @@
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutionException;
@@ -195,6 +197,28 @@ public List<String> listFiles(String prefix) {
return filesList;
}

public Map<String, Long> listFilesWithSize(String prefix) {
assert prefix != null && !prefix.isEmpty();

ListObjectsV2Request listRequest = builder().bucket(bucketName).prefix(prefix).build();
ListObjectsV2Publisher asyncPaginatedListResponse =
s3AsyncClient.listObjectsV2Paginator(listRequest);

Map<String, Long> filesListWithSize = new HashMap<>();
try {
asyncPaginatedListResponse
.subscribe(
listResponse ->
listResponse
.contents()
.forEach(s3Object -> filesListWithSize.put(s3Object.key(), s3Object.size())))
.get();
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
return filesListWithSize;
Comment on lines +273 to +291

you could extract a helper method that would be used by both listFiles... methods that takes a prefix and a Consumer. Then this could look like the following.

Also, the block passed to subscribe could be called in multiple threads, so this should use a storage class that is safe wrt concurrent modifications.

Suggested change
assert prefix != null && !prefix.isEmpty();
ListObjectsV2Request listRequest = builder().bucket(bucketName).prefix(prefix).build();
ListObjectsV2Publisher asyncPaginatedListResponse =
s3AsyncClient.listObjectsV2Paginator(listRequest);
Map<String, Long> filesListWithSize = new HashMap<>();
try {
asyncPaginatedListResponse
.subscribe(
listResponse ->
listResponse
.contents()
.forEach(s3Object -> filesListWithSize.put(s3Object.key(), s3Object.size())))
.get();
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
return filesListWithSize;
Map<String, Long> filesWithSize = new ConcurrentHashMap<>();
listFilesAndDo(prefix, s3Object -> filesWithSize.put(s3Object.key(), s3Object.size()));
return filesWithSize;

}
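
A minimal sketch of that refactor, assuming a listFilesAndDo(String, Consumer<S3Object>) shape as described in this comment. The helper name, its Consumer signature, and the extra imports (java.util.function.Consumer, software.amazon.awssdk.services.s3.model.S3Object, java.util.concurrent.ConcurrentHashMap) are assumptions from the comment, not existing API:

// Hypothetical shared helper; name and signature are taken from the review comment above.
private void listFilesAndDo(String prefix, Consumer<S3Object> action) {
  assert prefix != null && !prefix.isEmpty();

  ListObjectsV2Request listRequest = builder().bucket(bucketName).prefix(prefix).build();
  ListObjectsV2Publisher asyncPaginatedListResponse =
      s3AsyncClient.listObjectsV2Paginator(listRequest);

  try {
    // The subscribe callback may run on multiple threads, so callers must pass a
    // thread-safe sink (e.g. ConcurrentHashMap, CopyOnWriteArrayList).
    asyncPaginatedListResponse
        .subscribe(listResponse -> listResponse.contents().forEach(action))
        .get();
  } catch (InterruptedException | ExecutionException e) {
    throw new RuntimeException(e);
  }
}

public Map<String, Long> listFilesWithSize(String prefix) {
  Map<String, Long> filesWithSize = new ConcurrentHashMap<>();
  listFilesAndDo(prefix, s3Object -> filesWithSize.put(s3Object.key(), s3Object.size()));
  return filesWithSize;
}

The existing listFiles(String prefix) could then delegate to the same helper with the CopyOnWriteArrayList sink it already uses.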

/**
* Deletes a chunk off of object storage by chunk id. If object was not found returns false.
*
17 changes: 17 additions & 0 deletions astra/src/main/java/com/slack/astra/chunk/ChunkValidationUtils.java
@@ -0,0 +1,17 @@
package com.slack.astra.chunk;

import java.nio.file.Path;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NoLockFactory;

public class ChunkValidationUtils {

public static boolean isChunkClean(Path path) throws Exception {
FSDirectory existingDir = FSDirectory.open(path, NoLockFactory.INSTANCE);
CheckIndex checker = new CheckIndex(existingDir);
CheckIndex.Status status = checker.checkIndex();
checker.close();
return status.clean;
}
}
71 changes: 70 additions & 1 deletion astra/src/main/java/com/slack/astra/chunk/ReadOnlyChunkImpl.java
@@ -1,5 +1,6 @@
package com.slack.astra.chunk;

import static com.slack.astra.chunk.ChunkValidationUtils.isChunkClean;
import static com.slack.astra.chunkManager.CachingChunkManager.ASTRA_NG_DYNAMIC_CHUNK_SIZES_FLAG;
import static com.slack.astra.server.AstraConfig.DEFAULT_ZK_TIMEOUT_SECS;

@@ -28,9 +29,11 @@
import com.slack.astra.proto.metadata.Metadata;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.EnumSet;
import java.util.List;
@@ -228,6 +231,48 @@ public CacheNodeAssignment getCacheNodeAssignment() {
return assignment;
}

private boolean validateS3vsLocalDownLoad() {
// check if the number of files in S3 matches the local directory
Map<String, Long> filesWithSizeInS3 = blobStore.listFilesWithSize(snapshotMetadata.snapshotId);

Map<String, Long> localFiles;
try (Stream<Path> fileList = Files.list(dataDirectory)) {
localFiles =
fileList
.filter(Files::isRegularFile)
.collect(
Collectors.toMap(
path ->
dataDirectory.relativize(path).toString().replace(File.separator, "/"),

Hm. I think the replace isn't necessary since Files.list() only returns files in the current directory. Although, maybe you should use Path#getFileName().toString() here, which would align with calling Paths.get(s3Path).getFileName().toString() on the s3 entries below.
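
A sketch of that variant, keying the local map by bare file name so it lines up with the Paths.get(s3Path).getFileName().toString() call used on the S3 entries below:

localFiles =
    fileList
        .filter(Files::isRegularFile)
        .collect(
            Collectors.toMap(
                path -> path.getFileName().toString(), // file name only, no separator handling
                path -> path.toFile().length()));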

path -> path.toFile().length()));
} catch (IOException e) {
throw new RuntimeException(
String.format("Error reading local files in directory %s", dataDirectory), e);
}
if (localFiles.size() != filesWithSizeInS3.size()) {
LOG.error(
String.format(
"Mismatch in number of files in S3 (%s) and local directory (%s) for snapshot %s",
filesWithSizeInS3.size(), localFiles.size(), snapshotMetadata.toString()));
return false;
}

for (Map.Entry<String, Long> entry : filesWithSizeInS3.entrySet()) {
String s3Path = entry.getKey();
long s3Size = entry.getValue();
String fileName = Paths.get(s3Path).getFileName().toString();

if (!localFiles.containsKey(fileName) || !localFiles.get(fileName).equals(s3Size)) {
LOG.error(
String.format(
"Mismatch for file %s in S3 and local directory of size %s for snapshot %s",
s3Path, s3Size, snapshotMetadata.toString()));
return false;
}
}
return true;
}

public void downloadChunkData() {
Timer.Sample assignmentTimer = Timer.start(meterRegistry);
// lock
@@ -265,7 +310,24 @@ public void downloadChunkData() {
"No files found on blob storage, released slot for re-assignment");
}
}
// validate if the number of files in S3 matches the local directory
if (!validateS3vsLocalDownLoad()) {
String errorString =
String.format(
"Mismatch in number or size of files in S3 and local directory for snapshot %s",
snapshotMetadata);
throw new IOException(errorString);
}

// check if lucene index is valid and not corrupted
boolean luceneStatus = isChunkClean(dataDirectory);
if (!luceneStatus) {
throw new IOException(
String.format(
"Lucene index is not clean. Found issues for snapshot: %s.", snapshotMetadata));
}

// check if schema file exists
Path schemaPath = Path.of(dataDirectory.toString(), ReadWriteChunk.SCHEMA_FILE_NAME);
if (!Files.exists(schemaPath)) {
throw new RuntimeException("We expect a schema.json file to exist within the index");
Expand Down Expand Up @@ -305,7 +367,14 @@ public void downloadChunkData() {
// disregarding any errors
setAssignmentState(
getCacheNodeAssignment(), Metadata.CacheNodeAssignment.CacheNodeAssignmentState.EVICT);
LOG.error("Error handling chunk assignment", e);
LOG.error(
"Error handling chunk assignment for assignment: {}, snapshot id: {}, snapshot size: {}, replicaId: {}, replicaSet: {}",
assignment.assignmentId,
assignment.snapshotId,
assignment.snapshotSize,
assignment.replicaId,
assignment.replicaSet,
e);
assignmentTimer.stop(chunkAssignmentTimerFailure);
} finally {
chunkAssignmentLock.unlock();
26 changes: 25 additions & 1 deletion astra/src/main/java/com/slack/astra/chunk/ReadWriteChunk.java
@@ -1,6 +1,7 @@
package com.slack.astra.chunk;

import static com.slack.astra.chunk.ChunkInfo.toSnapshotMetadata;
import static com.slack.astra.chunk.ChunkValidationUtils.isChunkClean;
import static com.slack.astra.writer.SpanFormatter.isValidTimestamp;

import com.google.common.annotations.VisibleForTesting;
@@ -249,6 +250,13 @@ public boolean snapshotToS3(BlobStore blobStore) {
totalBytes += sizeOfFile;
logger.debug("File name is {} ({} bytes)", fileName, sizeOfFile);
}
// check if lucene index is valid and not corrupted
boolean luceneStatus = isChunkClean(dirPath);
if (!luceneStatus) {
logger.error("Lucene index is not clean. Found issues for chunk: {}.", chunkInfo);
return false;
}

this.fileUploadAttempts.increment(filesToUpload.size());
Timer.Sample snapshotTimer = Timer.start(meterRegistry);

@@ -258,7 +266,8 @@ public boolean snapshotToS3(BlobStore blobStore) {
snapshotTimer.stop(meterRegistry.timer(SNAPSHOT_TIMER));
chunkInfo.setSizeInBytesOnDisk(totalBytes);

List<String> filesUploaded = blobStore.listFiles(chunkInfo.chunkId);
Map<String, Long> filesWithSizeInS3 = blobStore.listFilesWithSize(chunkInfo.chunkId);
List<String> filesUploaded = new ArrayList<>(filesWithSizeInS3.keySet().stream().toList());
filesUploaded.removeIf(file -> file.endsWith("write.lock"));

// check here that all files are uploaded
Expand All @@ -273,6 +282,21 @@ public boolean snapshotToS3(BlobStore blobStore) {
filesUploaded);
return false;
}

// validate the size of the uploaded files
for (String fileName : filesToUpload) {
String s3Path = String.format("%s/%s", chunkInfo.chunkId, fileName);
long sizeOfFile = Files.size(Path.of(dirPath + "/" + fileName));

maybe use File.separator here instead of "/"?
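
One way that could look (assuming dirPath here is the chunk's local directory, as in the existing line):

long sizeOfFile = Files.size(Path.of(dirPath.toString(), fileName)); // joins using the platform separator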

if (!filesWithSizeInS3.containsKey(s3Path)
|| !filesWithSizeInS3.get(s3Path).equals(sizeOfFile)) {
logger.error(
String.format(
"Mismatch for file %s in S3 and local directory of size %s for chunk %s",
s3Path, sizeOfFile, chunkInfo.chunkId));

It would be good to include the s3 file size here as well.
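
For example, a version of the message that also carries the S3-side size (a null here would mean the key is missing entirely, which is itself useful to see in the log):

logger.error(
    String.format(
        "Mismatch for file %s: local size %s, S3 size %s, for chunk %s",
        s3Path, sizeOfFile, filesWithSizeInS3.get(s3Path), chunkInfo.chunkId));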

return false;
}
}

// and schema file exists in s3
if (!filesUploaded.contains(chunkInfo.chunkId + "/" + SCHEMA_FILE_NAME)) {
logger.error("Schema file was not uploaded to S3: {}", SCHEMA_FILE_NAME);
30 changes: 30 additions & 0 deletions astra/src/test/java/com/slack/astra/blobfs/BlobStoreTest.java
@@ -12,6 +12,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
@@ -218,6 +219,35 @@ void testListFilesNonExistingPrefix() {
assertThat(blobStore.listFiles(chunkId).size()).isEqualTo(0);
}

@Test
void testListFilesWithSize() throws IOException {
BlobStore blobStore = new BlobStore(s3Client, TEST_BUCKET);
String chunkId = UUID.randomUUID().toString();

assertThat(blobStore.listFiles(chunkId).size()).isEqualTo(0);

Path directoryUpload = Files.createTempDirectory("");
Path foo = Files.createTempFile(directoryUpload, "", "");
try (FileWriter fileWriter = new FileWriter(foo.toFile())) {
fileWriter.write("Example test 1");
}
Path bar = Files.createTempFile(directoryUpload, "", "");
Comment on lines +225 to +234

If you used a non-random chunkId and file names, you could have the assertion use more literals and it would be easier to follow.

Also, could you have one of the files have a different number of characters in it so it would be clear that they are different?
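
A sketch of that variant with fixed names and contents of different lengths, so the expected map reads as literals (the chunk id, file names, and contents here are made up for illustration):

String chunkId = "list-files-with-size-chunk";
Path directoryUpload = Files.createTempDirectory("");
Path foo = directoryUpload.resolve("foo.txt");
Files.writeString(foo, "Example test 1");    // 14 bytes
Path bar = directoryUpload.resolve("bar.txt");
Files.writeString(bar, "Example test two!"); // 17 bytes
blobStore.upload(chunkId, directoryUpload);

assertThat(blobStore.listFilesWithSize(chunkId))
    .containsExactlyInAnyOrderEntriesOf(
        Map.of(chunkId + "/foo.txt", 14L, chunkId + "/bar.txt", 17L));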

try (FileWriter fileWriter = new FileWriter(bar.toFile())) {
fileWriter.write("Example test 2");
}
blobStore.upload(chunkId, directoryUpload);

Map<String, Long> filesWithSize = blobStore.listFilesWithSize(chunkId);
assertThat(filesWithSize.size()).isEqualTo(2);
assertThat(filesWithSize)
.containsExactlyInAnyOrderEntriesOf(
Map.of(
String.format("%s/%s", chunkId, foo.getFileName().toString()),
Files.size(foo),
String.format("%s/%s", chunkId, bar.getFileName().toString()),
Files.size(bar)));
}

@Test
public void testCompressDecompressJsonData() throws Exception {
// Arrange