From e8edf68e1631f64198d63781fd02a26887229ae5 Mon Sep 17 00:00:00 2001 From: Aaron Son Date: Thu, 29 Jan 2026 17:33:46 -0800 Subject: [PATCH] go/store/nbs: journal_record.go: Do not have processJournalRecords log errors when it sees an early EOF. Logging the error here was overly conservative. --- go/store/nbs/journal_record.go | 14 +++++---- go/store/nbs/journal_record_test.go | 44 +++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/go/store/nbs/journal_record.go b/go/store/nbs/journal_record.go index d724ba3549d..b21e06fa1ae 100644 --- a/go/store/nbs/journal_record.go +++ b/go/store/nbs/journal_record.go @@ -375,11 +375,15 @@ func processJournalRecords(ctx context.Context, r io.ReadSeeker, tryTruncate boo } if buf, err = rdr.Peek(int(l)); err != nil { - if warningsCb != nil { - // We probably wend off the end of the file. Report error and recover. - jErr := fmt.Errorf("failed to read full journal record of length %d at offset %d: %w", l, off, err) - warningsCb(jErr) - } + // Not being able to read a full record here is considered OK. Given our write protocol, + // it is expected for only a prefix of the journal to make it to storage. + // + // We still set recovered because there's a potential failure mode involving corruption + // which we would like to detect. That comes about when the length tag itself gets + // corrupted to be larger than expected, but still not larger than |journalWriterbuffSize|, + // and that length ends up taking us to EOF. But by running recovery we can detect cases where + // there are valid sync'd records after this corruption and we can still detect some cases + // where we have knowable corruption. 
recovered = true break } diff --git a/go/store/nbs/journal_record_test.go b/go/store/nbs/journal_record_test.go index 84034620e61..a8df4cc3cf0 100644 --- a/go/store/nbs/journal_record_test.go +++ b/go/store/nbs/journal_record_test.go @@ -245,6 +245,50 @@ func TestJournalForDataLoss(t *testing.T) { } } +func TestJournalTruncated(t *testing.T) { + // We should be able to process a truncated journal without any errors. + origTimestampGenerator := journalRecordTimestampGenerator + t.Cleanup(func() { + journalRecordTimestampGenerator = origTimestampGenerator + }) + journalRecordTimestampGenerator = testTimestampGenerator + + journal := make([]byte, 1<<20) // 1 MB should be plenty for these tests. + + var off uint32 + for range 8 { + r, _ := makeRootHashRecord() + off += writeRootHashRecord(journal[off:], r.address) + for range 32 { + r, _ = makeChunkRecord() + off += writeChunkRecord(journal[off:], mustCompressedChunk(r)) + } + } + r, _ := makeRootHashRecord() + rootstart := off + off += writeRootHashRecord(journal[off:], r.address) + for i := range 16 { + var recoverErr error + _, err := processJournalRecords(t.Context(), bytes.NewReader(journal[:int(off)-i]), true, 0, func(int64, journalRec) error { return nil }, func(e error) { recoverErr = e }) + require.NoError(t, err, "err should be nil, iteration %d", i) + require.NoError(t, recoverErr, "recoverErr should be nil, iteration %d", i) + } + + // Test writing a large incorrect offset for that root record and then some more records after it. We should see + // errors in that case. 
+ writeUint32(journal[rootstart:], uint32(1<<20)) + for range 8 { + r, _ := makeRootHashRecord() + off += writeRootHashRecord(journal[off:], r.address) + for range 32 { + r, _ = makeChunkRecord() + off += writeChunkRecord(journal[off:], mustCompressedChunk(r)) + } + } + _, err := processJournalRecords(t.Context(), bytes.NewReader(journal[:off]), true, 0, func(int64, journalRec) error { return nil }, nil) + require.Error(t, err) +} + func TestJournalForDataLossOnBoundary(t *testing.T) { r := rand.New(rand.NewSource(987654321)) // The data loss detection logic has some special cases around buffer boundaries to avoid reading all data into