SiaFoundation · n8mgr · Dec 1, 2025 · Nov 27, 2025 · peterjan · Dec 1, 2025
diff --git a/...rproof_to_accept_cached_subtrees_to_reduce_disk_io_required_for_sector_reads.md b/...rproof_to_accept_cached_subtrees_to_reduce_disk_io_required_for_sector_reads.md
@@ -0,0 +1,5 @@
+---
+default: major
+---
+
+# Changed rhp.BuildSectorProof to accept cached subtrees to reduce disk IO required for sector reads.
diff --git a/rhp/v4/merkle.go b/rhp/v4/merkle.go
@@ -1,22 +1,147 @@
 package rhp
 
 import (
+	"fmt"
 	"io"
 	"math/bits"
+	"sync"
+	"unsafe"
 
 	"go.sia.tech/core/blake2b"
 	rhp2 "go.sia.tech/core/rhp/v2"
 	"go.sia.tech/core/types"
 )
 
 const (
+	// sectorSubtreeLeaves is the number of leaves covered by each cached subtree root.
+	sectorSubtreeLeaves = 64
+
 	// LeafSize is the size of one leaf in bytes.
 	LeafSize = rhp2.LeafSize
 
 	// LeavesPerSector is the number of leaves in one sector.
 	LeavesPerSector = rhp2.LeavesPerSector
 )
 
+// sectorAccumulator is a specialized accumulator for computing the Merkle root
+// of a run of sector leaves, hashing them in groups of four where possible.
+type sectorAccumulator struct {
+	// Unlike proofAccumulator, the subtree roots are ordered largest-to-
+	// smallest (index 0 is the tallest slot), and we store four roots per
+	// height. This ordering lets us cast two adjacent elements into a single
+	// [8][32]byte, which reduces copying when hashing.
+	trees [15][4][32]byte
+	// Since we operate on 8 nodes at a time, we need a buffer to hold nodes
+	// until we have enough. Because the buffer directly follows trees in
+	// memory, the last trees entry and nodeBuf can be hashed without copying.
+	nodeBuf [4][32]byte
+	// Like proofAccumulator, 'numLeaves' is both the number of leaf hashes
+	// appended and a bit vector: bits 2 and up mark which trees entries are
+	// active, and the low two bits count the nodes waiting in nodeBuf.
+	numLeaves uint32
+}
+
+// We rely on the nodeBuf field immediately following the last element of the
+// trees field (no padding). The declaration below only type-checks when
+// Offsetof(nodeBuf) == Sizeof(trees), so a layout change breaks the build.
+var _ [unsafe.Offsetof(sectorAccumulator{}.nodeBuf)]struct{} = [unsafe.Sizeof(sectorAccumulator{}.trees)]struct{}{}
+
+func (sa *sectorAccumulator) reset() {
+	sa.numLeaves = 0 // clearing the count/bit vector is enough: stale trees and nodeBuf contents are treated as empty
+}
+
+func (sa *sectorAccumulator) hasNodeAtHeight(i int) bool {
+	// not as simple as in proofAccumulator: the order is reversed (i ==
+	// len(sa.trees)-1 maps to the lowest bit), and sa.numLeaves is "off" by a factor of 4
+	return (sa.numLeaves>>2)&(1<<(len(sa.trees)-i-1)) != 0
+}
+
+func (sa *sectorAccumulator) appendNode(h types.Hash256) {
+	sa.nodeBuf[sa.numLeaves%4] = h // the low two bits of numLeaves index the next free buffer slot
+	sa.numLeaves++
+	if sa.numLeaves%4 == 0 {
+		sa.numLeaves -= 4 // hack: mergeNodeBuf reads the pre-merge group count and adds 4 itself
+		sa.mergeNodeBuf()
+	}
+}
+
+func (sa *sectorAccumulator) appendLeaves(leaves []byte) {
+	if len(leaves)%LeafSize != 0 {
+		panic("appendLeaves: illegal input size")
+	}
+	rem := len(leaves) % (LeafSize * 4) // bytes left over after the last full group of four leaves
+	for i := 0; i < len(leaves)-rem; i += LeafSize * 4 {
+		blake2b.SumLeaves(&sa.nodeBuf, (*[4][64]byte)(unsafe.Pointer(&leaves[i]))) // NOTE(review): writes into nodeBuf directly, so this appears to assume no nodes are buffered on entry — confirm
+		sa.mergeNodeBuf()
+	}
+	for i := len(leaves) - rem; i < len(leaves); i += LeafSize { // tail: fewer than four leaves remain, append them one at a time
+		sa.appendNode(blake2b.SumLeaf((*[64]byte)(unsafe.Pointer(&leaves[i]))))
+	}
+}
+
+func (sa *sectorAccumulator) mergeNodeBuf() {
+	// same as in proofAccumulator, except that we operate on 8 nodes at a time:
+	// trees[i] and its successor in memory (trees[i+1], or nodeBuf) are contiguous
+	nodes := &sa.nodeBuf
+	i := len(sa.trees) - 1
+	for ; sa.hasNodeAtHeight(i); i-- {
+		blake2b.SumNodes(&sa.trees[i], (*[8][32]byte)(unsafe.Pointer(&sa.trees[i])))
+		nodes = &sa.trees[i]
+	}
+	sa.trees[i] = *nodes // park the merged group in the first inactive slot
+	sa.numLeaves += 4
+}
+
+func (sa *sectorAccumulator) root() types.Hash256 {
+	if sa.numLeaves == 0 {
+		return types.Hash256{}
+	}
+
+	// helper function for computing the root of four subtrees
+	root4 := func(nodes [4][32]byte) types.Hash256 {
+		// NOTE: we hash a zero-padded scratch copy (out aliases its first half);
+		// mutating sa.trees directly would be cheaper but make root non-idempotent
+		in := (*[8][32]byte)(unsafe.Pointer(&[2][4][32]byte{0: nodes}))
+		out := (*[4][32]byte)(unsafe.Pointer(in))
+		blake2b.SumNodes(out, in)
+		blake2b.SumNodes(out, in)
+		return out[0]
+	}
+
+	i := len(sa.trees) - 1 - bits.TrailingZeros32(sa.numLeaves>>2) // index of the smallest active trees entry; negative when none is active
+	var root types.Hash256
+	switch sa.numLeaves % 4 { // seed root with the smallest pending piece: buffered nodes if any, else the smallest active entry
+	case 0:
+		root = root4(sa.trees[i])
+		i--
+	case 1:
+		root = sa.nodeBuf[0]
+	case 2:
+		root = blake2b.SumPair(sa.nodeBuf[0], sa.nodeBuf[1])
+	case 3:
+		root = blake2b.SumPair(blake2b.SumPair(sa.nodeBuf[0], sa.nodeBuf[1]), sa.nodeBuf[2])
+	}
+	for ; i >= 0; i-- {
+		if sa.hasNodeAtHeight(i) {
+			root = blake2b.SumPair(root4(sa.trees[i]), root) // larger (earlier) subtrees go on the left
+		}
+	}
+	return root
+}
+
+// A RangeProofVerifier verifies range proofs in streaming fashion (alias of the rhp2 type).
+type RangeProofVerifier = rhp2.RangeProofVerifier
+
+// NewRangeProofVerifier returns a RangeProofVerifier for the sector range
+// [start, end), as constructed by rhp2.NewRangeProofVerifier.
+func NewRangeProofVerifier(start, end uint64) *RangeProofVerifier {
+	return rhp2.NewRangeProofVerifier(start, end)
+}
+
+func sectorProofSize(n, i uint64) uint64 {
+	return rhp2.RangeProofSize(n, i, i+1) // range-proof size for the one-leaf range [i, i+1); n is passed through unchanged
+}
+
 // SectorRoot computes the Merkle root of a sector.
 func SectorRoot(sector *[SectorSize]byte) types.Hash256 {
 	return rhp2.SectorRoot(sector)
@@ -43,18 +168,110 @@ func MetaRoot(roots []types.Hash256) types.Hash256 {
 	return rhp2.MetaRoot(roots)
 }
 
-// BuildSectorProof builds a Merkle proof for a given range within a sector.
-func BuildSectorProof(sector *[SectorSize]byte, start, end uint64) []types.Hash256 {
-	return rhp2.BuildProof(sector, start, end, nil)
+// SectorSubtreeRange returns the leaf range that must be read to construct a
+// proof for the leaf range [start, end): start rounded down and end rounded up
+// to a multiple of the 64-leaf (4 KiB) cached subtree size. It panics if the
+// range is empty, inverted, or extends past LeavesPerSector.
+func SectorSubtreeRange(start, end uint64) (rangeStart, rangeEnd uint64) {
+	switch {
+	case end > LeavesPerSector:
+		panic("end exceeds number of leaves")
+	case start > end:
+		panic("start exceeds end")
+	case start == end:
+		panic("start equals end")
+	}
+
+	return (start / sectorSubtreeLeaves) * sectorSubtreeLeaves, ((end + sectorSubtreeLeaves - 1) / sectorSubtreeLeaves) * sectorSubtreeLeaves
+}
 
-// A RangeProofVerifier allows range proofs to be verified in streaming fashion.
-type RangeProofVerifier = rhp2.RangeProofVerifier
+// CachedSectorSubtrees hashes a sector into its cached subtree roots, in leaf order,
+// using one goroutine per subtree. Each root covers 64 leaves or 4 KiB of data.
+func CachedSectorSubtrees(sector *[SectorSize]byte) []types.Hash256 {
+	per := LeafSize * sectorSubtreeLeaves // bytes of sector data under one cached root
+	n := LeavesPerSector / sectorSubtreeLeaves
+	roots := make([]types.Hash256, n)
+	var wg sync.WaitGroup
+	for i := range roots {
+		wg.Add(1)
+		go func(i int) { // NOTE(review): one goroutine per subtree; a bounded worker pool may schedule better — measure
+			defer wg.Done()
+			var sa sectorAccumulator
+			sa.appendLeaves(sector[i*per:][:per])
+			roots[i] = sa.root() // each goroutine writes a distinct index, so no locking is needed
+		}(i)
+	}
+	wg.Wait()
+	return roots
+}
 
-// NewRangeProofVerifier returns a RangeProofVerifier for the sector range
-// [start, end).
-func NewRangeProofVerifier(start, end uint64) *RangeProofVerifier {
-	return rhp2.NewRangeProofVerifier(start, end)
+// BuildSectorProof constructs a proof for the leaf range [start, end).
+//
+// `segment` must contain exactly the 64-leaf-aligned span of sector data
+// given by SectorSubtreeRange(start, end), which covers [start, end).
+//
+// `cache` must contain every 64-leaf subtree root of the sector; bad input panics.
+func BuildSectorProof(segment []byte, start, end uint64, cache []types.Hash256) []types.Hash256 {
+	switch {
+	case end > LeavesPerSector:
+		panic("end exceeds number of leaves")
+	case start > end:
+		panic("start exceeds end")
+	case start == end:
+		panic("start equals end")
+	case len(cache) != LeavesPerSector/sectorSubtreeLeaves:
+		panic("cache has incorrect size")
+	}
+
+	segmentStart, segmentEnd := SectorSubtreeRange(start, end)
+
+	if uint64(len(segment)) != (segmentEnd-segmentStart)*LeafSize {
+		panic("segment has incorrect size")
+	}
+
+	// hash the leaves [i, j) out of segment; only valid for subtrees fully inside it
+	var s sectorAccumulator
+	subtreeRoot := func(i, j uint64) types.Hash256 {
+		s.reset()
+		s.appendLeaves(segment[(i-segmentStart)*LeafSize : (j-segmentStart)*LeafSize])
+		return s.root()
+	}
+
+	// supply the root of subtree [i, j): from segment data when covered, else from cache
+	precalc := func(i, j uint64) types.Hash256 {
+		if i >= segmentStart && j <= segmentEnd {
+			return subtreeRoot(i, j)
+		}
+		// use cached roots for aligned ranges
+		if i%sectorSubtreeLeaves == 0 && j%sectorSubtreeLeaves == 0 {
+			return MetaRoot(cache[i/sectorSubtreeLeaves : j/sectorSubtreeLeaves])
+		}
+		panic(fmt.Errorf("no precalculated root for subtree [%d, %d)", i, j))
+	}
+
+	// we build the proof by recursively enumerating subtrees, left to right.
+	// If a subtree lies inside [start, end), we can skip it (because the
+	// verifier has those leaves); if it lies entirely outside, we emit its
+	// precalculated root. If a subtree partially overlaps [start, end), we
+	// split it and recurse.
+	proof := make([]types.Hash256, 0, rhp2.RangeProofSize(LeavesPerSector, start, end)) // capacity for the whole range proof, not just the leaf at start
+	var rec func(uint64, uint64)
+	rec = func(i, j uint64) {
+		if i >= start && j <= end {
+			// this subtree contains only data segments; skip it
+			return
+		} else if j <= start || i >= end {
+			proof = append(proof, precalc(i, j))
+			return
+		}
+		// this subtree partially overlaps the data segments; split it
+		// into two subtrees and recurse on each
+		mid := (i + j) / 2
+		rec(i, mid)
+		rec(mid, j)
+	}
+	rec(0, LeavesPerSector)
+	return proof
 }
 
 // VerifyLeafProof verifies the Merkle proof for a given leaf within a sector.

diff --git a/rhp/v4/merkle_test.go b/rhp/v4/merkle_test.go
@@ -3,6 +3,7 @@ package rhp
 import (
 	"bytes"
 	"math/bits"
+	"slices"
 	"testing"
 
 	"go.sia.tech/core/types"
@@ -86,6 +87,52 @@ func TestPartialReadSectorRoot(t *testing.T) {
 	}
 }
 
+func TestBuildSectorProof(t *testing.T) {
+	var sector [SectorSize]byte
+	frand.Read(sector[:])
+	root := SectorRoot(&sector)
+
+	subtrees := CachedSectorSubtrees(&sector)
+
+	randomRange := func() [2]int { // random non-empty leaf range [start, end) within the sector
+		start := frand.Intn(LeavesPerSector - 1)
+		end := frand.Intn(LeavesPerSector-start) + start + 1
+		return [2]int{start, end}
+	}
+
+	tests := [][2]int{
+		{0, 1},
+		{1, 2},
+		{0, 64},
+		{66, 67},
+		{0, 130},
+		{130, 194},
+		{0, 129},
+		{0, LeavesPerSector / 2},
+		{LeavesPerSector - 1, LeavesPerSector},
+		{LeavesPerSector/2 - 1, LeavesPerSector},
+		{LeavesPerSector / 2, LeavesPerSector},
+		{LeavesPerSector / 4, 3 * LeavesPerSector / 4},
+	}
+	for range 100 { // add 100 random ranges to the fixed cases above
+		tests = append(tests, randomRange())
+	}
+
+	for _, test := range tests {
+		start, end := uint64(test[0]), uint64(test[1])
+		subtreeStart, subtreeEnd := SectorSubtreeRange(start, end)
+		segment := slices.Clone(sector[subtreeStart*LeafSize : subtreeEnd*LeafSize]) // the prover only gets the aligned span, not the whole sector
+		proof := BuildSectorProof(segment, start, end, subtrees)
+
+		rpv := NewRangeProofVerifier(start, end)
+		if _, err := rpv.ReadFrom(bytes.NewReader(sector[start*LeafSize : end*LeafSize])); err != nil {
+			t.Fatal(err)
+		} else if !rpv.Verify(proof, root) { // the proof must reproduce the root of the full sector
+			t.Fatalf("invalid proof for range [%d, %d)", start, end)
+		}
+	}
+}
+
 func BenchmarkSectorRoot(b *testing.B) {
 	b.ReportAllocs()
 	var sector [SectorSize]byte