diff --git a/.changeset/changed_rhpbuildsectorproof_to_accept_cached_subtrees_to_reduce_disk_io_required_for_sector_reads.md b/.changeset/changed_rhpbuildsectorproof_to_accept_cached_subtrees_to_reduce_disk_io_required_for_sector_reads.md
new file mode 100644
index 00000000..11c397da
--- /dev/null
+++ b/.changeset/changed_rhpbuildsectorproof_to_accept_cached_subtrees_to_reduce_disk_io_required_for_sector_reads.md
@@ -0,0 +1,5 @@
+---
+default: major
+---
+
+# Changed rhp.BuildSectorProof to accept cached subtrees to reduce disk IO required for sector reads.
diff --git a/rhp/v4/merkle.go b/rhp/v4/merkle.go
index fe5c826a..1c72332c 100644
--- a/rhp/v4/merkle.go
+++ b/rhp/v4/merkle.go
@@ -1,8 +1,11 @@
 package rhp
 
 import (
+	"fmt"
 	"io"
 	"math/bits"
+	"sync"
+	"unsafe"
 
 	"go.sia.tech/core/blake2b"
 	rhp2 "go.sia.tech/core/rhp/v2"
@@ -10,6 +13,9 @@ import (
 )
 
 const (
+	// sectorSubtreeLeaves is the number of leaves per cached subtree
+	sectorSubtreeLeaves = 64
+
 	// LeafSize is the size of one leaf in bytes.
 	LeafSize = rhp2.LeafSize
 
@@ -17,6 +23,125 @@ const (
 	LeavesPerSector = rhp2.LeavesPerSector
 )
 
+// sectorAccumulator is a specialized accumulator for computing the total root
+// of a sector.
+type sectorAccumulator struct {
+	// Unlike proofAccumulator, the subtree roots are ordered largest-to-
+	// smallest, and we store four roots per height. This ordering allows us to
+	// cast two adjacent elements into a single [8][32]byte, which reduces
+	// copying when hashing.
+	trees [15][4][32]byte
+	// Since we operate on 8 nodes at a time, we need a buffer to hold nodes
+	// until we have enough. And since the buffer is adjacent to the trees in
+	// memory, we can again avoid some copying.
+	nodeBuf [4][32]byte
+	// Like proofAccumulator, 'numLeaves' is both the number of subtree roots
+	// appended and a bit vector that indicates which elements are active. We
+	// also use it to determine how many nodes are in the buffer.
+	numLeaves uint32
+}
+
+// We rely on the nodeBuf field immediately following the last element of the
+// trees field. This should always be true -- there's no reason for a compiler
+// to insert padding between them -- but it doesn't hurt to check.
+var _ [unsafe.Offsetof(sectorAccumulator{}.nodeBuf)]struct{} = [unsafe.Sizeof(sectorAccumulator{}.trees)]struct{}{}
+
+func (sa *sectorAccumulator) reset() {
+	sa.numLeaves = 0
+}
+
+func (sa *sectorAccumulator) hasNodeAtHeight(i int) bool {
+	// not as simple as in proofAccumulator; order is reversed, and sa.numLeaves
+	// is "off" by a factor of 4
+	return (sa.numLeaves>>2)&(1<<(len(sa.trees)-i-1)) != 0
+}
+
+func (sa *sectorAccumulator) appendNode(h types.Hash256) {
+	sa.nodeBuf[sa.numLeaves%4] = h
+	sa.numLeaves++
+	if sa.numLeaves%4 == 0 {
+		sa.numLeaves -= 4 // hack: offset mergeNodeBuf adding 4
+		sa.mergeNodeBuf()
+	}
+}
+
+func (sa *sectorAccumulator) appendLeaves(leaves []byte) {
+	if len(leaves)%LeafSize != 0 {
+		panic("appendLeaves: illegal input size")
+	}
+	rem := len(leaves) % (LeafSize * 4)
+	for i := 0; i < len(leaves)-rem; i += LeafSize * 4 {
+		blake2b.SumLeaves(&sa.nodeBuf, (*[4][64]byte)(unsafe.Pointer(&leaves[i])))
+		sa.mergeNodeBuf()
+	}
+	for i := len(leaves) - rem; i < len(leaves); i += LeafSize {
+		sa.appendNode(blake2b.SumLeaf((*[64]byte)(unsafe.Pointer(&leaves[i]))))
+	}
+}
+
+func (sa *sectorAccumulator) mergeNodeBuf() {
+	// same as in proofAccumulator, except that we operate on 8 nodes at a time,
+	// exploiting the fact that the two groups of 4 are contiguous in memory
+	nodes := &sa.nodeBuf
+	i := len(sa.trees) - 1
+	for ; sa.hasNodeAtHeight(i); i-- {
+		blake2b.SumNodes(&sa.trees[i], (*[8][32]byte)(unsafe.Pointer(&sa.trees[i])))
+		nodes = &sa.trees[i]
+	}
+	sa.trees[i] = *nodes
+	sa.numLeaves += 4
+}
+
+func (sa *sectorAccumulator) root() types.Hash256 {
+	if sa.numLeaves == 0 {
+		return types.Hash256{}
+	}
+
+	// helper function for computing the root of four subtrees
+	root4 := func(nodes [4][32]byte) types.Hash256 {
+		// NOTE: it would be more efficient to mutate sa.trees directly, but
+		// that would make root non-idempotent
+		in := (*[8][32]byte)(unsafe.Pointer(&[2][4][32]byte{0: nodes}))
+		out := (*[4][32]byte)(unsafe.Pointer(in))
+		blake2b.SumNodes(out, in)
+		blake2b.SumNodes(out, in)
+		return out[0]
+	}
+
+	i := len(sa.trees) - 1 - bits.TrailingZeros32(sa.numLeaves>>2)
+	var root types.Hash256
+	switch sa.numLeaves % 4 {
+	case 0:
+		root = root4(sa.trees[i])
+		i--
+	case 1:
+		root = sa.nodeBuf[0]
+	case 2:
+		root = blake2b.SumPair(sa.nodeBuf[0], sa.nodeBuf[1])
+	case 3:
+		root = blake2b.SumPair(blake2b.SumPair(sa.nodeBuf[0], sa.nodeBuf[1]), sa.nodeBuf[2])
+	}
+	for ; i >= 0; i-- {
+		if sa.hasNodeAtHeight(i) {
+			root = blake2b.SumPair(root4(sa.trees[i]), root)
+		}
+	}
+	return root
+}
+
+// A RangeProofVerifier allows range proofs to be verified in streaming fashion.
+type RangeProofVerifier = rhp2.RangeProofVerifier
+
+// NewRangeProofVerifier returns a RangeProofVerifier for the sector range
+// [start, end).
+func NewRangeProofVerifier(start, end uint64) *RangeProofVerifier {
+	return rhp2.NewRangeProofVerifier(start, end)
+}
+
+func sectorProofSize(n, i uint64) uint64 {
+	return rhp2.RangeProofSize(n, i, i+1)
+}
+
 // SectorRoot computes the Merkle root of a sector.
 func SectorRoot(sector *[SectorSize]byte) types.Hash256 {
 	return rhp2.SectorRoot(sector)
@@ -43,18 +168,110 @@ func MetaRoot(roots []types.Hash256) types.Hash256 {
 	return rhp2.MetaRoot(roots)
 }
 
-// BuildSectorProof builds a Merkle proof for a given range within a sector.
-func BuildSectorProof(sector *[SectorSize]byte, start, end uint64) []types.Hash256 {
-	return rhp2.BuildProof(sector, start, end, nil)
+// SectorSubtreeRange computes the leaves required to construct a
+// proof for the leaf range [start, end). It assumes that the cached
+// subtrees are 64 leaves (4 KiB) in size, and returns the aligned
+// start and end offsets that cover the requested range.
+func SectorSubtreeRange(start, end uint64) (rangeStart, rangeEnd uint64) {
+	switch {
+	case end > LeavesPerSector:
+		panic("end exceeds number of leaves")
+	case start > end:
+		panic("start exceeds end")
+	case start == end:
+		panic("start equals end")
+	}
+
+	return (start / sectorSubtreeLeaves) * sectorSubtreeLeaves, ((end + sectorSubtreeLeaves - 1) / sectorSubtreeLeaves) * sectorSubtreeLeaves
 }
 
-// A RangeProofVerifier allows range proofs to be verified in streaming fashion.
-type RangeProofVerifier = rhp2.RangeProofVerifier
+// CachedSectorSubtrees computes and returns the cached subtree roots for a sector.
+// Each root corresponds to a subtree of 64 leaves or 4 KiB of data.
+func CachedSectorSubtrees(sector *[SectorSize]byte) []types.Hash256 {
+	per := LeafSize * sectorSubtreeLeaves
+	n := LeavesPerSector / sectorSubtreeLeaves
+	roots := make([]types.Hash256, n)
+	var wg sync.WaitGroup
+	for i := range roots {
+		wg.Add(1)
+		go func(i int) {
+			defer wg.Done()
+			var sa sectorAccumulator
+			sa.appendLeaves(sector[i*per:][:per])
+			roots[i] = sa.root()
+		}(i)
+	}
+	wg.Wait()
+	return roots
+}
 
-// NewRangeProofVerifier returns a RangeProofVerifier for the sector range
-// [start, end).
-func NewRangeProofVerifier(start, end uint64) *RangeProofVerifier {
-	return rhp2.NewRangeProofVerifier(start, end)
+// BuildSectorProof constructs a proof for the segment range [start, end).
+//
+// `segment` must contain a 64-leaf-aligned segment of the
+// sector data containing all leaves in the range [start, end).
+//
+// `cache` must contain the 64-leaf subtree roots for the sector.
+func BuildSectorProof(segment []byte, start, end uint64, cache []types.Hash256) []types.Hash256 {
+	switch {
+	case end > LeavesPerSector:
+		panic("end exceeds number of leaves")
+	case start > end:
+		panic("start exceeds end")
+	case start == end:
+		panic("start equals end")
+	case len(cache) != LeavesPerSector/sectorSubtreeLeaves:
+		panic("cache has incorrect size")
+	}
+
+	segmentStart, segmentEnd := SectorSubtreeRange(start, end)
+
+	if uint64(len(segment)) != (segmentEnd-segmentStart)*LeafSize {
+		panic("segment has incorrect size")
+	}
+
+	// hash any subtree fully inside segment
+	var s sectorAccumulator
+	subtreeRoot := func(i, j uint64) types.Hash256 {
+		s.reset()
+		s.appendLeaves(segment[(i-segmentStart)*LeafSize : (j-segmentStart)*LeafSize])
+		return s.root()
+	}
+
+	// supply a root from cache when the subtree is aligned to 64-leaf chunks
+	precalc := func(i, j uint64) types.Hash256 {
+		if i >= segmentStart && j <= segmentEnd {
+			return subtreeRoot(i, j)
+		}
+		// use cached roots for aligned ranges
+		if i%sectorSubtreeLeaves == 0 && j%sectorSubtreeLeaves == 0 {
+			return MetaRoot(cache[i/sectorSubtreeLeaves : j/sectorSubtreeLeaves])
+		}
+		panic(fmt.Errorf("no precalculated root for subtree [%d, %d)", i, j))
+	}
+
+	// we build the proof by recursively enumerating subtrees, left to right.
+	// If a subtree is inside the segment range, we can skip it (because the
+	// verifier has the segments); otherwise, we use the precalculated root for
+	// the subtree. If a subtree partially overlaps the segment range, we split
+	// it and recurse.
+	proof := make([]types.Hash256, 0, sectorProofSize(LeavesPerSector, start))
+	var rec func(uint64, uint64)
+	rec = func(i, j uint64) {
+		if i >= start && j <= end {
+			// this subtree contains only data segments; skip it
+			return
+		} else if j <= start || i >= end {
+			proof = append(proof, precalc(i, j))
+			return
+		}
+		// this subtree partially overlaps the data segments; split it
+		// into two subtrees and recurse on each
+		mid := (i + j) / 2
+		rec(i, mid)
+		rec(mid, j)
+	}
+	rec(0, LeavesPerSector)
+	return proof
 }
 
 // VerifyLeafProof verifies the Merkle proof for a given leaf within a sector.
diff --git a/rhp/v4/merkle_test.go b/rhp/v4/merkle_test.go
index 4ac3b81e..76af5fb0 100644
--- a/rhp/v4/merkle_test.go
+++ b/rhp/v4/merkle_test.go
@@ -3,6 +3,7 @@ package rhp
 import (
 	"bytes"
 	"math/bits"
+	"slices"
 	"testing"
 
 	"go.sia.tech/core/types"
@@ -86,6 +87,52 @@ func TestPartialReadSectorRoot(t *testing.T) {
 	}
 }
 
+func TestBuildSectorProof(t *testing.T) {
+	var sector [SectorSize]byte
+	frand.Read(sector[:])
+	root := SectorRoot(&sector)
+
+	subtrees := CachedSectorSubtrees(&sector)
+
+	randomRange := func() [2]int {
+		start := frand.Intn(LeavesPerSector - 1)
+		end := frand.Intn(LeavesPerSector-start) + start + 1
+		return [2]int{start, end}
+	}
+
+	tests := [][2]int{
+		{0, 1},
+		{1, 2},
+		{0, 64},
+		{66, 67},
+		{0, 130},
+		{130, 194},
+		{0, 129},
+		{0, LeavesPerSector / 2},
+		{LeavesPerSector - 1, LeavesPerSector},
+		{LeavesPerSector/2 - 1, LeavesPerSector},
+		{LeavesPerSector / 2, LeavesPerSector},
+		{LeavesPerSector / 4, 3 * LeavesPerSector / 4},
+	}
+	for range 100 {
+		tests = append(tests, randomRange())
+	}
+
+	for _, test := range tests {
+		start, end := uint64(test[0]), uint64(test[1])
+		subtreeStart, subtreeEnd := SectorSubtreeRange(start, end)
+		segment := slices.Clone(sector[subtreeStart*LeafSize : subtreeEnd*LeafSize])
+		proof := BuildSectorProof(segment, start, end, subtrees)
+
+		rpv := NewRangeProofVerifier(start, end)
+		if _, err := rpv.ReadFrom(bytes.NewReader(sector[start*LeafSize : end*LeafSize])); err != nil {
+			t.Fatal(err)
+		} else if !rpv.Verify(proof, root) {
+			t.Fatalf("invalid proof for range [%d, %d)", start, end)
+		}
+	}
+}
+
 func BenchmarkSectorRoot(b *testing.B) {
 	b.ReportAllocs()
 	var sector [SectorSize]byte