From 0b322e4780db6bc131c631e41aa3b2aeea301091 Mon Sep 17 00:00:00 2001 From: leng25 Date: Sat, 28 Feb 2026 13:57:59 -0500 Subject: [PATCH 1/2] Improve prefix sum in Lucene99HnswVectorsReader --- lucene/CHANGES.txt | 2 ++ .../codecs/lucene99/Lucene99HnswVectorsReader.java | 12 +++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f061ce8ddd7f..f6b879b86aaf 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -113,6 +113,8 @@ Improvements Optimizations --------------------- +* GITHUB#15024: Improve prefix sum computation in Lucene99HnswVectorsReader for faster neighbor decoding. (Luis Negrin) + * GITHUB#15681: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. (Zhou Hui) * GITHUB#13782: Replace handwritten loops compare with Arrays.compareUnsigned in TermsEnum and TermsEnumFrame classes. (Zhou Hui) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java index e55e015652c6..fca6df43270e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java @@ -560,16 +560,18 @@ public void seek(int level, int targetOrd) throws IOException { dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level])); arcCount = dataIn.readVInt(); assert arcCount <= currentNeighborsBuffer.length : "too many neighbors: " + arcCount; + int sum = 0; if (arcCount > 0) { if (version >= VERSION_GROUPVARINT) { GroupVIntUtil.readGroupVInts(dataIn, currentNeighborsBuffer, arcCount); - for (int i = 1; i < arcCount; i++) { - currentNeighborsBuffer[i] = currentNeighborsBuffer[i - 1] + currentNeighborsBuffer[i]; + for (int i = 0; i < arcCount; i++) { + sum += currentNeighborsBuffer[i]; + currentNeighborsBuffer[i] 
= sum; } } else { - currentNeighborsBuffer[0] = dataIn.readVInt(); - for (int i = 1; i < arcCount; i++) { - currentNeighborsBuffer[i] = currentNeighborsBuffer[i - 1] + dataIn.readVInt(); + for (int i = 0; i < arcCount; i++) { + sum += dataIn.readVInt(); + currentNeighborsBuffer[i] = sum; } } } From b63e8b9b290d66c7c116ac31d4afebd84881dcbc Mon Sep 17 00:00:00 2001 From: leng25 Date: Sun, 8 Mar 2026 20:46:30 -0400 Subject: [PATCH 2/2] Move CHANGES.txt entry from 11.0.0 to 10.5.0 and narrow the scope of sum in Lucene99HnswVectorsReader#seek --- lucene/CHANGES.txt | 4 ++-- .../lucene/codecs/lucene99/Lucene99HnswVectorsReader.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 68681d8d388d..738034e68e79 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -113,8 +113,6 @@ Improvements Optimizations --------------------- -* GITHUB#15024: Improve prefix sum computation in Lucene99HnswVectorsReader for faster neighbor decoding. (Luis Negrin) - * GITHUB#15681: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. (Zhou Hui) * GITHUB#13782: Replace handwritten loops compare with Arrays.compareUnsigned in TermsEnum and TermsEnumFrame classes. (Zhou Hui) @@ -241,6 +239,8 @@ Optimizations * GITHUB#15742: Optimize int4 dotProduct and squareDistance computations by replacing vector conversions with reinterpret casting + bit manipulation. (Trevor McCulloch, Kaival Parikh) +* GITHUB#15024: Improve prefix sum computation in Lucene99HnswVectorsReader for faster neighbor decoding. 
(Luis Negrin) + Bug Fixes --------------------- * GITHUB#15668: Fix UnsupportedOperationException in WeightedSpanTermExtractor by diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java index fca6df43270e..5a37ac0ad3b5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java @@ -560,8 +560,8 @@ public void seek(int level, int targetOrd) throws IOException { dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level])); arcCount = dataIn.readVInt(); assert arcCount <= currentNeighborsBuffer.length : "too many neighbors: " + arcCount; - int sum = 0; if (arcCount > 0) { + int sum = 0; if (version >= VERSION_GROUPVARINT) { GroupVIntUtil.readGroupVInts(dataIn, currentNeighborsBuffer, arcCount); for (int i = 0; i < arcCount; i++) {