Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ Optimizations

* GITHUB#15001: Remove full integrity check from SortingStoredFieldsConsumer (Martijn van Groningen)

* GITHUB#14980: Add bulk off-heap scoring for float32 vectors (Chris Hegarty)
* GITHUB#14980, GITHUB#15037: Add bulk off-heap scoring for float32 vectors (Chris Hegarty)

* GITHUB#15004: Wraps all iterator with likelyImpactsEnum under BlockMaxConjunctionBulkScorer. (Ge Song)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
*/
package org.apache.lucene.benchmark.jmh;

import static org.apache.lucene.index.VectorSimilarityFunction.COSINE;
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT;

import java.io.IOException;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -75,6 +78,11 @@
"-XX:+AlwaysPreTouch",
"--add-modules=jdk.incubator.vector"
})
/**
 * Benchmark comparing the performance of float32 vector scoring with the default and the
 * optimized scorers. There are benchmark methods for each similarity function, but it is usually
 * most useful to run and compare one equivalent subset at a time, e.g. by selecting benchmarks
 * with a regex such as {@code .*dot.*}.
 */
public class VectorScorerFloat32Benchmark {

@Param({"1024"})
Expand All @@ -92,8 +100,8 @@ public class VectorScorerFloat32Benchmark {
Directory dir;
IndexInput in;
KnnVectorValues values;
UpdateableRandomVectorScorer defDotScorer;
UpdateableRandomVectorScorer optDotScorer;
UpdateableRandomVectorScorer defDotScorer, defCosScorer, defEucScorer, defMipScorer;
UpdateableRandomVectorScorer optDotScorer, optCosScorer, optEucScorer, optMipScorer;

@Setup(Level.Trial)
public void setup() throws IOException {
Expand Down Expand Up @@ -121,12 +129,24 @@ public void perIterationInit() throws IOException {
values = vectorValues(size, numVectors, in, DOT_PRODUCT);
var def = DefaultFlatVectorScorer.INSTANCE;
defDotScorer = def.getRandomVectorScorerSupplier(DOT_PRODUCT, values.copy()).scorer();
defCosScorer = def.getRandomVectorScorerSupplier(COSINE, values.copy()).scorer();
defEucScorer = def.getRandomVectorScorerSupplier(EUCLIDEAN, values.copy()).scorer();
defMipScorer = def.getRandomVectorScorerSupplier(MAXIMUM_INNER_PRODUCT, values.copy()).scorer();
defDotScorer.setScoringOrdinal(targetOrd);
defCosScorer.setScoringOrdinal(targetOrd);
defEucScorer.setScoringOrdinal(targetOrd);
defMipScorer.setScoringOrdinal(targetOrd);

// optimized scorer
var opt = FlatVectorScorerUtil.getLucene99FlatVectorsScorer();
optDotScorer = opt.getRandomVectorScorerSupplier(DOT_PRODUCT, values.copy()).scorer();
optCosScorer = opt.getRandomVectorScorerSupplier(COSINE, values.copy()).scorer();
optEucScorer = opt.getRandomVectorScorerSupplier(EUCLIDEAN, values.copy()).scorer();
optMipScorer = opt.getRandomVectorScorerSupplier(MAXIMUM_INNER_PRODUCT, values.copy()).scorer();
optDotScorer.setScoringOrdinal(targetOrd);
optCosScorer.setScoringOrdinal(targetOrd);
optEucScorer.setScoringOrdinal(targetOrd);
optMipScorer.setScoringOrdinal(targetOrd);

List<Integer> list = IntStream.range(0, numVectors).boxed().collect(Collectors.toList());
Collections.shuffle(list, random);
Expand All @@ -146,17 +166,30 @@ public void teardown() throws IOException {
}

/**
 * Runs the optimized scorer for every similarity function (single-ordinal and bulk variants), plus
 * a freshly created DOT_PRODUCT scorer for a random query vector, two times over.
 *
 * <p>Each similarity is exercised as an {@code *OptScorer} / {@code *OptBulkScore} pair so that no
 * optimized bulk path is left out.
 *
 * @param random source of randomness for the query vector
 * @throws IOException propagated from the underlying scorers
 */
public void pollute(Random random) throws IOException {
  // exercise various similarities to ensure they don't have negative effects, e.g.,
  // type pollution on virtual calls, etc.
  float[] vec = randomVector(size, random);
  var opt = FlatVectorScorerUtil.getLucene99FlatVectorsScorer();
  var scorer = opt.getRandomVectorScorer(DOT_PRODUCT, values.copy(), vec);

  for (int i = 0; i < 2; i++) {
    dotProductOptScorer();
    dotProductOptBulkScore();
    cosineOptScorer();
    // was cosineDefaultBulk(): the optimized cosine bulk path was the only one not exercised
    cosineOptBulkScore();
    euclideanOptScorer();
    euclideanOptBulkScore();
    mipOptScorer();
    mipOptBulkScore();
    // query-vector scorer: single-ordinal path, then the bulk path
    for (int v = 0; v < numVectorsToScore; v++) {
      scores[v] = scorer.score(indices[v]);
    }
    scorer.bulkScore(indices, scores, indices.length);
  }
}

// -- dot product

@Benchmark
public float[] dotProductDefault() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
Expand Down Expand Up @@ -185,6 +218,96 @@ public float[] dotProductOptBulkScore() throws IOException {
return scores;
}

// -- euclidean

/**
 * Scores vectors one ordinal at a time with {@code defEucScorer}, the EUCLIDEAN scorer obtained
 * from {@code DefaultFlatVectorScorer}.
 *
 * @return the shared {@code scores} array, with entries {@code [0, numVectorsToScore)} overwritten
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] euclideanDefault() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = defEucScorer.score(indices[v]);
}
// returning the array hands the result back to the benchmark harness
return scores;
}

/**
 * Scores all of {@code indices} in a single {@code bulkScore} call on {@code defEucScorer}, the
 * EUCLIDEAN scorer obtained from {@code DefaultFlatVectorScorer}.
 *
 * @return the shared {@code scores} array that {@code bulkScore} was asked to fill
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] euclideanDefaultBulk() throws IOException {
// NOTE(review): the count is indices.length, while the per-ordinal variant loops to
// numVectorsToScore — presumably the two are equal; confirm in setup.
defEucScorer.bulkScore(indices, scores, indices.length);
return scores;
}

/**
 * Scores vectors one ordinal at a time with {@code optEucScorer}, the EUCLIDEAN scorer obtained
 * from {@code FlatVectorScorerUtil.getLucene99FlatVectorsScorer()}.
 *
 * @return the shared {@code scores} array, with entries {@code [0, numVectorsToScore)} overwritten
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] euclideanOptScorer() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = optEucScorer.score(indices[v]);
}
// returning the array hands the result back to the benchmark harness
return scores;
}

/**
 * Scores all of {@code indices} in a single {@code bulkScore} call on {@code optEucScorer}, the
 * EUCLIDEAN scorer obtained from {@code FlatVectorScorerUtil.getLucene99FlatVectorsScorer()}.
 *
 * @return the shared {@code scores} array that {@code bulkScore} was asked to fill
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] euclideanOptBulkScore() throws IOException {
// NOTE(review): the count is indices.length, while the per-ordinal variant loops to
// numVectorsToScore — presumably the two are equal; confirm in setup.
optEucScorer.bulkScore(indices, scores, indices.length);
return scores;
}

// -- cosine

/**
 * Scores vectors one ordinal at a time with {@code defCosScorer}, the COSINE scorer obtained from
 * {@code DefaultFlatVectorScorer}.
 *
 * @return the shared {@code scores} array, with entries {@code [0, numVectorsToScore)} overwritten
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] cosineDefault() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = defCosScorer.score(indices[v]);
}
// returning the array hands the result back to the benchmark harness
return scores;
}

/**
 * Scores all of {@code indices} in a single {@code bulkScore} call on {@code defCosScorer}, the
 * COSINE scorer obtained from {@code DefaultFlatVectorScorer}.
 *
 * @return the shared {@code scores} array that {@code bulkScore} was asked to fill
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] cosineDefaultBulk() throws IOException {
// NOTE(review): the count is indices.length, while the per-ordinal variant loops to
// numVectorsToScore — presumably the two are equal; confirm in setup.
defCosScorer.bulkScore(indices, scores, indices.length);
return scores;
}

/**
 * Scores vectors one ordinal at a time with {@code optCosScorer}, the COSINE scorer obtained from
 * {@code FlatVectorScorerUtil.getLucene99FlatVectorsScorer()}.
 *
 * @return the shared {@code scores} array, with entries {@code [0, numVectorsToScore)} overwritten
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] cosineOptScorer() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = optCosScorer.score(indices[v]);
}
// returning the array hands the result back to the benchmark harness
return scores;
}

/**
 * Scores all of {@code indices} in a single {@code bulkScore} call on {@code optCosScorer}, the
 * COSINE scorer obtained from {@code FlatVectorScorerUtil.getLucene99FlatVectorsScorer()}.
 *
 * @return the shared {@code scores} array that {@code bulkScore} was asked to fill
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] cosineOptBulkScore() throws IOException {
// NOTE(review): the count is indices.length, while the per-ordinal variant loops to
// numVectorsToScore — presumably the two are equal; confirm in setup.
optCosScorer.bulkScore(indices, scores, indices.length);
return scores;
}

// -- max inner product

/**
 * Scores vectors one ordinal at a time with {@code defMipScorer}, the MAXIMUM_INNER_PRODUCT scorer
 * obtained from {@code DefaultFlatVectorScorer}.
 *
 * @return the shared {@code scores} array, with entries {@code [0, numVectorsToScore)} overwritten
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] mipDefault() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = defMipScorer.score(indices[v]);
}
// returning the array hands the result back to the benchmark harness
return scores;
}

/**
 * Scores all of {@code indices} in a single {@code bulkScore} call on {@code defMipScorer}, the
 * MAXIMUM_INNER_PRODUCT scorer obtained from {@code DefaultFlatVectorScorer}.
 *
 * @return the shared {@code scores} array that {@code bulkScore} was asked to fill
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] mipDefaultBulk() throws IOException {
// NOTE(review): the count is indices.length, while the per-ordinal variant loops to
// numVectorsToScore — presumably the two are equal; confirm in setup.
defMipScorer.bulkScore(indices, scores, indices.length);
return scores;
}

/**
 * Scores vectors one ordinal at a time with {@code optMipScorer}, the MAXIMUM_INNER_PRODUCT scorer
 * obtained from {@code FlatVectorScorerUtil.getLucene99FlatVectorsScorer()}.
 *
 * @return the shared {@code scores} array, with entries {@code [0, numVectorsToScore)} overwritten
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] mipOptScorer() throws IOException {
for (int v = 0; v < numVectorsToScore; v++) {
scores[v] = optMipScorer.score(indices[v]);
}
// returning the array hands the result back to the benchmark harness
return scores;
}

/**
 * Scores all of {@code indices} in a single {@code bulkScore} call on {@code optMipScorer}, the
 * MAXIMUM_INNER_PRODUCT scorer obtained from
 * {@code FlatVectorScorerUtil.getLucene99FlatVectorsScorer()}.
 *
 * @return the shared {@code scores} array that {@code bulkScore} was asked to fill
 * @throws IOException propagated from the underlying scorer
 */
@Benchmark
public float[] mipOptBulkScore() throws IOException {
// NOTE(review): the count is indices.length, while the per-ordinal variant loops to
// numVectorsToScore — presumably the two are equal; confirm in setup.
optMipScorer.bulkScore(indices, scores, indices.length);
return scores;
}

static float[] randomVector(int dims, Random random) {
float[] fa = new float[dims];
for (int i = 0; i < dims; ++i) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,67 @@ public void testDotProduct() throws IOException {
actualScores = ArrayUtil.copyArray(bench.scores);
assertArrayEquals(expectedScores, actualScores, delta);
}

/**
 * Checks that the cosine benchmark variants agree: the default bulk path, the optimized
 * per-ordinal path and the optimized bulk path must each reproduce the scores of the default
 * per-ordinal path to within {@code delta}.
 */
public void testCosine() throws IOException {
  // reference result: default scorer, one ordinal at a time
  Arrays.fill(bench.scores, 0.0f);
  var reference = ArrayUtil.copyArray(bench.cosineDefault());

  // default scorer, bulk path
  Arrays.fill(bench.scores, 0.0f);
  var defaultBulk = ArrayUtil.copyArray(bench.cosineDefaultBulk());
  assertArrayEquals(reference, defaultBulk, delta);

  // optimized scorer, one ordinal at a time
  Arrays.fill(bench.scores, 0.0f);
  var optimized = ArrayUtil.copyArray(bench.cosineOptScorer());
  assertArrayEquals(reference, optimized, delta);

  // optimized scorer, bulk path
  Arrays.fill(bench.scores, 0.0f);
  var optimizedBulk = ArrayUtil.copyArray(bench.cosineOptBulkScore());
  assertArrayEquals(reference, optimizedBulk, delta);
}

/**
 * Checks that the euclidean benchmark variants agree: the default bulk path, the optimized
 * per-ordinal path and the optimized bulk path must each reproduce the scores of the default
 * per-ordinal path to within {@code delta}.
 */
public void testEuclidean() throws IOException {
  // reference result: default scorer, one ordinal at a time
  Arrays.fill(bench.scores, 0.0f);
  var reference = ArrayUtil.copyArray(bench.euclideanDefault());

  // default scorer, bulk path
  Arrays.fill(bench.scores, 0.0f);
  var defaultBulk = ArrayUtil.copyArray(bench.euclideanDefaultBulk());
  assertArrayEquals(reference, defaultBulk, delta);

  // optimized scorer, one ordinal at a time
  Arrays.fill(bench.scores, 0.0f);
  var optimized = ArrayUtil.copyArray(bench.euclideanOptScorer());
  assertArrayEquals(reference, optimized, delta);

  // optimized scorer, bulk path
  Arrays.fill(bench.scores, 0.0f);
  var optimizedBulk = ArrayUtil.copyArray(bench.euclideanOptBulkScore());
  assertArrayEquals(reference, optimizedBulk, delta);
}

/**
 * Checks that the maximum-inner-product benchmark variants agree: the default bulk path, the
 * optimized per-ordinal path and the optimized bulk path must each reproduce the scores of the
 * default per-ordinal path to within {@code delta}.
 */
public void testMip() throws IOException {
  // reference result: default scorer, one ordinal at a time
  Arrays.fill(bench.scores, 0.0f);
  var reference = ArrayUtil.copyArray(bench.mipDefault());

  // default scorer, bulk path
  Arrays.fill(bench.scores, 0.0f);
  var defaultBulk = ArrayUtil.copyArray(bench.mipDefaultBulk());
  assertArrayEquals(reference, defaultBulk, delta);

  // optimized scorer, one ordinal at a time
  Arrays.fill(bench.scores, 0.0f);
  var optimized = ArrayUtil.copyArray(bench.mipOptScorer());
  assertArrayEquals(reference, optimized, delta);

  // optimized scorer, bulk path
  Arrays.fill(bench.scores, 0.0f);
  var optimizedBulk = ArrayUtil.copyArray(bench.mipOptBulkScore());
  assertArrayEquals(reference, optimizedBulk, delta);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static org.apache.lucene.util.VectorUtil.cosine;
import static org.apache.lucene.util.VectorUtil.dotProduct;
import static org.apache.lucene.util.VectorUtil.dotProductScore;
import static org.apache.lucene.util.VectorUtil.normalizeDistanceToUnitInterval;
import static org.apache.lucene.util.VectorUtil.normalizeToUnitInterval;
import static org.apache.lucene.util.VectorUtil.scaleMaxInnerProductScore;
import static org.apache.lucene.util.VectorUtil.squareDistance;
Expand All @@ -34,7 +35,7 @@ public enum VectorSimilarityFunction {
EUCLIDEAN {
/**
 * Scores two float vectors by their squared distance, mapped to a similarity via
 * {@code normalizeDistanceToUnitInterval}, i.e. {@code 1 / (1 + squareDistance(v1, v2))}.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the similarity score; larger means closer
 */
@Override
public float compare(float[] v1, float[] v2) {
  // single return: the inline "1 / (1 + d)" form duplicated this helper and left the second
  // return statement unreachable
  return normalizeDistanceToUnitInterval(squareDistance(v1, v2));
}

@Override
Expand Down
13 changes: 13 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/VectorUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,19 @@ public static float normalizeToUnitInterval(float value) {
return Math.max((1 + value) / 2, 0);
}

/**
 * Converts a squared distance into a similarity score.
 *
 * <p>The score is {@code 1 / (1 + squaredDistance)}: a distance of zero maps to 1, and the score
 * shrinks towards 0 as the distance grows. For finite, non-negative input the result lies in
 * (0, 1].
 *
 * @param squaredDistance squared Euclidean distance, expected to be non-negative
 * @return the similarity score derived from {@code squaredDistance}
 */
public static float normalizeDistanceToUnitInterval(float squaredDistance) {
  final float shifted = squaredDistance + 1.0f;
  return 1.0f / shifted;
}

/**
* Checks if a float vector only has finite components.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,13 @@ public RandomVectorScorerSupplier getRandomVectorScorerSupplier(

private RandomVectorScorerSupplier getFloatScoringSupplier(
FloatVectorValues vectorValues, VectorSimilarityFunction similarityType) throws IOException {
if (similarityType == VectorSimilarityFunction.DOT_PRODUCT) { // dot product for now
if (vectorValues instanceof HasIndexSlice sliceableValues
&& sliceableValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentFloatVectorScorerSupplier.create(
similarityType, sliceableValues.getSlice(), vectorValues);
if (scorer.isPresent()) {
return scorer.get();
}
if (vectorValues instanceof HasIndexSlice sliceableValues
&& sliceableValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentFloatVectorScorerSupplier.create(
similarityType, sliceableValues.getSlice(), vectorValues);
if (scorer.isPresent()) {
return scorer.get();
}
}
return delegate.getRandomVectorScorerSupplier(similarityType, vectorValues);
Expand Down Expand Up @@ -87,16 +85,14 @@ public RandomVectorScorer getRandomVectorScorer(
VectorSimilarityFunction similarityType, KnnVectorValues vectorValues, float[] target)
throws IOException {
checkDimensions(target.length, vectorValues.dimension());
if (similarityType == VectorSimilarityFunction.DOT_PRODUCT) { // just for now
if (vectorValues instanceof FloatVectorValues fvv
&& fvv instanceof HasIndexSlice floatVectorValues
&& floatVectorValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentFloatVectorScorer.create(
similarityType, floatVectorValues.getSlice(), fvv, target);
if (scorer.isPresent()) {
return scorer.get();
}
if (vectorValues instanceof FloatVectorValues fvv
&& fvv instanceof HasIndexSlice floatVectorValues
&& floatVectorValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentFloatVectorScorer.create(
similarityType, floatVectorValues.getSlice(), fvv, target);
if (scorer.isPresent()) {
return scorer.get();
}
}
return delegate.getRandomVectorScorer(similarityType, vectorValues, target);
Expand Down
Loading
Loading