diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index dd722d9ce0f11..31c340d458038 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -465,8 +465,11 @@ org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat, org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat, - org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93FlatVectorFormat, org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93ScalarQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93HnswScalarQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat; provides org.apache.lucene.codecs.Codec diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java new file mode 100644 index 0000000000000..bdad21596d479 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormat.java @@ -0,0 +1,125 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.index.ByteVectorValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.search.AcceptDocs; +import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; +import org.apache.lucene.util.hnsw.RandomVectorScorer; + +import java.io.IOException; +import java.util.Map; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; + +public class ES93FlatVectorFormat extends KnnVectorsFormat { + + static final String NAME = "ES93FlatVectorFormat"; + + private final FlatVectorsFormat format; + + /** + * Sole constructor + */ + public ES93FlatVectorFormat() { + super(NAME); + format = new ES93GenericFlatVectorsFormat(); + } + + public ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType elementType) { + super(NAME); + assert elementType != ES93GenericFlatVectorsFormat.ElementType.BIT : "ES815BitFlatVectorFormat should be used for bits"; + format = new ES93GenericFlatVectorsFormat(elementType, false); + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return format.fieldsWriter(state); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new ES93FlatVectorReader(format.fieldsReader(state)); + } + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + static class ES93FlatVectorReader extends KnnVectorsReader { + + private final FlatVectorsReader reader; + + ES93FlatVectorReader(FlatVectorsReader reader) { + super(); + this.reader = reader; + } + + @Override + public void checkIntegrity() throws IOException { + reader.checkIntegrity(); + } + + @Override + public FloatVectorValues getFloatVectorValues(String field) throws IOException { + return reader.getFloatVectorValues(field); + } + + @Override + public ByteVectorValues getByteVectorValues(String field) throws IOException { + return reader.getByteVectorValues(field); + } + + @Override + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { + collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); + } + + private void collectAllMatchingDocs(KnnCollector knnCollector, AcceptDocs acceptDocs, RandomVectorScorer scorer) + throws IOException { + OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits()); + for (int i = 0; i < scorer.maxOrd(); i++) { + if (acceptedOrds == null || acceptedOrds.get(i)) { + collector.collect(i, scorer.score(i)); + collector.incVisitedCount(1); + } + } + assert collector.earlyTerminated() == false; + } + + @Override + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { + collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); + } + + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return reader.getOffHeapByteSize(fieldInfo); + } + + @Override + public void close() throws IOException { + reader.close(); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java new file mode 100644 index 0000000000000..4f47b82c3b5a6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormat.java @@ -0,0 +1,97 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsReader; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat; + +import java.io.IOException; +import java.util.concurrent.ExecutorService; + +public class ES93HnswScalarQuantizedVectorsFormat extends AbstractHnswVectorsFormat { + + static final String NAME = "ES93HnswScalarQuantizedVectorsFormat"; + + static final Lucene104ScalarQuantizedVectorScorer flatVectorScorer = new Lucene104ScalarQuantizedVectorScorer( + FlatVectorScorerUtil.getLucene99FlatVectorsScorer() + ); + + private final Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding; + private final FlatVectorsFormat rawVectorFormat; + + public ES93HnswScalarQuantizedVectorsFormat() { + super(NAME); + this.encoding = Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT; + this.rawVectorFormat = new ES93GenericFlatVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD, false); + } + + public ES93HnswScalarQuantizedVectorsFormat( + int maxConn, + int beamWidth, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding, + ES93GenericFlatVectorsFormat.ElementType elementType, + boolean useDirectIO + ) { + super(NAME, maxConn, beamWidth); + this.encoding = encoding; + this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, useDirectIO); + } + + public ES93HnswScalarQuantizedVectorsFormat( + int maxConn, + int beamWidth, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding, + ES93GenericFlatVectorsFormat.ElementType elementType, + boolean useDirectIO, + int numMergeWorkers, + ExecutorService mergeExec + ) { + super(NAME, maxConn, beamWidth, numMergeWorkers, mergeExec); + this.encoding = encoding; + this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, useDirectIO); + } + + @Override + protected FlatVectorsFormat flatVectorsFormat() { + return rawVectorFormat; + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new Lucene99HnswVectorsWriter( + state, + maxConn, + beamWidth, + new Lucene104ScalarQuantizedVectorsWriter(state, encoding, rawVectorFormat.fieldsWriter(state), flatVectorScorer), + numMergeWorkers, + mergeExec, + 0 + ); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene99HnswVectorsReader( + state, + new Lucene104ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer) + ); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java new file mode 100644 index 0000000000000..075c1728f1029 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormat.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsReader; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsWriter; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; + +public class ES93ScalarQuantizedVectorsFormat extends KnnVectorsFormat { + + static final String NAME = "ES93ScalarQuantizedVectorsFormat"; + + static final Lucene104ScalarQuantizedVectorScorer flatVectorScorer = new Lucene104ScalarQuantizedVectorScorer( + FlatVectorScorerUtil.getLucene99FlatVectorsScorer() + ); + + private final Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding; + private final FlatVectorsFormat rawVectorFormat; + + public ES93ScalarQuantizedVectorsFormat() { + this(ES93GenericFlatVectorsFormat.ElementType.STANDARD, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT); + } + + public ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType elementType) { + this(elementType, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT); + } + + public ES93ScalarQuantizedVectorsFormat( + ES93GenericFlatVectorsFormat.ElementType elementType, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding + ) { + super(NAME); + assert elementType != ES93GenericFlatVectorsFormat.ElementType.BIT : "BIT should not be used with scalar quantization"; + this.encoding = encoding; + this.rawVectorFormat = new ES93GenericFlatVectorsFormat(elementType, false); + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new Lucene104ScalarQuantizedVectorsWriter(state, encoding, rawVectorFormat.fieldsWriter(state), flatVectorScorer); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene104ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer); + } + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + @Override + public String toString() { + return NAME + + "(name=" + + NAME + + ", encoding=" + + encoding + + ", flatVectorScorer=" + + flatVectorScorer + + ", rawVectorFormat=" + + rawVectorFormat + + ")"; + } +} diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 5370d7244df9b..0dc34ea2e808d 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -9,6 +9,9 @@ org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat -org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93FlatVectorFormat org.elasticsearch.index.codec.vectors.es93.ES93HnswVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93ScalarQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93HnswScalarQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java new file mode 100644 index 0000000000000..91d4054ae94ed --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatBFloat16VectorFormatTests.java @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BFloat16; +import org.elasticsearch.index.codec.vectors.BaseBFloat16KnnVectorsFormatTestCase; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; + +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.hasEntry; + +public class ES93FlatBFloat16VectorFormatTests extends BaseBFloat16KnnVectorsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + @Override + protected Codec getCodec() { + return TestUtil.alwaysKnnVectorsFormat(new ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType.BFLOAT16)); + } + + public void testSearchWithVisitedLimit() { + throw new AssumptionViolatedException("requires graph-based vector codec"); + } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); + assertThat(offHeap, aMapWithSize(1)); + assertThat(offHeap, hasEntry("vec", (long) vector.length * BFloat16.BYTES)); + } + } + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java new file mode 100644 index 0000000000000..1ada03a70bed6 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93FlatVectorFormatTests.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.common.logging.LogConfigurator; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; + +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.hasEntry; + +public class ES93FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + @Override + protected Codec getCodec() { + return TestUtil.alwaysKnnVectorsFormat(new ES93FlatVectorFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD)); + } + + public void testSearchWithVisitedLimit() { + throw new AssumptionViolatedException("requires graph-based vector codec"); + } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); + assertThat(offHeap, aMapWithSize(1)); + assertThat(offHeap, hasEntry("vec", (long) vector.length * Float.BYTES)); + } + } + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java new file mode 100644 index 0000000000000..a1bda3e4b2342 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedBFloat16VectorsFormatTests.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; +import org.apache.lucene.store.Directory; +import org.elasticsearch.index.codec.vectors.BFloat16; +import org.elasticsearch.index.codec.vectors.BaseHnswBFloat16VectorsFormatTestCase; + +import java.io.IOException; +import java.util.concurrent.ExecutorService; + +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasEntry; + +public class ES93HnswScalarQuantizedBFloat16VectorsFormatTests extends BaseHnswBFloat16VectorsFormatTestCase { + + @Override + protected KnnVectorsFormat createFormat() { + return new ES93HnswScalarQuantizedVectorsFormat( + DEFAULT_MAX_CONN, + DEFAULT_BEAM_WIDTH, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, + ES93GenericFlatVectorsFormat.ElementType.BFLOAT16, + random().nextBoolean() + ); + } + + @Override + protected KnnVectorsFormat createFormat(int maxConn, int beamWidth) { + return new ES93HnswScalarQuantizedVectorsFormat( + maxConn, + beamWidth, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, + ES93GenericFlatVectorsFormat.ElementType.BFLOAT16, + random().nextBoolean() + ); + } + + @Override + protected KnnVectorsFormat createFormat(int maxConn, int beamWidth, int numMergeWorkers, ExecutorService service) { + return new ES93HnswScalarQuantizedVectorsFormat( + maxConn, + beamWidth, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, + ES93GenericFlatVectorsFormat.ElementType.BFLOAT16, + random().nextBoolean(), + numMergeWorkers, + service + ); + } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory()) { + testSimpleOffHeapSize( + dir, + newIndexWriterConfig(), + vector, + allOf( + aMapWithSize(3), + hasEntry("vec", (long) vector.length * BFloat16.BYTES), + hasEntry("vex", 1L), + hasEntry(equalTo("veq"), greaterThan(0L)) + ) + ); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java new file mode 100644 index 0000000000000..c2bf9e6352f15 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93HnswScalarQuantizedVectorsFormatTests.java @@ -0,0 +1,81 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; +import org.apache.lucene.store.Directory; +import org.elasticsearch.index.codec.vectors.BaseHnswVectorsFormatTestCase; + +import java.io.IOException; +import java.util.concurrent.ExecutorService; + +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasEntry; + +public class ES93HnswScalarQuantizedVectorsFormatTests extends BaseHnswVectorsFormatTestCase { + + @Override + protected KnnVectorsFormat createFormat() { + return new ES93HnswScalarQuantizedVectorsFormat( + DEFAULT_MAX_CONN, + DEFAULT_BEAM_WIDTH, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, + ES93GenericFlatVectorsFormat.ElementType.STANDARD, + random().nextBoolean() + ); + } + + @Override + protected KnnVectorsFormat createFormat(int maxConn, int beamWidth) { + return new ES93HnswScalarQuantizedVectorsFormat( + maxConn, + beamWidth, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, + ES93GenericFlatVectorsFormat.ElementType.STANDARD, + random().nextBoolean() + ); + } + + @Override + protected KnnVectorsFormat createFormat(int maxConn, int beamWidth, int numMergeWorkers, ExecutorService service) { + return new ES93HnswScalarQuantizedVectorsFormat( + maxConn, + beamWidth, + Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT, + ES93GenericFlatVectorsFormat.ElementType.STANDARD, + random().nextBoolean(), + numMergeWorkers, + service + ); + } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory()) { + testSimpleOffHeapSize( + dir, + newIndexWriterConfig(), + vector, + allOf( + aMapWithSize(3), + hasEntry("vec", (long) vector.length * Float.BYTES), + hasEntry("vex", 1L), + hasEntry(equalTo("veq"), greaterThan(0L)) + ) + ); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java new file mode 100644 index 0000000000000..57578097f2db4 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedBFloat16VectorFormatTests.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BFloat16; +import org.elasticsearch.index.codec.vectors.BaseBFloat16KnnVectorsFormatTestCase; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; + +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasEntry; + +public class ES93ScalarQuantizedBFloat16VectorFormatTests extends BaseBFloat16KnnVectorsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + private KnnVectorsFormat format; + + @Override + public void setUp() throws Exception { + format = new ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.BFLOAT16); + super.setUp(); + } + + @Override + protected Codec getCodec() { + return TestUtil.alwaysKnnVectorsFormat(format); + } + + public void testSearchWithVisitedLimit() { + throw new AssumptionViolatedException("requires graph vector codec"); + } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); + assertThat(offHeap, aMapWithSize(2)); + assertThat(offHeap, hasEntry("vec", (long) vector.length * BFloat16.BYTES)); + assertThat(offHeap, hasEntry(equalTo("veq"), greaterThan(0L))); + } + } + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java new file mode 100644 index 0000000000000..a880852378d61 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es93/ES93ScalarQuantizedVectorsFormatTests.java @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.es93; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.elasticsearch.common.logging.LogConfigurator; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; + +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasEntry; + +public class ES93ScalarQuantizedVectorsFormatTests extends BaseKnnVectorsFormatTestCase { + + static { + LogConfigurator.loadLog4jPlugins(); + LogConfigurator.configureESLogging(); // native access requires logging to be initialized + } + + @Override + protected Codec getCodec() { + return TestUtil.alwaysKnnVectorsFormat(new ES93ScalarQuantizedVectorsFormat(ES93GenericFlatVectorsFormat.ElementType.STANDARD)); + } + + public void testSearchWithVisitedLimit() { + throw new AssumptionViolatedException("requires graph vector codec"); + } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); + assertThat(offHeap, aMapWithSize(2)); + assertThat(offHeap, hasEntry("vec", (long) vector.length * Float.BYTES)); + assertThat(offHeap, hasEntry(equalTo("veq"), greaterThan(0L))); + } + } + } + } +}