diff --git a/.github/workflows/run-special-checks-sandbox.yml b/.github/workflows/run-special-checks-sandbox.yml
new file mode 100644
index 000000000000..b43bbb8cd34a
--- /dev/null
+++ b/.github/workflows/run-special-checks-sandbox.yml
@@ -0,0 +1,58 @@
+name: "Run special checks: module lucene/sandbox"
+
+on:
+ workflow_dispatch:
+
+ pull_request:
+ branches:
+ - '*'
+
+ push:
+ branches:
+ - 'main'
+ - 'branch_10x'
+
+jobs:
+ faiss-tests:
+ name: tests for the Faiss codec (v${{ matrix.faiss-version }} with JDK ${{ matrix.java }} on ${{ matrix.os }})
+ timeout-minutes: 15
+
+ strategy:
+ matrix:
+ os: [ ubuntu-latest ]
+ java: [ '21' ]
+ faiss-version: [ '1.11.0' ]
+
+ runs-on: ${{ matrix.os }}
+
+ steps:
+ - name: Install Mamba
+ uses: conda-incubator/setup-miniconda@835234971496cad1653abb28a638a281cf32541f #v3.2.0
+ with:
+ miniforge-version: 'latest'
+ auto-activate-base: 'false'
+ activate-environment: 'faiss-env'
+ # TODO: Use only conda-forge if possible, see https://github.com/conda-forge/faiss-split-feedstock/pull/88
+ channels: 'pytorch,conda-forge'
+ conda-remove-defaults: 'true'
+
+ - name: Install Faiss
+ run: mamba install faiss-cpu=${{ matrix.faiss-version }}
+
+ - name: Checkout Lucene
+ uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+ - name: Prepare Lucene workspace
+ uses: ./.github/actions/prepare-for-build
+
+ - name: Run tests for Faiss codec
+ run: >
+ LD_LIBRARY_PATH=$CONDA_PREFIX/lib
+ ./gradlew -p lucene/sandbox
+ -Dtests.faiss.run=true
+ test
+ --tests "org.apache.lucene.sandbox.codecs.faiss.*"
+
+ defaults:
+ run:
+ shell: bash -leo pipefail {0}
diff --git a/gradle/generation/extract-jdk-apis.gradle b/gradle/generation/extract-jdk-apis.gradle
index 3adde87da838..dd7e9babd93a 100644
--- a/gradle/generation/extract-jdk-apis.gradle
+++ b/gradle/generation/extract-jdk-apis.gradle
@@ -17,7 +17,10 @@
def resources = scriptResources(buildscript)
-configure(project(":lucene:core")) {
+configure([
+ project(":lucene:core"),
+ project(":lucene:sandbox"),
+]) {
ext {
apijars = layout.projectDirectory.dir("src/generated/jdk")
mrjarJavaVersions = [ 21 ]
diff --git a/gradle/generation/regenerate.gradle b/gradle/generation/regenerate.gradle
index d23cfd7d54f0..8edaabc77d80 100644
--- a/gradle/generation/regenerate.gradle
+++ b/gradle/generation/regenerate.gradle
@@ -91,6 +91,7 @@ configure([
project(":lucene:queryparser"),
project(":lucene:expressions"),
project(":lucene:test-framework"),
+ project(":lucene:sandbox"),
]) {
task regenerate() {
description "Rerun any code or static data generation tasks."
diff --git a/gradle/java/core-mrjar.gradle b/gradle/java/core-mrjar.gradle
index 2bd5de51971d..5e9032ffdd3f 100644
--- a/gradle/java/core-mrjar.gradle
+++ b/gradle/java/core-mrjar.gradle
@@ -17,7 +17,10 @@
// Produce an MR-JAR with Java 19+ foreign and vector implementations
-configure(project(":lucene:core")) {
+configure([
+ project(":lucene:core"),
+ project(":lucene:sandbox"),
+]) {
plugins.withType(JavaPlugin) {
mrjarJavaVersions.each { jdkVersion ->
sourceSets.create("main${jdkVersion}") {
@@ -27,6 +30,7 @@ configure(project(":lucene:core")) {
}
configurations["main${jdkVersion}Implementation"].extendsFrom(configurations['implementation'])
dependencies.add("main${jdkVersion}Implementation", sourceSets.main.output)
+ dependencies.add("testImplementation", sourceSets["main${jdkVersion}"].output)
tasks.named("compileMain${jdkVersion}Java").configure {
def apijar = apijars.file("jdk${jdkVersion}.apijar")
diff --git a/gradle/testing/defaults-tests.gradle b/gradle/testing/defaults-tests.gradle
index 22d3f9530dab..f3c191cbb233 100644
--- a/gradle/testing/defaults-tests.gradle
+++ b/gradle/testing/defaults-tests.gradle
@@ -145,7 +145,8 @@ allprojects {
':lucene:core',
':lucene:codecs',
":lucene:distribution.tests",
- ":lucene:test-framework"
+ ":lucene:test-framework",
+ ":lucene:sandbox",
] ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties")
diff --git a/gradle/testing/randomization.gradle b/gradle/testing/randomization.gradle
index 185cd0872a9c..357589d981fc 100644
--- a/gradle/testing/randomization.gradle
+++ b/gradle/testing/randomization.gradle
@@ -116,6 +116,7 @@ allprojects {
description: "Forces use of integer vectors even when slow."],
[propName: 'tests.defaultvectorization', value: false,
description: "Uses defaults for running tests with correct JVM settings to test Panama vectorization (tests.jvmargs, tests.vectorsize, tests.forceintegervectors)."],
+ [propName: "tests.faiss.run", value: false, description: "Explicitly run tests for the Faiss codec."],
]
}
}
diff --git a/gradle/testing/randomization/policies/tests.policy b/gradle/testing/randomization/policies/tests.policy
index f8e09ba03661..d7b4c5792548 100644
--- a/gradle/testing/randomization/policies/tests.policy
+++ b/gradle/testing/randomization/policies/tests.policy
@@ -80,6 +80,9 @@ grant {
permission java.io.FilePermission "${hunspell.corpora}${/}-", "read";
permission java.io.FilePermission "${hunspell.dictionaries}", "read";
permission java.io.FilePermission "${hunspell.dictionaries}${/}-", "read";
+
+ // Faiss tests
+ permission java.lang.RuntimePermission "loadLibrary.faiss_c";
};
// Permissions for jacoco code coverage
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 053ccc9f7920..6363a7d512f8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -34,6 +34,8 @@ New Features
* GITHUB#14729: Support for Re-Ranking Queries using Late Interaction Model Multi-Vectors. (Vigya Sharma, Jim Ferenczi)
+* GITHUB#14843: Add new KnnVectorsFormat that wraps the Faiss library (#14178, #14847). (Kaival Parikh)
+
Improvements
---------------------
* GITHUB#14458: Add an IndexDeletion policy that retains the last N commits. (Owais Kazi)
diff --git a/lucene/sandbox/src/generated/jdk/jdk21.apijar b/lucene/sandbox/src/generated/jdk/jdk21.apijar
new file mode 100644
index 000000000000..8120f23b8b46
Binary files /dev/null and b/lucene/sandbox/src/generated/jdk/jdk21.apijar differ
diff --git a/lucene/sandbox/src/java/module-info.java b/lucene/sandbox/src/java/module-info.java
index d79a150fea3e..34f4ebba539e 100644
--- a/lucene/sandbox/src/java/module-info.java
+++ b/lucene/sandbox/src/java/module-info.java
@@ -22,6 +22,7 @@
requires org.apache.lucene.facet;
exports org.apache.lucene.payloads;
+ exports org.apache.lucene.sandbox.codecs.faiss;
exports org.apache.lucene.sandbox.codecs.idversion;
exports org.apache.lucene.sandbox.codecs.quantization;
exports org.apache.lucene.sandbox.document;
@@ -39,4 +40,6 @@
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.sandbox.codecs.idversion.IDVersionPostingsFormat;
+ provides org.apache.lucene.codecs.KnnVectorsFormat with
+ org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormatProvider;
}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsFormatProvider.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsFormatProvider.java
new file mode 100644
index 000000000000..cb722eb08325
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsFormatProvider.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+/**
+ * Provides a Faiss-based vector format, see corresponding classes in {@code java21/} for docs!
+ *
+ * @lucene.experimental
+ */
+public class FaissKnnVectorsFormatProvider extends KnnVectorsFormat {
+ private final KnnVectorsFormat delegate;
+
+ public FaissKnnVectorsFormatProvider() {
+ this(lookup());
+ }
+
+ public FaissKnnVectorsFormatProvider(String description, String indexParams) {
+ this(lookup(description, indexParams));
+ }
+
+ private FaissKnnVectorsFormatProvider(KnnVectorsFormat delegate) {
+ super(delegate.getName());
+ this.delegate = delegate;
+ }
+
+ private static KnnVectorsFormat lookup(Object... args) {
+ try {
+ MethodHandles.Lookup lookup = MethodHandles.lookup();
+ Class> cls =
+ lookup.findClass("org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat");
+
+ MethodType type =
+ MethodType.methodType(
+ void.class,
+ Arrays.stream(args).map(Object::getClass).collect(Collectors.toUnmodifiableList()));
+ MethodHandle constr = lookup.findConstructor(cls, type);
+
+ return (KnnVectorsFormat) constr.invokeWithArguments(args);
+ } catch (ClassNotFoundException e) {
+ throw new LinkageError("FaissKnnVectorsFormat is missing from JAR file", e);
+ } catch (IllegalAccessException | NoSuchMethodException e) {
+ throw new LinkageError("FaissKnnVectorsFormat is missing correctly typed constructor", e);
+ } catch (Throwable t) {
+ throw new RuntimeException(t);
+ }
+ }
+
+ @Override
+ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+ return delegate.fieldsWriter(state);
+ }
+
+ @Override
+ public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
+ return delegate.fieldsReader(state);
+ }
+
+ @Override
+ public int getMaxDimensions(String fieldName) {
+ return delegate.getMaxDimensions(fieldName);
+ }
+}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/faiss/package-info.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/faiss/package-info.java
new file mode 100644
index 000000000000..09cc3d2441f7
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/faiss/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Provides a Faiss-based vector format, see corresponding classes in {@code java21/} for docs!
+ *
+ * @lucene.experimental
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
diff --git a/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsFormat.java b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsFormat.java
new file mode 100644
index 000000000000..83beae607dc5
--- /dev/null
+++ b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsFormat.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
+
+import static org.apache.lucene.util.hnsw.HnswGraphBuilder.DEFAULT_BEAM_WIDTH;
+import static org.apache.lucene.util.hnsw.HnswGraphBuilder.DEFAULT_MAX_CONN;
+
+import java.io.IOException;
+import java.util.Locale;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
+import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+/**
+ * A Faiss-based format to create and search vector indexes, using {@link LibFaissC} to interact
+ * with the native library.
+ *
+ *
The Faiss index is configured using its flexible index factory, which
+ * allows creating arbitrary indexes by "describing" them. These indexes can be tuned by setting
+ * relevant parameters.
+ *
+ *
A separate Faiss index is created per-segment, and uses the following files:
+ *
+ *
+ * .faissm (metadata file): stores field number, offset and length of actual
+ * Faiss index in data file.
+ * .faissd (data file): stores concatenated Faiss indexes for all fields.
+ * - All files required by {@link Lucene99FlatVectorsFormat} for storing raw vectors.
+ *
+ *
+ * Note: Set the {@code $OMP_NUM_THREADS} environment variable to control internal
+ * threading.
+ *
+ *
TODO: There is no guarantee of backwards compatibility!
+ *
+ * @lucene.experimental
+ */
+public final class FaissKnnVectorsFormat extends KnnVectorsFormat {
+ public static final String NAME = FaissKnnVectorsFormat.class.getSimpleName();
+ static final int VERSION_START = 0;
+ static final int VERSION_CURRENT = VERSION_START;
+ static final String META_CODEC_NAME = NAME + "Meta";
+ static final String DATA_CODEC_NAME = NAME + "Data";
+ static final String META_EXTENSION = "faissm";
+ static final String DATA_EXTENSION = "faissd";
+
+ private final String description;
+ private final String indexParams;
+ private final FlatVectorsFormat rawVectorsFormat;
+
+ /**
+ * Constructs an HNSW-based format using default {@code maxConn}={@value
+ * org.apache.lucene.util.hnsw.HnswGraphBuilder#DEFAULT_MAX_CONN} and {@code beamWidth}={@value
+ * org.apache.lucene.util.hnsw.HnswGraphBuilder#DEFAULT_BEAM_WIDTH}.
+ */
+ public FaissKnnVectorsFormat() {
+ this(
+ String.format(Locale.ROOT, "IDMap,HNSW%d", DEFAULT_MAX_CONN),
+ String.format(Locale.ROOT, "efConstruction=%d", DEFAULT_BEAM_WIDTH));
+ }
+
+ /**
+ * Constructs a format using the specified index factory string and index parameters (see class
+ * docs for more information).
+ *
+ * @param description the index factory string to initialize Faiss indexes.
+ * @param indexParams the index params to set on Faiss indexes.
+ */
+ public FaissKnnVectorsFormat(String description, String indexParams) {
+ super(NAME);
+ this.description = description;
+ this.indexParams = indexParams;
+ this.rawVectorsFormat =
+ new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer());
+ }
+
+ @Override
+ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+ return new FaissKnnVectorsWriter(
+ description, indexParams, state, rawVectorsFormat.fieldsWriter(state));
+ }
+
+ @Override
+ public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
+ return new FaissKnnVectorsReader(state, rawVectorsFormat.fieldsReader(state));
+ }
+
+ @Override
+ public int getMaxDimensions(String fieldName) {
+ return DEFAULT_MAX_DIMENSIONS;
+ }
+
+ @Override
+ public String toString() {
+ return String.format(
+ Locale.ROOT, "%s(description=%s indexParams=%s)", NAME, description, indexParams);
+ }
+}
diff --git a/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsReader.java b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsReader.java
new file mode 100644
index 000000000000..0f7ce99c2084
--- /dev/null
+++ b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsReader.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
+
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.DATA_CODEC_NAME;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.DATA_EXTENSION;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.META_CODEC_NAME;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.META_EXTENSION;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.VERSION_CURRENT;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.VERSION_START;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.FAISS_IO_FLAG_MMAP;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.FAISS_IO_FLAG_READ_ONLY;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.indexRead;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.indexSearch;
+
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.KnnCollector;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataAccessHint;
+import org.apache.lucene.store.FileTypeHint;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Read per-segment Faiss indexes and associated metadata.
+ *
+ * @lucene.experimental
+ */
+final class FaissKnnVectorsReader extends KnnVectorsReader {
+ private final FlatVectorsReader rawVectorsReader;
+ private final IndexInput data;
+ private final Map indexMap;
+ private final Arena arena;
+ private boolean closed;
+
+ public FaissKnnVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader)
+ throws IOException {
+ this.rawVectorsReader = rawVectorsReader;
+ this.indexMap = new HashMap<>();
+ this.arena = Arena.ofShared();
+ this.closed = false;
+
+ List fieldMetaList = new ArrayList<>();
+ String metaFileName =
+ IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, META_EXTENSION);
+ try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName)) {
+ Throwable priorE = null;
+ int versionMeta = -1;
+ try {
+ versionMeta =
+ CodecUtil.checkIndexHeader(
+ meta,
+ META_CODEC_NAME,
+ VERSION_START,
+ VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+
+ FieldMeta fieldMeta;
+ while ((fieldMeta = parseNextField(meta, state)) != null) {
+ fieldMetaList.add(fieldMeta);
+ }
+ } catch (Throwable t) {
+ priorE = t;
+ } finally {
+ CodecUtil.checkFooter(meta, priorE);
+ }
+
+ String dataFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, DATA_EXTENSION);
+ this.data =
+ state.directory.openInput(
+ dataFileName, state.context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
+
+ int versionData =
+ CodecUtil.checkIndexHeader(
+ this.data,
+ DATA_CODEC_NAME,
+ VERSION_START,
+ VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ if (versionMeta != versionData) {
+ throw new CorruptIndexException(
+ String.format(
+ Locale.ROOT,
+ "Format versions mismatch (meta=%d, data=%d)",
+ versionMeta,
+ versionData),
+ data);
+ }
+ CodecUtil.retrieveChecksum(data);
+
+ for (FieldMeta fieldMeta : fieldMetaList) {
+ if (indexMap.put(fieldMeta.fieldInfo.name, loadField(data, arena, fieldMeta)) != null) {
+ throw new CorruptIndexException("Duplicate field: " + fieldMeta.fieldInfo.name, meta);
+ }
+ }
+ } catch (Throwable t) {
+ IOUtils.closeWhileHandlingException(this);
+ throw t;
+ }
+ }
+
+ private static FieldMeta parseNextField(IndexInput meta, SegmentReadState state)
+ throws IOException {
+ int fieldNumber = meta.readInt();
+ if (fieldNumber == -1) {
+ return null;
+ }
+
+ FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNumber);
+ if (fieldInfo == null) {
+ throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
+ }
+
+ long dataOffset = meta.readLong();
+ long dataLength = meta.readLong();
+
+ return new FieldMeta(fieldInfo, dataOffset, dataLength);
+ }
+
+ private static IndexEntry loadField(IndexInput data, Arena arena, FieldMeta fieldMeta)
+ throws IOException {
+ int ioFlags = FAISS_IO_FLAG_MMAP | FAISS_IO_FLAG_READ_ONLY;
+
+ // Read index into memory
+ MemorySegment indexPointer =
+ indexRead(data.slice(fieldMeta.fieldInfo.name, fieldMeta.offset, fieldMeta.length), ioFlags)
+ // Ensure timely cleanup
+ .reinterpret(arena, LibFaissC::freeIndex);
+
+ return new IndexEntry(indexPointer, fieldMeta.fieldInfo.getVectorSimilarityFunction());
+ }
+
+ @Override
+ public void checkIntegrity() throws IOException {
+ rawVectorsReader.checkIntegrity();
+ // TODO: Evaluate if we need an explicit check for validity of Faiss indexes
+ CodecUtil.checksumEntireFile(data);
+ }
+
+ @Override
+ public FloatVectorValues getFloatVectorValues(String field) throws IOException {
+ return rawVectorsReader.getFloatVectorValues(field);
+ }
+
+ @Override
+ public ByteVectorValues getByteVectorValues(String field) {
+ // TODO: Support using SQ8 quantization, see:
+ // - https://github.com/opensearch-project/k-NN/pull/2425
+ throw new UnsupportedOperationException("Byte vectors not supported");
+ }
+
+ @Override
+ public void search(String field, float[] vector, KnnCollector knnCollector, Bits acceptDocs) {
+ IndexEntry entry = indexMap.get(field);
+ if (entry != null) {
+ indexSearch(entry.indexPointer, entry.function, vector, knnCollector, acceptDocs);
+ }
+ }
+
+ @Override
+ public void search(String field, byte[] vector, KnnCollector knnCollector, Bits acceptDocs) {
+ // TODO: Support using SQ8 quantization, see:
+ // - https://github.com/opensearch-project/k-NN/pull/2425
+ throw new UnsupportedOperationException("Byte vectors not supported");
+ }
+
+ @Override
+ public Map getOffHeapByteSize(FieldInfo fieldInfo) {
+ // TODO: How to estimate Faiss usage?
+ return rawVectorsReader.getOffHeapByteSize(fieldInfo);
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (closed == false) {
+ closed = true;
+ IOUtils.close(rawVectorsReader, arena::close, data, indexMap::clear);
+ }
+ }
+
+ private record FieldMeta(FieldInfo fieldInfo, long offset, long length) {}
+
+ private record IndexEntry(MemorySegment indexPointer, VectorSimilarityFunction function) {}
+}
diff --git a/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsWriter.java b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsWriter.java
new file mode 100644
index 000000000000..43d1991f3974
--- /dev/null
+++ b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/FaissKnnVectorsWriter.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
+
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.DATA_CODEC_NAME;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.DATA_EXTENSION;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.META_CODEC_NAME;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.META_EXTENSION;
+import static org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat.VERSION_CURRENT;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.FAISS_IO_FLAG_MMAP;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.FAISS_IO_FLAG_READ_ONLY;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.createIndex;
+import static org.apache.lucene.sandbox.codecs.faiss.LibFaissC.indexWrite;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.KnnFieldVectorsWriter;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Sorter;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.hnsw.IntToIntFunction;
+
+/**
+ * Write per-segment Faiss indexes and associated metadata.
+ *
+ * @lucene.experimental
+ */
+final class FaissKnnVectorsWriter extends KnnVectorsWriter {
+ private final String description, indexParams;
+ private final FlatVectorsWriter rawVectorsWriter;
+ private final IndexOutput meta, data;
+ private final Map> rawFields;
+ private boolean finished;
+
+ public FaissKnnVectorsWriter(
+ String description,
+ String indexParams,
+ SegmentWriteState state,
+ FlatVectorsWriter rawVectorsWriter)
+ throws IOException {
+
+ this.description = description;
+ this.indexParams = indexParams;
+ this.rawVectorsWriter = rawVectorsWriter;
+ this.rawFields = new HashMap<>();
+ this.finished = false;
+
+ try {
+ String metaFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, META_EXTENSION);
+ this.meta = state.directory.createOutput(metaFileName, state.context);
+ CodecUtil.writeIndexHeader(
+ this.meta,
+ META_CODEC_NAME,
+ VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+
+ String dataFileName =
+ IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, DATA_EXTENSION);
+ this.data = state.directory.createOutput(dataFileName, state.context);
+ CodecUtil.writeIndexHeader(
+ this.data,
+ DATA_CODEC_NAME,
+ VERSION_CURRENT,
+ state.segmentInfo.getId(),
+ state.segmentSuffix);
+ } catch (Throwable t) {
+ IOUtils.closeWhileHandlingException(this);
+ throw t;
+ }
+ }
+
+ @Override
+ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
+ rawVectorsWriter.mergeOneField(fieldInfo, mergeState);
+ switch (fieldInfo.getVectorEncoding()) {
+ case BYTE ->
+ // TODO: Support using SQ8 quantization, see:
+ // - https://github.com/opensearch-project/k-NN/pull/2425
+ throw new UnsupportedOperationException("Byte vectors not supported");
+ case FLOAT32 -> {
+ FloatVectorValues merged =
+ KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
+ writeFloatField(fieldInfo, merged, doc -> doc);
+ }
+ }
+ }
+
+ @Override
+ public KnnFieldVectorsWriter> addField(FieldInfo fieldInfo) throws IOException {
+ FlatFieldVectorsWriter> rawFieldVectorsWriter = rawVectorsWriter.addField(fieldInfo);
+ rawFields.put(fieldInfo, rawFieldVectorsWriter);
+ return rawFieldVectorsWriter;
+ }
+
+ @Override
+ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
+ rawVectorsWriter.flush(maxDoc, sortMap);
+ for (Map.Entry> entry : rawFields.entrySet()) {
+ FieldInfo fieldInfo = entry.getKey();
+ switch (fieldInfo.getVectorEncoding()) {
+ case BYTE ->
+ // TODO: Support using SQ8 quantization, see:
+ // - https://github.com/opensearch-project/k-NN/pull/2425
+ throw new UnsupportedOperationException("Byte vectors not supported");
+
+ case FLOAT32 -> {
+ @SuppressWarnings("unchecked")
+ FlatFieldVectorsWriter rawWriter =
+ (FlatFieldVectorsWriter) entry.getValue();
+
+ List vectors = rawWriter.getVectors();
+ int dimension = fieldInfo.getVectorDimension();
+ DocIdSet docIdSet = rawWriter.getDocsWithFieldSet();
+
+ writeFloatField(
+ fieldInfo,
+ new BufferedFloatVectorValues(vectors, dimension, docIdSet),
+ (sortMap != null) ? sortMap::oldToNew : doc -> doc);
+ }
+ }
+ }
+ }
+
+ private void writeFloatField(
+ FieldInfo fieldInfo, FloatVectorValues floatVectorValues, IntToIntFunction oldToNewDocId)
+ throws IOException {
+ int number = fieldInfo.number;
+ meta.writeInt(number);
+
+ // Write index to temp file and deallocate from memory
+ try (Arena temp = Arena.ofConfined()) {
+ VectorSimilarityFunction function = fieldInfo.getVectorSimilarityFunction();
+ MemorySegment indexPointer =
+ createIndex(description, indexParams, function, floatVectorValues, oldToNewDocId)
+ // Ensure timely cleanup
+ .reinterpret(temp, LibFaissC::freeIndex);
+
+ int ioFlags = FAISS_IO_FLAG_MMAP | FAISS_IO_FLAG_READ_ONLY;
+
+ // Write index
+ long dataOffset = data.getFilePointer();
+ indexWrite(indexPointer, data, ioFlags);
+ long dataLength = data.getFilePointer() - dataOffset;
+
+ meta.writeLong(dataOffset);
+ meta.writeLong(dataLength);
+ }
+ }
+
+ @Override
+ public void finish() throws IOException {
+ if (finished) {
+ throw new IllegalStateException("Already finished");
+ }
+ finished = true;
+
+ rawVectorsWriter.finish();
+ meta.writeInt(-1);
+ CodecUtil.writeFooter(meta);
+ CodecUtil.writeFooter(data);
+ }
+
+ @Override
+ public void close() throws IOException {
+ IOUtils.close(rawVectorsWriter, meta, data);
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ // TODO: How to estimate Faiss usage?
+ return rawVectorsWriter.ramBytesUsed();
+ }
+
+ private static class BufferedFloatVectorValues extends FloatVectorValues {
+ private final List floats;
+ private final int dimension;
+ private final DocIdSet docIdSet;
+
+ public BufferedFloatVectorValues(List floats, int dimension, DocIdSet docIdSet) {
+ this.floats = floats;
+ this.dimension = dimension;
+ this.docIdSet = docIdSet;
+ }
+
+ @Override
+ public float[] vectorValue(int ord) {
+ return floats.get(ord);
+ }
+
+ @Override
+ public int dimension() {
+ return dimension;
+ }
+
+ @Override
+ public int size() {
+ return floats.size();
+ }
+
+ @Override
+ public FloatVectorValues copy() {
+ return new BufferedFloatVectorValues(floats, dimension, docIdSet);
+ }
+
+ @Override
+ public DocIndexIterator iterator() {
+ try {
+ return fromDISI(docIdSet.iterator());
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+ }
+}
diff --git a/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/LibFaissC.java b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/LibFaissC.java
new file mode 100644
index 000000000000..cad33a6f9e10
--- /dev/null
+++ b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/LibFaissC.java
@@ -0,0 +1,636 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
+
+import static java.lang.foreign.ValueLayout.ADDRESS;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT;
+import static java.lang.foreign.ValueLayout.JAVA_INT;
+import static java.lang.foreign.ValueLayout.JAVA_LONG;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemoryLayout;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Locale;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.KnnCollector;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.hnsw.IntToIntFunction;
+
+/**
+ * Utility class to wrap necessary functions of the native C API of Faiss
+ * using Project Panama.
+ *
+ * @lucene.experimental
+ */
+final class LibFaissC {
+ // TODO: Use vectorized version where available
+ public static final String LIBRARY_NAME = "faiss_c";
+ public static final String LIBRARY_VERSION = "1.11.0";
+
+ // See flags defined in c_api/index_io_c.h
+ static final int FAISS_IO_FLAG_MMAP = 1;
+ static final int FAISS_IO_FLAG_READ_ONLY = 2;
+
+ private static final int BUFFER_SIZE = 256 * 1024 * 1024; // 256 MB
+
+ // Begin support utilities for JDK21 -- wrappers for functions that were renamed!
+ private static final MethodHandle GET_STRING_DELEGATE;
+
+ private static String getStringDelegate(MemorySegment segment, long offset) {
+ return call(GET_STRING_DELEGATE.bindTo(segment), offset);
+ }
+
+ private static final MethodHandle ALLOCATE_FROM_STRING_DELEGATE;
+
+ private static MemorySegment allocateFromStringDelegate(Arena arena, String string) {
+ return call(ALLOCATE_FROM_STRING_DELEGATE.bindTo(arena), string);
+ }
+
+ private static final MethodHandle ALLOCATE_FROM_FLOAT_ARRAY_DELEGATE;
+
+ private static MemorySegment allocateFromFloatArrayDelegate(
+ Arena arena, ValueLayout.OfFloat ofFloat, float... floats) {
+ return call(ALLOCATE_FROM_FLOAT_ARRAY_DELEGATE.bindTo(arena), ofFloat, floats);
+ }
+
+ private static final MethodHandle ALLOCATE_FROM_LONG_ARRAY_DELEGATE;
+
+ private static MemorySegment allocateFromLongArrayDelegate(
+ Arena arena, ValueLayout.OfLong ofLong, long... longs) {
+ return call(ALLOCATE_FROM_LONG_ARRAY_DELEGATE.bindTo(arena), ofLong, longs);
+ }
+
+ private static final MethodHandle ALLOCATE_ARRAY_DELEGATE;
+
+ private static MemorySegment allocateArrayDelegate(Arena arena, MemoryLayout layout, long count) {
+ return call(ALLOCATE_ARRAY_DELEGATE.bindTo(arena), layout, count);
+ }
+
+ static {
+ int runtimeVersion = Runtime.version().feature();
+ assert runtimeVersion >= 21;
+
+ String getStringFunctionName;
+ String allocateFromStringFunctionName;
+ String allocateFromFloatArrayFunctionName;
+ String allocateFromLongArrayFunctionName;
+ String allocateArrayFunctionName;
+ if (runtimeVersion == 21) {
+ getStringFunctionName = "getUtf8String";
+ allocateFromStringFunctionName = "allocateUtf8String";
+ allocateFromFloatArrayFunctionName = "allocateArray";
+ allocateFromLongArrayFunctionName = "allocateArray";
+ allocateArrayFunctionName = "allocateArray";
+ } else {
+ getStringFunctionName = "getString";
+ allocateFromStringFunctionName = "allocateFrom";
+ allocateFromFloatArrayFunctionName = "allocateFrom";
+ allocateFromLongArrayFunctionName = "allocateFrom";
+ allocateArrayFunctionName = "allocate";
+ }
+
+ try {
+ GET_STRING_DELEGATE =
+ MethodHandles.lookup()
+ .findVirtual(
+ MemorySegment.class,
+ getStringFunctionName,
+ MethodType.methodType(String.class, long.class));
+
+ ALLOCATE_FROM_STRING_DELEGATE =
+ MethodHandles.lookup()
+ .findVirtual(
+ Arena.class,
+ allocateFromStringFunctionName,
+ MethodType.methodType(MemorySegment.class, String.class));
+
+ ALLOCATE_FROM_FLOAT_ARRAY_DELEGATE =
+ MethodHandles.lookup()
+ .findVirtual(
+ Arena.class,
+ allocateFromFloatArrayFunctionName,
+ MethodType.methodType(
+ MemorySegment.class, ValueLayout.OfFloat.class, float[].class));
+
+ ALLOCATE_FROM_LONG_ARRAY_DELEGATE =
+ MethodHandles.lookup()
+ .findVirtual(
+ Arena.class,
+ allocateFromLongArrayFunctionName,
+ MethodType.methodType(
+ MemorySegment.class, ValueLayout.OfLong.class, long[].class));
+
+ ALLOCATE_ARRAY_DELEGATE =
+ MethodHandles.lookup()
+ .findVirtual(
+ Arena.class,
+ allocateArrayFunctionName,
+ MethodType.methodType(MemorySegment.class, MemoryLayout.class, long.class));
+ } catch (IllegalAccessException | NoSuchMethodException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ // End support utilities for JDK21
+
+ static {
+ System.loadLibrary(LIBRARY_NAME);
+ checkLibraryVersion();
+ }
+
+ private LibFaissC() {}
+
+ private static MemorySegment getUpcallStub(
+ Arena arena, MethodHandle target, FunctionDescriptor descriptor) {
+ return Linker.nativeLinker().upcallStub(target, descriptor, arena);
+ }
+
+ private static MethodHandle getDowncallHandle(
+ String functionName, FunctionDescriptor descriptor) {
+ return Linker.nativeLinker()
+ .downcallHandle(SymbolLookup.loaderLookup().find(functionName).orElseThrow(), descriptor);
+ }
+
+ private static void checkLibraryVersion() {
+ MethodHandle getVersion =
+ getDowncallHandle("faiss_get_version", FunctionDescriptor.of(ADDRESS));
+
+ MemorySegment nativeString = call(getVersion);
+ String actualVersion = getStringDelegate(nativeString.reinterpret(Long.MAX_VALUE), 0);
+
+ if (LIBRARY_VERSION.equals(actualVersion) == false) {
+ throw new UnsupportedOperationException(
+ String.format(
+ Locale.ROOT,
+ "Expected Faiss library version %s, found %s",
+ LIBRARY_VERSION,
+ actualVersion));
+ }
+ }
+
+ private static final MethodHandle FREE_INDEX =
+ getDowncallHandle("faiss_Index_free", FunctionDescriptor.ofVoid(ADDRESS));
+
+ public static void freeIndex(MemorySegment indexPointer) {
+ call(FREE_INDEX, indexPointer);
+ }
+
+ private static final MethodHandle FREE_CUSTOM_IO_WRITER =
+ getDowncallHandle("faiss_CustomIOWriter_free", FunctionDescriptor.ofVoid(ADDRESS));
+
+ public static void freeCustomIOWriter(MemorySegment customIOWriterPointer) {
+ call(FREE_CUSTOM_IO_WRITER, customIOWriterPointer);
+ }
+
+ private static final MethodHandle FREE_CUSTOM_IO_READER =
+ getDowncallHandle("faiss_CustomIOReader_free", FunctionDescriptor.ofVoid(ADDRESS));
+
+ public static void freeCustomIOReader(MemorySegment customIOReaderPointer) {
+ call(FREE_CUSTOM_IO_READER, customIOReaderPointer);
+ }
+
+ private static final MethodHandle FREE_PARAMETER_SPACE =
+ getDowncallHandle("faiss_ParameterSpace_free", FunctionDescriptor.ofVoid(ADDRESS));
+
+ private static void freeParameterSpace(MemorySegment parameterSpacePointer) {
+ call(FREE_PARAMETER_SPACE, parameterSpacePointer);
+ }
+
+ private static final MethodHandle FREE_ID_SELECTOR_BITMAP =
+ getDowncallHandle("faiss_IDSelectorBitmap_free", FunctionDescriptor.ofVoid(ADDRESS));
+
+ private static void freeIDSelectorBitmap(MemorySegment idSelectorBitmapPointer) {
+ call(FREE_ID_SELECTOR_BITMAP, idSelectorBitmapPointer);
+ }
+
+ private static final MethodHandle FREE_SEARCH_PARAMETERS =
+ getDowncallHandle("faiss_SearchParameters_free", FunctionDescriptor.ofVoid(ADDRESS));
+
+ private static void freeSearchParameters(MemorySegment searchParametersPointer) {
+ call(FREE_SEARCH_PARAMETERS, searchParametersPointer);
+ }
+
+ private static final MethodHandle INDEX_FACTORY =
+ getDowncallHandle(
+ "faiss_index_factory",
+ FunctionDescriptor.of(JAVA_INT, ADDRESS, JAVA_INT, ADDRESS, JAVA_INT));
+
+ private static final MethodHandle PARAMETER_SPACE_NEW =
+ getDowncallHandle("faiss_ParameterSpace_new", FunctionDescriptor.of(JAVA_INT, ADDRESS));
+
+ private static final MethodHandle SET_INDEX_PARAMETERS =
+ getDowncallHandle(
+ "faiss_ParameterSpace_set_index_parameters",
+ FunctionDescriptor.of(JAVA_INT, ADDRESS, ADDRESS, ADDRESS));
+
+ private static final MethodHandle ID_SELECTOR_BITMAP_NEW =
+ getDowncallHandle(
+ "faiss_IDSelectorBitmap_new",
+ FunctionDescriptor.of(JAVA_INT, ADDRESS, JAVA_LONG, ADDRESS));
+
+ private static final MethodHandle SEARCH_PARAMETERS_NEW =
+ getDowncallHandle(
+ "faiss_SearchParameters_new", FunctionDescriptor.of(JAVA_INT, ADDRESS, ADDRESS));
+
+ private static final MethodHandle INDEX_IS_TRAINED =
+ getDowncallHandle("faiss_Index_is_trained", FunctionDescriptor.of(JAVA_INT, ADDRESS));
+
+ private static final MethodHandle INDEX_TRAIN =
+ getDowncallHandle(
+ "faiss_Index_train", FunctionDescriptor.of(JAVA_INT, ADDRESS, JAVA_LONG, ADDRESS));
+
+ private static final MethodHandle INDEX_ADD_WITH_IDS =
+ getDowncallHandle(
+ "faiss_Index_add_with_ids",
+ FunctionDescriptor.of(JAVA_INT, ADDRESS, JAVA_LONG, ADDRESS, ADDRESS));
+
+ public static MemorySegment createIndex(
+ String description,
+ String indexParams,
+ VectorSimilarityFunction function,
+ FloatVectorValues floatVectorValues,
+ IntToIntFunction oldToNewDocId)
+ throws IOException {
+
+ try (Arena temp = Arena.ofConfined()) {
+ int size = floatVectorValues.size();
+ int dimension = floatVectorValues.dimension();
+
+ // Mapped from faiss/MetricType.h
+ int metric =
+ switch (function) {
+ case DOT_PRODUCT -> 0;
+ case EUCLIDEAN -> 1;
+ case COSINE, MAXIMUM_INNER_PRODUCT ->
+ throw new UnsupportedOperationException("Metric type not supported");
+ };
+
+ // Create an index
+ MemorySegment pointer = temp.allocate(ADDRESS);
+ callAndHandleError(
+ INDEX_FACTORY, pointer, dimension, allocateFromStringDelegate(temp, description), metric);
+ MemorySegment indexPointer = pointer.get(ADDRESS, 0);
+
+ // Set index params
+ callAndHandleError(PARAMETER_SPACE_NEW, pointer);
+ MemorySegment parameterSpacePointer =
+ pointer
+ .get(ADDRESS, 0)
+ // Ensure timely cleanup
+ .reinterpret(temp, LibFaissC::freeParameterSpace);
+
+ callAndHandleError(
+ SET_INDEX_PARAMETERS,
+ parameterSpacePointer,
+ indexPointer,
+ allocateFromStringDelegate(temp, indexParams));
+
+ // TODO: Improve memory usage (with a tradeoff in performance) by batched indexing, see:
+ // - https://github.com/opensearch-project/k-NN/issues/1506
+ // - https://github.com/opensearch-project/k-NN/issues/1938
+
+ // Allocate docs in native memory
+ MemorySegment docs = allocateArrayDelegate(temp, JAVA_FLOAT, (long) size * dimension);
+ long docsOffset = 0;
+ long perDocByteSize = dimension * JAVA_FLOAT.byteSize();
+
+ // Allocate ids in native memory
+ MemorySegment ids = allocateArrayDelegate(temp, JAVA_LONG, size);
+ int idsIndex = 0;
+
+ KnnVectorValues.DocIndexIterator iterator = floatVectorValues.iterator();
+ for (int i = iterator.nextDoc(); i != NO_MORE_DOCS; i = iterator.nextDoc()) {
+ int id = oldToNewDocId.apply(i);
+ ids.setAtIndex(JAVA_LONG, idsIndex, id);
+ idsIndex++;
+
+ float[] vector = floatVectorValues.vectorValue(iterator.index());
+ MemorySegment.copy(vector, 0, docs, JAVA_FLOAT, docsOffset, vector.length);
+ docsOffset += perDocByteSize;
+ }
+
+ // Train index
+ int isTrained = call(INDEX_IS_TRAINED, indexPointer);
+ if (isTrained == 0) {
+ callAndHandleError(INDEX_TRAIN, indexPointer, size, docs);
+ }
+
+ // Add docs to index
+ callAndHandleError(INDEX_ADD_WITH_IDS, indexPointer, size, docs, ids);
+
+ return indexPointer;
+ }
+ }
+
+ @SuppressWarnings("unused") // called using a MethodHandle
+ private static long writeBytes(
+ IndexOutput output, MemorySegment inputPointer, long itemSize, long numItems)
+ throws IOException {
+ long size = itemSize * numItems;
+ inputPointer = inputPointer.reinterpret(size);
+
+ if (size <= BUFFER_SIZE) { // simple case, avoid buffering
+ output.writeBytes(inputPointer.toArray(JAVA_BYTE), (int) size);
+ } else { // copy buffered number of bytes repeatedly
+ byte[] bytes = new byte[BUFFER_SIZE];
+ for (long offset = 0; offset < size; offset += BUFFER_SIZE) {
+ int length = (int) Math.min(size - offset, BUFFER_SIZE);
+ MemorySegment.copy(inputPointer, JAVA_BYTE, offset, bytes, 0, length);
+ output.writeBytes(bytes, length);
+ }
+ }
+ return numItems;
+ }
+
+ @SuppressWarnings("unused") // called using a MethodHandle
+ private static long readBytes(
+ IndexInput input, MemorySegment outputPointer, long itemSize, long numItems)
+ throws IOException {
+ long size = itemSize * numItems;
+ outputPointer = outputPointer.reinterpret(size);
+
+ if (size <= BUFFER_SIZE) { // simple case, avoid buffering
+ byte[] bytes = new byte[(int) size];
+ input.readBytes(bytes, 0, bytes.length);
+ MemorySegment.copy(bytes, 0, outputPointer, JAVA_BYTE, 0, bytes.length);
+ } else { // copy buffered number of bytes repeatedly
+ byte[] bytes = new byte[BUFFER_SIZE];
+ for (long offset = 0; offset < size; offset += BUFFER_SIZE) {
+ int length = (int) Math.min(size - offset, BUFFER_SIZE);
+ input.readBytes(bytes, 0, length);
+ MemorySegment.copy(bytes, 0, outputPointer, JAVA_BYTE, offset, length);
+ }
+ }
+ return numItems;
+ }
+
+ private static final MethodHandle WRITE_BYTES_HANDLE;
+ private static final MethodHandle READ_BYTES_HANDLE;
+
+ static {
+ try {
+ WRITE_BYTES_HANDLE =
+ MethodHandles.lookup()
+ .findStatic(
+ LibFaissC.class,
+ "writeBytes",
+ MethodType.methodType(
+ long.class, IndexOutput.class, MemorySegment.class, long.class, long.class));
+
+ READ_BYTES_HANDLE =
+ MethodHandles.lookup()
+ .findStatic(
+ LibFaissC.class,
+ "readBytes",
+ MethodType.methodType(
+ long.class, IndexInput.class, MemorySegment.class, long.class, long.class));
+ } catch (NoSuchMethodException | IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static final MethodHandle CUSTOM_IO_WRITER_NEW =
+ getDowncallHandle(
+ "faiss_CustomIOWriter_new", FunctionDescriptor.of(JAVA_INT, ADDRESS, ADDRESS));
+
+ private static final MethodHandle WRITE_INDEX_CUSTOM =
+ getDowncallHandle(
+ "faiss_write_index_custom", FunctionDescriptor.of(JAVA_INT, ADDRESS, ADDRESS, JAVA_INT));
+
+ public static void indexWrite(MemorySegment indexPointer, IndexOutput output, int ioFlags) {
+ try (Arena temp = Arena.ofConfined()) {
+ MethodHandle writerHandle = WRITE_BYTES_HANDLE.bindTo(output);
+ MemorySegment writerStub =
+ getUpcallStub(
+ temp, writerHandle, FunctionDescriptor.of(JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
+
+ MemorySegment pointer = temp.allocate(ADDRESS);
+ callAndHandleError(CUSTOM_IO_WRITER_NEW, pointer, writerStub);
+ MemorySegment customIOWriterPointer =
+ pointer
+ .get(ADDRESS, 0)
+ // Ensure timely cleanup
+ .reinterpret(temp, LibFaissC::freeCustomIOWriter);
+
+ callAndHandleError(WRITE_INDEX_CUSTOM, indexPointer, customIOWriterPointer, ioFlags);
+ }
+ }
+
+ private static final MethodHandle CUSTOM_IO_READER_NEW =
+ getDowncallHandle(
+ "faiss_CustomIOReader_new", FunctionDescriptor.of(JAVA_INT, ADDRESS, ADDRESS));
+
+ private static final MethodHandle READ_INDEX_CUSTOM =
+ getDowncallHandle(
+ "faiss_read_index_custom", FunctionDescriptor.of(JAVA_INT, ADDRESS, JAVA_INT, ADDRESS));
+
+ public static MemorySegment indexRead(IndexInput input, int ioFlags) {
+ try (Arena temp = Arena.ofConfined()) {
+ MethodHandle readerHandle = READ_BYTES_HANDLE.bindTo(input);
+ MemorySegment readerStub =
+ getUpcallStub(
+ temp, readerHandle, FunctionDescriptor.of(JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
+
+ MemorySegment pointer = temp.allocate(ADDRESS);
+ callAndHandleError(CUSTOM_IO_READER_NEW, pointer, readerStub);
+ MemorySegment customIOReaderPointer =
+ pointer
+ .get(ADDRESS, 0)
+ // Ensure timely cleanup
+ .reinterpret(temp, LibFaissC::freeCustomIOReader);
+
+ callAndHandleError(READ_INDEX_CUSTOM, customIOReaderPointer, ioFlags, pointer);
+ return pointer.get(ADDRESS, 0);
+ }
+ }
+
+ private static final MethodHandle INDEX_SEARCH =
+ getDowncallHandle(
+ "faiss_Index_search",
+ FunctionDescriptor.of(
+ JAVA_INT, ADDRESS, JAVA_LONG, ADDRESS, JAVA_LONG, ADDRESS, ADDRESS));
+
+ private static final MethodHandle INDEX_SEARCH_WITH_PARAMS =
+ getDowncallHandle(
+ "faiss_Index_search_with_params",
+ FunctionDescriptor.of(
+ JAVA_INT, ADDRESS, JAVA_LONG, ADDRESS, JAVA_LONG, ADDRESS, ADDRESS, ADDRESS));
+
+ public static void indexSearch(
+ MemorySegment indexPointer,
+ VectorSimilarityFunction function,
+ float[] query,
+ KnnCollector knnCollector,
+ Bits acceptDocs) {
+
+ try (Arena temp = Arena.ofConfined()) {
+ FixedBitSet fixedBitSet =
+ switch (acceptDocs) {
+ case null -> null;
+ case FixedBitSet bitSet -> bitSet;
+ // TODO: Add optimized case for SparseFixedBitSet
+ case Bits bits -> FixedBitSet.copyOf(bits);
+ };
+
+ // Allocate queries in native memory
+ MemorySegment queries = allocateFromFloatArrayDelegate(temp, JAVA_FLOAT, query);
+
+ // Faiss knn search
+ int k = knnCollector.k();
+ MemorySegment distancesPointer = allocateArrayDelegate(temp, JAVA_FLOAT, k);
+ MemorySegment idsPointer = allocateArrayDelegate(temp, JAVA_LONG, k);
+
+ MemorySegment localIndex = indexPointer.reinterpret(temp, null);
+ if (fixedBitSet == null) {
+ // Search without runtime filters
+ callAndHandleError(INDEX_SEARCH, localIndex, 1, queries, k, distancesPointer, idsPointer);
+ } else {
+ MemorySegment pointer = temp.allocate(ADDRESS);
+
+ long[] bits = fixedBitSet.getBits();
+ MemorySegment nativeBits =
+ // Use LITTLE_ENDIAN to convert long[] -> uint8_t*
+ allocateFromLongArrayDelegate(temp, JAVA_LONG.withOrder(ByteOrder.LITTLE_ENDIAN), bits);
+
+ callAndHandleError(ID_SELECTOR_BITMAP_NEW, pointer, fixedBitSet.length(), nativeBits);
+ MemorySegment idSelectorBitmapPointer =
+ pointer
+ .get(ADDRESS, 0)
+ // Ensure timely cleanup
+ .reinterpret(temp, LibFaissC::freeIDSelectorBitmap);
+
+ callAndHandleError(SEARCH_PARAMETERS_NEW, pointer, idSelectorBitmapPointer);
+ MemorySegment searchParametersPointer =
+ pointer
+ .get(ADDRESS, 0)
+ // Ensure timely cleanup
+ .reinterpret(temp, LibFaissC::freeSearchParameters);
+
+ // Search with runtime filters
+ callAndHandleError(
+ INDEX_SEARCH_WITH_PARAMS,
+ localIndex,
+ 1,
+ queries,
+ k,
+ searchParametersPointer,
+ distancesPointer,
+ idsPointer);
+ }
+
+ // Retrieve scores and ids
+ float[] distances = distancesPointer.toArray(JAVA_FLOAT);
+ long[] ids = idsPointer.toArray(JAVA_LONG);
+
+ // Record hits
+ for (int i = 0; i < k; i++) {
+ // Not enough results
+ if (ids[i] == -1) {
+ break;
+ }
+
+ // Scale Faiss distances to Lucene scores, see VectorSimilarityFunction.java
+ float score =
+ switch (function) {
+ case DOT_PRODUCT ->
+ // distance in Faiss === dotProduct in Lucene
+ Math.max((1 + distances[i]) / 2, 0);
+
+ case EUCLIDEAN ->
+ // distance in Faiss === squareDistance in Lucene
+ 1 / (1 + distances[i]);
+
+ case COSINE, MAXIMUM_INNER_PRODUCT ->
+ throw new UnsupportedOperationException("Metric type not supported");
+ };
+
+ knnCollector.collect((int) ids[i], score);
+ }
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ private static T call(MethodHandle handle, Object... args) {
+ try {
+ return (T) handle.invokeWithArguments(args);
+ } catch (Throwable e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static void callAndHandleError(MethodHandle handle, Object... args) {
+ int returnCode = call(handle, args);
+ if (returnCode < 0) {
+ // TODO: Surface actual exception in a thread-safe manner?
+ throw new FaissException(returnCode);
+ }
+ }
+
+ /**
+ * Exception used to rethrow handled Faiss errors in native code.
+ *
+ * @lucene.experimental
+ */
+ public static class FaissException extends RuntimeException {
+ // See error codes defined in c_api/error_c.h
+ enum ErrorCode {
+ /// No error
+ OK(0),
+ /// Any exception other than Faiss or standard C++ library exceptions
+ UNKNOWN_EXCEPT(-1),
+ /// Faiss library exception
+ FAISS_EXCEPT(-2),
+ /// Standard C++ library exception
+ STD_EXCEPT(-4);
+
+ private final int code;
+
+ ErrorCode(int code) {
+ this.code = code;
+ }
+
+ static ErrorCode fromCode(int code) {
+ return Arrays.stream(ErrorCode.values())
+ .filter(errorCode -> errorCode.code == code)
+ .findFirst()
+ .orElseThrow();
+ }
+ }
+
+ public FaissException(int code) {
+ super(String.format(Locale.ROOT, "Faiss library ran into %s", ErrorCode.fromCode(code)));
+ }
+ }
+}
diff --git a/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/package-info.java b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/package-info.java
new file mode 100644
index 000000000000..e63fa3070f96
--- /dev/null
+++ b/lucene/sandbox/src/java21/org/apache/lucene/sandbox/codecs/faiss/package-info.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Faiss is "a library for efficient
+ * similarity search and clustering of dense vectors", with support for various vector
+ * transforms, indexing algorithms, quantization techniques, etc. This package provides a pluggable
+ * Faiss-based format to perform vector searches in Lucene, via {@link
+ * org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat}.
+ *
+ * To use this format: Install pytorch/faiss-cpu v{@value
+ * org.apache.lucene.sandbox.codecs.faiss.LibFaissC#LIBRARY_VERSION} from Conda and place shared libraries (including
+ * dependencies) on the {@code $LD_LIBRARY_PATH} environment variable or {@code -Djava.library.path}
+ * JVM argument.
+ *
+ *
Important: Ensure that the license of the Conda distribution and channels is applicable to
+ * you. pytorch and conda-forge are community-maintained channels with
+ * permissive licenses!
+ *
+ *
Sample setup:
+ *
+ *
+ * - Install micromamba (an open-source Conda
+ * package manager) or similar
+ *
- Install dependencies using {@code micromamba create -n faiss-env -c pytorch -c conda-forge
+ * -y faiss-cpu=}{@value org.apache.lucene.sandbox.codecs.faiss.LibFaissC#LIBRARY_VERSION}
+ *
- Activate environment using {@code micromamba activate faiss-env}
+ *
- Add shared libraries to runtime using {@code export LD_LIBRARY_PATH=$CONDA_PREFIX/lib}
+ *
- And you're good to go! (add the {@code -Dtests.faiss.run=true} JVM argument to ensure Faiss
+ * tests are run)
+ *
+ *
+ * @lucene.experimental
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
diff --git a/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
new file mode 100644
index 000000000000..418d70fb51fd
--- /dev/null
+++ b/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormatProvider
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/faiss/TestFaissKnnVectorsFormat.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/faiss/TestFaissKnnVectorsFormat.java
new file mode 100644
index 000000000000..81a1c6ad1e8b
--- /dev/null
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/faiss/TestFaissKnnVectorsFormat.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.codecs.faiss;
+
+import static org.apache.lucene.index.VectorEncoding.FLOAT32;
+import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
+import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.VectorEncoding;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+
+/**
+ * Tests for {@link FaissKnnVectorsFormatProvider}. Will run only if required shared libraries
+ * (including dependencies) are present at runtime, or the {@value #FAISS_RUN_TESTS} JVM arg is set
+ * to {@code true}
+ */
+public class TestFaissKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
+ private static final String FAISS_RUN_TESTS = "tests.faiss.run";
+
+ private static final VectorEncoding[] SUPPORTED_ENCODINGS = {FLOAT32};
+ private static final VectorSimilarityFunction[] SUPPORTED_FUNCTIONS = {DOT_PRODUCT, EUCLIDEAN};
+
+ @BeforeClass
+ public static void maybeSuppress() throws ClassNotFoundException {
+ // Explicitly run tests
+ if (Boolean.getBoolean(FAISS_RUN_TESTS)) {
+ return;
+ }
+
+ // Otherwise check if dependencies are present
+ boolean faissLibraryPresent;
+ try {
+ Class.forName("org.apache.lucene.sandbox.codecs.faiss.LibFaissC");
+ faissLibraryPresent = true;
+ } catch (
+ @SuppressWarnings("unused")
+ UnsatisfiedLinkError error) {
+ faissLibraryPresent = false;
+ }
+ assumeTrue("Native libraries present", faissLibraryPresent);
+ }
+
+ @Override
+ protected VectorEncoding randomVectorEncoding() {
+ return SUPPORTED_ENCODINGS[random().nextInt(SUPPORTED_ENCODINGS.length)];
+ }
+
+ @Override
+ protected VectorSimilarityFunction randomSimilarity() {
+ return SUPPORTED_FUNCTIONS[random().nextInt(SUPPORTED_FUNCTIONS.length)];
+ }
+
+ @Override
+ protected Codec getCodec() {
+ return TestUtil.alwaysKnnVectorsFormat(new FaissKnnVectorsFormatProvider());
+ }
+
+ @Override
+ @Ignore // does not honour visitedLimit
+ public void testSearchWithVisitedLimit() {}
+
+ @Override
+ @Ignore // does not support byte vectors
+ public void testByteVectorScorerIteration() {}
+
+ @Override
+ @Ignore // does not support byte vectors
+ public void testMismatchedFields() {}
+
+ @Override
+ @Ignore // does not support byte vectors
+ public void testSortedIndexBytes() {}
+
+ @Override
+ @Ignore // does not support byte vectors
+ public void testRandomBytes() {}
+
+ @Override
+ @Ignore // does not support byte vectors
+ public void testEmptyByteVectorData() {}
+
+ @Override
+ @Ignore // does not support byte vectors
+ public void testMergingWithDifferentByteKnnFields() {}
+
+ @Monster("Uses large amount of heap and RAM")
+ public void testLargeVectorData() throws IOException {
+ KnnVectorsFormat format =
+ new FaissKnnVectorsFormatProvider(
+ "IDMap,Flat", // no need for special indexing like HNSW
+ "");
+ IndexWriterConfig config =
+ newIndexWriterConfig().setCodec(TestUtil.alwaysKnnVectorsFormat(format));
+
+ float[] largeVector =
+ new float[format.getMaxDimensions("vector")]; // largest vector accepted by the format
+ int numDocs =
+ Math.ceilDivExact(
+ Integer.MAX_VALUE, Float.BYTES * largeVector.length); // find minimum number of docs
+
+ // Check that we can index vectors larger than Integer.MAX_VALUE number of bytes
+ try (Directory directory = newDirectory();
+ IndexWriter writer = new IndexWriter(directory, config)) {
+ writer.addDocuments(
+ Collections.nCopies(numDocs, List.of(new KnnFloatVectorField("vector", largeVector))));
+ }
+ }
+}