diff --git a/Dockerfile b/Dockerfile
index fddf618..0343a2f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,11 @@
-FROM debian:bullseye as build
+ARG arch=amd64
+FROM --platform=linux/${arch} debian:bullseye AS build
 
-ENV DEBIAN_FRONTEND teletype
+ENV DEBIAN_FRONTEND=teletype
 
+ARG arch
 ARG FAISS_VERSION=main
+ARG GITHUB_ACCOUNT=facebookresearch
 
 RUN apt-get -y update && \
     apt-get -y install apt-utils
@@ -19,9 +22,9 @@ RUN cd /tmp && \
     echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list && \
     apt-get -y update && \
     apt-get -y install intel-mkl-2020.1-102
-ENV LD_LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
-ENV LIBRARY_PATH /opt/intel/mkl/lib/intel64:$LIBRARY_PATH
-ENV LD_PRELOAD /usr/lib/x86_64-linux-gnu/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
+ENV LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
+ENV LIBRARY_PATH=/opt/intel/mkl/lib/intel64:$LIBRARY_PATH
+ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libgomp.so.1:/opt/intel/mkl/lib/intel64/libmkl_def.so:\
 /opt/intel/mkl/lib/intel64/libmkl_avx2.so:/opt/intel/mkl/lib/intel64/libmkl_core.so:\
 /opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so:/opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so
 
@@ -37,13 +40,13 @@ ENV MKLROOT=/opt/intel/mkl
 
 # build faiss and the c api
 RUN apt-get -y install git && \
-    git clone -b ${FAISS_VERSION} https://github.com/facebookresearch/faiss.git /faiss && \
+    git clone -b ${FAISS_VERSION} https://github.com/${GITHUB_ACCOUNT}/faiss.git /faiss && \
     cd /faiss && \
     sed -i 's/faiss_c PRIVATE faiss/faiss_c PRIVATE faiss_avx2/g' c_api/CMakeLists.txt && \
     cmake -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_PYTHON=OFF -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Release -DFAISS_ENABLE_C_API=ON -DBUILD_SHARED_LIBS=ON -DFAISS_OPT_LEVEL=avx2 -B build . && \
     make -C build -j $(nproc) faiss_avx2 install
 
-FROM mcr.microsoft.com/dotnet/sdk:6.0
+FROM --platform=linux/${arch} mcr.microsoft.com/dotnet/sdk:6.0
 
 EXPOSE 80
 
diff --git a/FaissMask.Test/VectorTransformTests.cs b/FaissMask.Test/VectorTransformTests.cs
new file mode 100644
index 0000000..36c9761
--- /dev/null
+++ b/FaissMask.Test/VectorTransformTests.cs
@@ -0,0 +1,141 @@
+using System;
+using System.IO;
+using Xunit;
+
+namespace FaissMask.Test;
+
+/// <summary>
+/// Exercises <see cref="VectorTransform"/> using the 1024-in / 512-out transform fixture.
+/// </summary>
+public class VectorTransformTests
+{
+    private const string VectorTransformFileName = "data/vector_transform_1024_512.bin";
+    private const int ExpectedDimensionIn = 1024;
+    private const int ExpectedDimensionOut = 512;
+
+    // Seeded so the generated input vectors are the same on every run.
+    private readonly Random _random = new(42);
+
+    [Fact]
+    public void ReadsVectorTransformFile()
+    {
+        using var vectorTransform = VectorTransform.Read(VectorTransformFileName);
+
+        Assert.NotNull(vectorTransform);
+        Assert.Equal(ExpectedDimensionIn, vectorTransform.DimensionIn);
+        Assert.Equal(ExpectedDimensionOut, vectorTransform.DimensionOut);
+    }
+
+    [Theory]
+    [InlineData(1)]
+    [InlineData(10)]
+    public void AppliesAVectorTransform(int count)
+    {
+        using var vectorTransform = VectorTransform.Read(VectorTransformFileName);
+        var vectors = CreateRandomVectors(vectorTransform.DimensionIn, count);
+
+        var transformed = vectorTransform.Apply(vectors);
+
+        Assert.Equal(count, transformed.Length);
+        foreach (var vector in transformed)
+        {
+            Assert.Equal(vectorTransform.DimensionOut, vector.Length);
+        }
+    }
+
+    [Fact]
+    public void AppliesASingletonVectorTransform()
+    {
+        using var vectorTransform = VectorTransform.Read(VectorTransformFileName);
+        var vector = CreateRandomVector(vectorTransform.DimensionIn);
+
+        var transformed = vectorTransform.Apply(vector);
+
+        Assert.Equal(vectorTransform.DimensionOut, transformed.Length);
+    }
+
+    [Theory]
+    [InlineData(2)]
+    [InlineData(10)]
+    public void AppliesAFlattenedVectorTransform(int count)
+    {
+        using var vectorTransform = VectorTransform.Read(VectorTransformFileName);
+        var vectorsFlattened = CreateRandomVector(vectorTransform.DimensionIn * count);
+
+        var transformed = vectorTransform.Apply(count, vectorsFlattened);
+
+        Assert.Equal(count * vectorTransform.DimensionOut, transformed.Length);
+    }
+
+    [Theory]
+    [InlineData(1)]
+    [InlineData(10)]
+    public void ThrowsExceptionWhenInputVectorsHaveInvalidLength(int count)
+    {
+        using var vectorTransform = VectorTransform.Read(VectorTransformFileName);
+        var vectors = CreateRandomVectors(vectorTransform.DimensionIn - 1, count);
+
+        var ex = Assert.Throws<ArgumentException>(() => vectorTransform.Apply(vectors));
+
+        Assert.Equal(
+            $"Invalid input vectors, each should have a length of {vectorTransform.DimensionIn} (Parameter 'vectors')",
+            ex.Message);
+    }
+
+    [Theory]
+    [InlineData(1)]
+    [InlineData(10)]
+    public void ThrowsExceptionWhenInputFlattenedVectorsHaveInvalidLength(int count)
+    {
+        using var vectorTransform = VectorTransform.Read(VectorTransformFileName);
+
+        // Every vector is one element short, so the flattened length never matches count * DimensionIn.
+        var vectors = CreateRandomVector((vectorTransform.DimensionIn - 1) * count);
+
+        var ex = Assert.Throws<ArgumentException>(() => vectorTransform.Apply(count, vectors));
+
+        Assert.Equal(
+            $"Invalid input vector, length for count {count} should be {count * vectorTransform.DimensionIn}, got {vectors.Length} (Parameter 'flattenedVectors')",
+            ex.Message);
+    }
+
+    [Fact]
+    public void ThrowsExceptionWhenVectorTransformFilenameParameterIsNull()
+    {
+        var ex = Assert.Throws<ArgumentNullException>(() => VectorTransform.Read(null));
+
+        Assert.Equal("Value cannot be null. (Parameter 'filename')", ex.Message);
+    }
+
+    [Fact]
+    public void ThrowsExceptionWhenVectorTransformFileDoesNotExist()
+    {
+        var ex = Assert.Throws<FileNotFoundException>(() => VectorTransform.Read("non_existent_file.bin"));
+
+        Assert.Equal("The file non_existent_file.bin does not exist", ex.Message);
+    }
+
+    // Builds `count` random vectors, each of length `dimension`.
+    private float[][] CreateRandomVectors(int dimension, int count)
+    {
+        var vectors = new float[count][];
+        for (var i = 0; i < count; i++)
+        {
+            vectors[i] = CreateRandomVector(dimension);
+        }
+
+        return vectors;
+    }
+
+    // Builds one vector of `dimension` random values drawn from the seeded generator.
+    private float[] CreateRandomVector(int dimension)
+    {
+        var vector = new float[dimension];
+        for (var i = 0; i < vector.Length; i++)
+        {
+            vector[i] = _random.NextSingle();
+        }
+
+        return vector;
+    }
+}
\ No newline at end of file
diff --git a/FaissMask.Test/data/vector_transform_1024_512.bin b/FaissMask.Test/data/vector_transform_1024_512.bin
new file mode 100644
index 0000000..c6e75d2
Binary files /dev/null and b/FaissMask.Test/data/vector_transform_1024_512.bin differ
diff --git a/FaissMask/Index.cs b/FaissMask/Index.cs
index 5ff0695..8e010d9 100644
--- a/FaissMask/Index.cs
+++ b/FaissMask/Index.cs
@@ -61,17 +61,17 @@ public IEnumerable Search(IEnumerable vectors, long kneig
             return Search(count, vectorsFlattened, kneighbors);
         }
 
-        private IEnumerable Search(long count, float[] vectorsFlattened, long kneighbors)
+        public IEnumerable Search(long count, float[] vectorsFlattened, long kneighbors)
         {
-            float[] distances = new float[kneighbors * count];
-            long[] labels = new long[kneighbors * count];
+            var distances = new float[kneighbors * count];
+            var labels = new long[kneighbors * count];
             Handle.Search(count, vectorsFlattened, kneighbors, distances, labels);
             var labelDistanceZip = labels.Zip(distances, (l, d) => new
             {
                 Label = l,
                 Distance = d
-            });
-            for (int i = 0; i < count; i++)
+            }).ToArray();
+            for (var i = 0; i < count; i++)
             {
                 var vectorResult = labelDistanceZip.Skip((int)(i * kneighbors))
                     .Take((int)kneighbors);
diff --git a/FaissMask/Internal/NativeMethods.cs b/FaissMask/Internal/NativeMethods.cs
index 65e62f8..0e3af34 100644
--- a/FaissMask/Internal/NativeMethods.cs
+++ b/FaissMask/Internal/NativeMethods.cs
@@ -38,8 +38,18 @@ internal static class NativeMethods
         [DllImport("faiss_c", SetLastError = true)]
         public static extern int faiss_IndexIDMap_new(ref IndexIDMapSafeHandle mapIndex, IndexFlatSafeHandle index);
         [DllImport("faiss_c", SetLastError = true)]
+        public static extern int faiss_VectorTransform_apply_noalloc(VectorTransformSafeHandle vt, long n, float[] x, float[] xt);
+        [DllImport("faiss_c", SetLastError = true)]
+        public static extern int faiss_VectorTransform_d_in(VectorTransformSafeHandle index);
+        [DllImport("faiss_c", SetLastError = true)]
+        public static extern int faiss_VectorTransform_d_out(VectorTransformSafeHandle index);
+        [DllImport("faiss_c", SetLastError = true)]
+        public static extern int faiss_VectorTransform_free(VectorTransformSafeHandle vectorTransform);
+        [DllImport("faiss_c", SetLastError = true)]
         public static extern int faiss_read_index_fname(string fname, int io_flags, ref IntPtr p_out);
         [DllImport("faiss_c", SetLastError = true)]
+        public static extern int faiss_read_VectorTransform_fname(string fname, ref IntPtr p_out);
+        [DllImport("faiss_c", SetLastError = true)]
         public static extern string faiss_get_last_error();
         [DllImport("faiss_c", SetLastError = true)]
         public static extern int faiss_IndexIVF_make_direct_map(IndexSafeHandle index, int new_maintain_direct_map);
diff --git a/FaissMask/Internal/VectorTransformSafeHandle.cs b/FaissMask/Internal/VectorTransformSafeHandle.cs
new file mode 100644
index 0000000..88f2912
--- /dev/null
+++ b/FaissMask/Internal/VectorTransformSafeHandle.cs
@@ -0,0 +1,93 @@
+using System;
+using System.IO;
+
+namespace FaissMask.Internal
+{
+    /// <summary>
+    /// Owns a native faiss VectorTransform pointer and exposes the native calls made on it.
+    /// </summary>
+    public class VectorTransformSafeHandle : SafeHandleZeroIsInvalid
+    {
+        private VectorTransformSafeHandle(IntPtr pointer) : base(pointer)
+        {
+        }
+
+        /// <summary>
+        /// Loads a vector transform from <paramref name="filename"/> through the native reader.
+        /// </summary>
+        /// <param name="filename">Path of the transform file; resolved to a full path before use.</param>
+        /// <returns>A handle wrapping the pointer returned by the native reader.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="filename"/> is null or empty.</exception>
+        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
+        /// <exception cref="IOException">The native reader reported an error or returned no pointer.</exception>
+        public static VectorTransformSafeHandle Read(string filename)
+        {
+            if (string.IsNullOrEmpty(filename))
+            {
+                throw new ArgumentNullException(nameof(filename));
+            }
+
+            var fullPathFilename = Path.GetFullPath(filename);
+            if (!File.Exists(fullPathFilename))
+            {
+                throw new FileNotFoundException($"The file {filename} does not exist", fullPathFilename);
+            }
+
+            var pointer = IntPtr.Zero;
+            var returnCode = NativeMethods.faiss_read_VectorTransform_fname(fullPathFilename, ref pointer);
+            if (returnCode == 0 && pointer != IntPtr.Zero)
+            {
+                return new VectorTransformSafeHandle(pointer);
+            }
+
+            var lastError = NativeMethods.faiss_get_last_error();
+            var detail = string.IsNullOrEmpty(lastError)
+                ? $"An unknown error occurred trying to read the vector transform '{fullPathFilename}'"
+                : $"An error occurred trying to read the vector transform '{fullPathFilename}': {lastError}";
+
+            throw new IOException($"{detail} (return code {returnCode})");
+        }
+
+        /// <summary>
+        /// Runs the native transform over <paramref name="count"/> vectors, writing into <paramref name="transformedVectors"/>.
+        /// </summary>
+        /// <param name="count">Number of vectors passed to the native call.</param>
+        /// <param name="vectors">Input buffer passed to the native call.</param>
+        /// <param name="transformedVectors">Caller-allocated output buffer.</param>
+        public void Apply(long count, float[] vectors, float[] transformedVectors)
+        {
+            // NOTE(review): the native return value is not inspected here - confirm whether it carries an error code.
+            NativeMethods.faiss_VectorTransform_apply_noalloc(this, count, vectors, transformedVectors);
+        }
+
+        /// <summary>Input dimension reported by the native transform.</summary>
+        public int DimensionIn => NativeMethods.faiss_VectorTransform_d_in(this);
+
+        /// <summary>Output dimension reported by the native transform.</summary>
+        public int DimensionOut => NativeMethods.faiss_VectorTransform_d_out(this);
+
+        /// <summary>Frees the native transform unless <see cref="Free"/> already did.</summary>
+        protected override bool ReleaseHandle()
+        {
+            // NOTE(review): Free() passes this handle to P/Invoke; confirm that works once the handle is closing.
+            Free();
+            return true;
+        }
+
+        private bool IsFree { get; set; }
+
+        /// <summary>
+        /// Releases the native transform. Repeated calls are ignored so disposal stays idempotent.
+        /// </summary>
+        protected internal void Free()
+        {
+            if (IsInvalid || IsFree)
+            {
+                return;
+            }
+
+            NativeMethods.faiss_VectorTransform_free(this);
+            IsFree = true;
+        }
+    }
+}
diff --git a/FaissMask/VectorTransform.cs b/FaissMask/VectorTransform.cs
new file mode 100644
index 0000000..80f14ea
--- /dev/null
+++ b/FaissMask/VectorTransform.cs
@@ -0,0 +1,100 @@
+using System;
+using FaissMask.Extensions;
+using FaissMask.Internal;
+
+namespace FaissMask
+{
+    /// <summary>
+    /// Managed wrapper around a native faiss vector transform with fixed input and output dimensions.
+    /// </summary>
+    public class VectorTransform(VectorTransformSafeHandle handle, int dimIn, int dimOut) : IDisposable
+    {
+        /// <summary>Reads a transform from <paramref name="filename"/> and captures its dimensions.</summary>
+        public static VectorTransform Read(string filename)
+        {
+            var safeHandle = VectorTransformSafeHandle.Read(filename);
+            return new VectorTransform(safeHandle, safeHandle.DimensionIn, safeHandle.DimensionOut);
+        }
+
+        /// <summary>Length every input vector must have.</summary>
+        public int DimensionIn => dimIn;
+
+        /// <summary>Length of every transformed vector.</summary>
+        public int DimensionOut => dimOut;
+
+        /// <summary>Transforms a single vector of length <see cref="DimensionIn"/>.</summary>
+        public float[] Apply(float[] vector)
+        {
+            return Apply(1, vector);
+        }
+
+        /// <summary>
+        /// Transforms each vector and returns one output array per input.
+        /// Re-flattening callers should prefer <see cref="Apply(int, float[])"/>, which skips the per-vector copy.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="vectors"/> is null.</exception>
+        /// <exception cref="ArgumentException">Any vector is null or not <see cref="DimensionIn"/> long.</exception>
+        public float[][] Apply(float[][] vectors)
+        {
+            if (vectors is null)
+            {
+                throw new ArgumentNullException(nameof(vectors));
+            }
+
+            // Check each vector on its own: comparing only the flattened total would accept
+            // jagged input whose lengths happen to add up to count * dimIn.
+            foreach (var vector in vectors)
+            {
+                if (vector is null || vector.Length != dimIn)
+                {
+                    throw new ArgumentException($"Invalid input vectors, each should have a length of {dimIn}", nameof(vectors));
+                }
+            }
+
+            var count = vectors.Length;
+            var output = Apply(count, vectors.Flatten());
+
+            // Slice the flat native output back into one array per input vector.
+            var result = new float[count][];
+            for (var i = 0; i < count; i++)
+            {
+                result[i] = new float[dimOut];
+                Array.Copy(output, i * dimOut, result[i], 0, dimOut);
+            }
+
+            return result;
+        }
+
+        /// <summary>
+        /// Transforms <paramref name="count"/> vectors stored back to back in <paramref name="flattenedVectors"/>.
+        /// </summary>
+        /// <returns>The transformed vectors, flattened, <see cref="DimensionOut"/> values per vector.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="flattenedVectors"/> is null.</exception>
+        /// <exception cref="ArgumentException">The buffer length is not <paramref name="count"/> times <see cref="DimensionIn"/>.</exception>
+        public float[] Apply(int count, float[] flattenedVectors)
+        {
+            if (flattenedVectors is null)
+            {
+                throw new ArgumentNullException(nameof(flattenedVectors));
+            }
+
+            // Widen to long so a large count cannot wrap around and slip past the length check.
+            var expectedLength = (long)count * dimIn;
+            if (expectedLength != flattenedVectors.Length)
+            {
+                throw new ArgumentException($"Invalid input vector, length for count {count} should be {expectedLength}, got {flattenedVectors.Length}", nameof(flattenedVectors));
+            }
+
+            var output = new float[(long)count * dimOut];
+            handle.Apply(count, flattenedVectors, output);
+            return output;
+        }
+
+        /// <summary>Releases the native transform; safe to call more than once.</summary>
+        public void Dispose()
+        {
+            handle?.Free();
+            handle?.Dispose();
+        }
+    }
+}
\ No newline at end of file
diff --git a/FaissMask/c_functions.txt b/FaissMask/c_functions.txt
index be03020..973a899 100644
--- a/FaissMask/c_functions.txt
+++ b/FaissMask/c_functions.txt
@@ -109,4 +109,9 @@ faiss_ParameterSpace_set_index_parameters_cno
 faiss_read_index
 faiss_read_index_fname
 faiss_write_index
-faiss_write_index_fname
\ No newline at end of file
+faiss_write_index_fname
+faiss_read_VectorTransform_fname
+faiss_VectorTransform_apply_noalloc
+faiss_VectorTransform_d_in
+faiss_VectorTransform_d_out
+faiss_VectorTransform_free
\ No newline at end of file
diff --git a/FaissMask/runtimes/linux-x64/native/libfaiss_avx2.so b/FaissMask/runtimes/linux-x64/native/libfaiss_avx2.so
index 0f0bc92..eaa67d6 100755
Binary files a/FaissMask/runtimes/linux-x64/native/libfaiss_avx2.so and b/FaissMask/runtimes/linux-x64/native/libfaiss_avx2.so differ
diff --git a/FaissMask/runtimes/linux-x64/native/libfaiss_c.so b/FaissMask/runtimes/linux-x64/native/libfaiss_c.so
index 0bc4ecd..cf11074 100755
Binary files a/FaissMask/runtimes/linux-x64/native/libfaiss_c.so and b/FaissMask/runtimes/linux-x64/native/libfaiss_c.so differ
diff --git a/FaissMask/runtimes/osx-arm64/native/libfaiss.dylib b/FaissMask/runtimes/osx-arm64/native/libfaiss.dylib
new file mode 100755
index 0000000..4f5d2d0
Binary files /dev/null and b/FaissMask/runtimes/osx-arm64/native/libfaiss.dylib differ
diff --git a/FaissMask/runtimes/osx-arm64/native/libfaiss_c.dylib b/FaissMask/runtimes/osx-arm64/native/libfaiss_c.dylib
index d6f6683..1402fcb 100755
Binary files a/FaissMask/runtimes/osx-arm64/native/libfaiss_c.dylib and b/FaissMask/runtimes/osx-arm64/native/libfaiss_c.dylib differ
diff --git a/build-faiss-linux.sh b/build-faiss-linux.sh
index 0c8d6f6..ff61cee 100755
--- a/build-faiss-linux.sh
+++ b/build-faiss-linux.sh
@@ -1,6 +1,8 @@
 #!/bin/bash -e
 
-FAISS_VERSION=${1:-main}
+FAISS_VERSION=${1:-vector_transform_c_api}
+GITHUB_ACCOUNT=${2:-makosten}
+arch=amd64
 rm -f FaissMask/runtimes/linux-x64/native/*
-docker-compose build --build-arg FAISS_VERSION=$FAISS_VERSION
-docker run --rm -v $PWD:/host faissmask_test bash -c 'cp /src/FaissMask/runtimes/linux-x64/native/* /host/FaissMask/runtimes/linux-x64/native/'
\ No newline at end of file
+docker-compose build --build-arg "arch=$arch" --build-arg "FAISS_VERSION=$FAISS_VERSION" --build-arg "GITHUB_ACCOUNT=$GITHUB_ACCOUNT"
+docker run --platform="linux/${arch}" --rm -v "$PWD:/host" faissmask-test bash -c 'cp /src/FaissMask/runtimes/linux-x64/native/* /host/FaissMask/runtimes/linux-x64/native/'
\ No newline at end of file
diff --git a/build-faiss-macos.sh b/build-faiss-macos.sh
index da94cbe..9944bba 100755
--- a/build-faiss-macos.sh
+++ b/build-faiss-macos.sh
@@ -1,12 +1,13 @@
 #!/bin/bash
 
-BRANCH="main"
+BRANCH="vector_transform_c_api"
+GITHUB_ACCOUNT="makosten"
 
 brew install libomp
 brew install cmake
 
-git clone --recursive --branch $BRANCH https://github.com/facebookresearch/faiss.git libfaiss-src
+git clone --recursive --branch "${BRANCH}" "https://github.com/${GITHUB_ACCOUNT}/faiss.git" libfaiss-src
 cd libfaiss-src
-cmake -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_PYTHON=OFF -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Release -DFAISS_ENABLE_C_API=ON -DBUILD_SHARED_LIBS=ON -B build .
+cmake -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_PYTHON=OFF -DOpenMP_libomp_LIBRARY="$(brew --prefix libomp)/lib/libomp.dylib" -DBUILD_TESTING=OFF -DCMAKE_BUILD_TYPE=Release -DFAISS_ENABLE_C_API=ON -DBUILD_SHARED_LIBS=ON -B build .
 make -C build -j faiss
 sudo make -C build install
@@ -16,4 +17,5 @@ if [[ $(uname -m) == 'x86_64' ]]; then
 fi
 
 cp build/c_api/libfaiss_c.dylib ../FaissMask/runtimes/osx-$arch/native/
+cp build/faiss/libfaiss.dylib ../FaissMask/runtimes/osx-$arch/native/
 cd ..
\ No newline at end of file