diff --git a/velox/docs/functions/presto/math.rst b/velox/docs/functions/presto/math.rst index 4bae242dea3..2842403d97e 100644 --- a/velox/docs/functions/presto/math.rst +++ b/velox/docs/functions/presto/math.rst @@ -51,6 +51,29 @@ Mathematical Functions SELECT cosine_similarity(ARRAY[], ARRAY[]); -- NaN +.. function:: cosine_similarity(array(real), array(real)) -> real + + Returns the `cosine similarity <https://en.wikipedia.org/wiki/Cosine_similarity>`_ between the vectors represented as array(real). + If both input arrays are empty, the function returns NaN. If the input arrays have different sizes, the function throws VeloxUserError. + +.. function:: l2_squared(array(real), array(real)) -> real + + Returns the squared `Euclidean distance <https://en.wikipedia.org/wiki/Euclidean_distance>`_ between the vectors represented as array(real). + If both input arrays are empty, the function returns NaN. If the input arrays have different sizes, the function throws VeloxUserError. + + SELECT l2_squared(ARRAY[1], ARRAY[2]); -- 1.0 + + SELECT l2_squared(ARRAY[1.0, 2.0], ARRAY[NULL, 3.0]); -- NULL + + SELECT l2_squared(ARRAY[], ARRAY[2, 3]); -- Throws VeloxUserError + + SELECT l2_squared(ARRAY[], ARRAY[]); -- NaN + +.. function:: l2_squared(array(double), array(double)) -> double + + Returns the squared `Euclidean distance <https://en.wikipedia.org/wiki/Euclidean_distance>`_ between the vectors represented as array(double). + If both input arrays are empty, the function returns NaN. If the input arrays have different sizes, the function throws VeloxUserError. + .. function:: degrees(x) -> double Converts angle x in radians to degrees. 
diff --git a/velox/functions/prestosql/DistanceFunctions.h b/velox/functions/prestosql/DistanceFunctions.h index e8f61d0d809..8347e787e08 100644 --- a/velox/functions/prestosql/DistanceFunctions.h +++ b/velox/functions/prestosql/DistanceFunctions.h @@ -164,6 +164,66 @@ struct CosineSimilarityFunctionFloatArray { } }; +template +struct L2SquaredFunctionFloatArray { /* Squared Euclidean (L2) distance for the float-array overload. callNullFree rejects inputs whose sizes differ via VELOX_USER_CHECK, writes a quiet NaN when the shared size d is 0, and otherwise passes both inputs and d to faiss::fvec_L2sqr. */ + VELOX_DEFINE_FUNCTION_TYPES(T); + + void callNullFree( + out_type& result, + const facebook::velox::exec::ArrayView& leftArray, + const facebook::velox::exec::ArrayView& rightArray) { + VELOX_USER_CHECK( + leftArray.size() == rightArray.size(), + "Both arrays need to have identical size"); + size_t d = leftArray.size(); + if (d == 0) { + result = std::numeric_limits::quiet_NaN(); + return; + } + + std::vector leftBuffer, rightBuffer; + const float* x = getArrayDataOrCopy(leftArray, leftBuffer); + const float* y = getArrayDataOrCopy(rightArray, rightBuffer); /* NOTE(review): getArrayDataOrCopy is not defined in this hunk; presumably it yields d contiguous floats and fills the buffer only when a copy is required - confirm. */ + result = faiss::fvec_L2sqr(x, y, d); + } +}; + +template +struct L2SquaredFunctionDoubleArray { /* Double-array overload: same size check and empty-input NaN as the float version, but every element is first copied through static_cast into the local vectors x and y, which are then passed to faiss::fvec_L2sqr. */ + VELOX_DEFINE_FUNCTION_TYPES(T); + + void callNullFree( + out_type& result, + const facebook::velox::exec::ArrayView& leftArray, + const facebook::velox::exec::ArrayView& rightArray) { + VELOX_USER_CHECK( + leftArray.size() == rightArray.size(), + "Both arrays need to have identical size"); + size_t d = leftArray.size(); + if (d == 0) { + result = std::numeric_limits::quiet_NaN(); + return; + } + + std::vector x(static_cast::size_type>(d), 0); + std::vector y(static_cast::size_type>(d), 0); + + for (size_t i = 0; i < leftArray.size(); i++) { + if (i < x.size()) { /* NOTE(review): x holds d elements and d equals leftArray.size(), so this guard looks always true - confirm before removing. */ + x[i] = static_cast(leftArray[i]); + } + } + for (size_t i = 0; i < rightArray.size(); i++) { + if (i < x.size()) { /* NOTE(review): the write goes to y but the bound is x.size(); both vectors hold d elements, so this also looks always true - confirm. */ + y[i] = static_cast(rightArray[i]); + } + } + + float l2_sqr = faiss::fvec_L2sqr(x.data(), y.data(), d); /* The FAISS result is stored in a float before the final cast, so result carries single precision. */ + result = static_cast(l2_sqr); + } +}; + #else // VELOX_ENABLE_FAISS template diff --git a/velox/functions/prestosql/registration/MathematicalFunctionsRegistration.cpp 
b/velox/functions/prestosql/registration/MathematicalFunctionsRegistration.cpp index edeb6aadafb..b8f72102821 100644 --- a/velox/functions/prestosql/registration/MathematicalFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/MathematicalFunctionsRegistration.cpp @@ -130,6 +130,16 @@ void registerMathFunctions(const std::string& prefix) { float, Array, Array>({prefix + "cosine_similarity"}); + registerFunction< /* This registration and the next bind the float and double overloads to the same prefixed name, l2_squared. */ + L2SquaredFunctionFloatArray, + float, + Array, + Array>({prefix + "l2_squared"}); + registerFunction< + L2SquaredFunctionDoubleArray, + double, + Array, + Array>({prefix + "l2_squared"}); #endif } diff --git a/velox/functions/prestosql/tests/DistanceFunctionsTest.cpp b/velox/functions/prestosql/tests/DistanceFunctionsTest.cpp index cce037aec51..8cdd29c49ce 100644 --- a/velox/functions/prestosql/tests/DistanceFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DistanceFunctionsTest.cpp @@ -147,6 +147,54 @@ TEST_F(DistanceFunctionsTest, cosineSimilarityFloatArray) { EXPECT_TRUE(std::isnan(cosineSimilarity({1, 3}, {kNanFloat, 1}))); EXPECT_TRUE(std::isnan(cosineSimilarity({1, 3}, {kInfFloat, 1}))); } + +TEST_F(DistanceFunctionsTest, l2SquaredFunctionFloatArray) { /* Evaluates the l2_squared expression on one array per side and checks two hand-computed distances, the NaN result for two empty arrays, and the size-mismatch error message. */ + const auto l2Squared = [&](const std::vector& left, + const std::vector& right) { + auto leftArray = makeArrayVector({left}); + auto rightArray = makeArrayVector({right}); + return evaluateOnce( + "l2_squared(c0,c1)", makeRowVector({leftArray, rightArray})) + .value(); + }; + + EXPECT_NEAR( + (1.234 - 2.345) * (1.234 - 2.345) + (2.456 - 3.567) * (2.456 - 3.567), + l2Squared({1.234, 2.456}, {2.345, 3.567}), + 1e-6); /* NOTE(review): the expected value is computed from double literals; a 1e-6 tolerance looks tight for a single-precision result of this size - confirm it is stable. */ + EXPECT_NEAR( + (1.789 - 4.012) * (1.789 - 4.012) + (2.345 * 2.345) + + (-1.678 - 5.901) * (-1.678 - 5.901), + l2Squared({1.789, 2.345, -1.678}, {4.012, 0.0, 5.901}), + 1e-5); + EXPECT_TRUE(std::isnan(l2Squared({}, {}))); + VELOX_ASSERT_THROW( + l2Squared({1.234, 3.456}, {}), "Both arrays need to have identical size"); /* NOTE(review): the cosine similarity test just before this one also covers NaN and infinite elements; no equivalent cases appear here - consider adding them. */ +} + +TEST_F(DistanceFunctionsTest, 
l2SquaredFunctionDoubleArray) { /* Mirrors the float-array test for the double-array overload: two hand-computed distances, NaN for two empty arrays, and the size-mismatch error message. Element types of the lambda parameters are not visible in this view. */ + const auto l2Squared = [&](const std::vector& left, + const std::vector& right) { + auto leftArray = makeArrayVector({left}); + auto rightArray = makeArrayVector({right}); + return evaluateOnce( + "l2_squared(c0,c1)", makeRowVector({leftArray, rightArray})) + .value(); + }; + + EXPECT_NEAR( + (1.5 - 2.3) * (1.5 - 2.3) + (2.7 - 3.8) * (2.7 - 3.8), + l2Squared({1.5, 2.7}, {2.3, 3.8}), + 1e-6); + EXPECT_NEAR( + (1.1 - 4.2) * (1.1 - 4.2) + (2.5 * 2.5) + (-1.3 - 5.6) * (-1.3 - 5.6), + l2Squared({1.1, 2.5, -1.3}, {4.2, 0.0, 5.6}), + 1e-5); + EXPECT_TRUE(std::isnan(l2Squared({}, {}))); + VELOX_ASSERT_THROW( + l2Squared({1.0, 3.0}, {}), "Both arrays need to have identical size"); +} + #endif // VELOX_ENABLE_FAISS } // namespace