Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions velox/docs/functions/presto/math.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,29 @@ Mathematical Functions

SELECT cosine_similarity(ARRAY[], ARRAY[]); -- NaN

.. function:: cosine_similarity(array(real), array(real)) -> real

Returns the `cosine similarity <https://en.wikipedia.org/wiki/Cosine_similarity>`_ between the vectors represented as array(real).
If any input array is empty, the function returns NaN. If the input arrays have different sizes, the function throws VeloxUserError.

.. function:: l2_squared(array(real), array(real)) -> real

Returns the squared `Euclidean distance <https://en.wikipedia.org/wiki/Euclidean_distance>`_ between the vectors represented as array(real).
If both input arrays are empty, the function returns NaN. If the input arrays have different sizes (including when only one of them is empty), the function throws VeloxUserError.

SELECT l2_squared(ARRAY[1], ARRAY[2]); -- 1.0

SELECT l2_squared(ARRAY[1.0, 2.0], ARRAY[NULL, 3.0]); -- NULL

SELECT l2_squared(ARRAY[], ARRAY[2, 3]); -- Throws VeloxUserError

SELECT l2_squared(ARRAY[], ARRAY[]); -- NaN

.. function:: l2_squared(array(double), array(double)) -> double

Returns the squared `Euclidean distance <https://en.wikipedia.org/wiki/Euclidean_distance>`_ between the vectors represented as array(double).
If both input arrays are empty, the function returns NaN. If the input arrays have different sizes (including when only one of them is empty), the function throws VeloxUserError.

.. function:: degrees(x) -> double

Converts angle x in radians to degrees.
Expand Down
60 changes: 60 additions & 0 deletions velox/functions/prestosql/DistanceFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,66 @@ struct CosineSimilarityFunctionFloatArray {
}
};

/// Squared Euclidean (L2) distance between two null-free arrays of float.
///
/// Contract, as implemented below:
///  - arrays of different length raise a user error;
///  - two empty arrays produce NaN;
///  - otherwise the result is whatever faiss::fvec_L2sqr returns for the two
///    element buffers.
template <typename T>
struct L2SquaredFunctionFloatArray {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  void callNullFree(
      out_type<float>& result,
      const facebook::velox::exec::ArrayView<false, float>& leftArray,
      const facebook::velox::exec::ArrayView<false, float>& rightArray) {
    // The length check runs first, so "one empty, one non-empty" is reported
    // as a size mismatch rather than yielding NaN.
    VELOX_USER_CHECK(
        leftArray.size() == rightArray.size(),
        "Both arrays need to have identical size");

    const size_t dimension = leftArray.size();
    if (dimension != 0) {
      // NOTE(review): getArrayDataOrCopy is defined outside this hunk; it
      // presumably returns a pointer to contiguous element data, using the
      // supplied vector as backing storage when a copy is required - confirm.
      std::vector<float> leftScratch;
      std::vector<float> rightScratch;
      const float* leftData = getArrayDataOrCopy(leftArray, leftScratch);
      const float* rightData = getArrayDataOrCopy(rightArray, rightScratch);
      result = faiss::fvec_L2sqr(leftData, rightData, dimension);
    } else {
      // Both arrays are empty: the distance is reported as NaN.
      result = std::numeric_limits<float>::quiet_NaN();
    }
  }
};

/// Squared Euclidean (L2) distance between two null-free arrays of double.
///
/// Contract:
///  - arrays of different length raise a user error;
///  - two empty arrays produce NaN;
///  - otherwise the result is sum_i (left[i] - right[i])^2, accumulated in
///    double precision.
///
/// The sum is computed directly on the double inputs instead of narrowing
/// them to float first. Narrowing silently discards precision (for example
/// 16777217.0 and 16777216.0 map to the same float, giving a distance of 0
/// instead of 1) and cannot represent magnitudes beyond the float range, which
/// is not acceptable for a function whose inputs and output are double.
template <typename T>
struct L2SquaredFunctionDoubleArray {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  void callNullFree(
      out_type<double>& result,
      const facebook::velox::exec::ArrayView<false, double>& leftArray,
      const facebook::velox::exec::ArrayView<false, double>& rightArray) {
    // The length check runs first, so "one empty, one non-empty" is reported
    // as a size mismatch rather than yielding NaN.
    VELOX_USER_CHECK(
        leftArray.size() == rightArray.size(),
        "Both arrays need to have identical size");

    const size_t d = leftArray.size();
    if (d == 0) {
      // Both arrays are empty: the distance is reported as NaN.
      result = std::numeric_limits<double>::quiet_NaN();
      return;
    }

    // Both arrays have exactly d elements (checked above), so a single pass
    // with no per-element bounds test is sufficient.
    double sumOfSquares = 0.0;
    for (size_t i = 0; i < d; ++i) {
      const double diff = static_cast<double>(leftArray[i]) -
          static_cast<double>(rightArray[i]);
      sumOfSquares += diff * diff;
    }
    result = sumOfSquares;
  }
};

#else // VELOX_ENABLE_FAISS

template <typename T>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ void registerMathFunctions(const std::string& prefix) {
float,
Array<float>,
Array<float>>({prefix + "cosine_similarity"});
registerFunction<
L2SquaredFunctionFloatArray,
float,
Array<float>,
Array<float>>({prefix + "l2_squared"});
registerFunction<
L2SquaredFunctionDoubleArray,
double,
Array<double>,
Array<double>>({prefix + "l2_squared"});
#endif
}

Expand Down
48 changes: 48 additions & 0 deletions velox/functions/prestosql/tests/DistanceFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,54 @@ TEST_F(DistanceFunctionsTest, cosineSimilarityFloatArray) {
EXPECT_TRUE(std::isnan(cosineSimilarity({1, 3}, {kNanFloat, 1})));
EXPECT_TRUE(std::isnan(cosineSimilarity({1, 3}, {kInfFloat, 1})));
}

// Exercises l2_squared over array(real): two numeric cases compared against
// the hand-expanded sum of squared differences, the empty/empty case (NaN),
// and the size-mismatch case (user error).
TEST_F(DistanceFunctionsTest, l2SquaredFunctionFloatArray) {
  // Evaluates l2_squared on a single row holding the two given arrays and
  // unwraps the (expected non-null) result.
  const auto evalL2Squared = [&](const std::vector<float>& lhs,
                                 const std::vector<float>& rhs) {
    auto lhsVector = makeArrayVector<float>({lhs});
    auto rhsVector = makeArrayVector<float>({rhs});
    return evaluateOnce<float>(
               "l2_squared(c0,c1)", makeRowVector({lhsVector, rhsVector}))
        .value();
  };

  // Two-dimensional vectors.
  const double expectedTwoDims =
      (1.234 - 2.345) * (1.234 - 2.345) + (2.456 - 3.567) * (2.456 - 3.567);
  EXPECT_NEAR(
      expectedTwoDims, evalL2Squared({1.234, 2.456}, {2.345, 3.567}), 1e-6);

  // Three-dimensional vectors, including a zero and a negative component.
  const double expectedThreeDims = (1.789 - 4.012) * (1.789 - 4.012) +
      (2.345 * 2.345) + (-1.678 - 5.901) * (-1.678 - 5.901);
  EXPECT_NEAR(
      expectedThreeDims,
      evalL2Squared({1.789, 2.345, -1.678}, {4.012, 0.0, 5.901}),
      1e-5);

  // Two empty arrays yield NaN.
  EXPECT_TRUE(std::isnan(evalL2Squared({}, {})));

  // Arrays of different length are rejected.
  VELOX_ASSERT_THROW(
      evalL2Squared({1.234, 3.456}, {}),
      "Both arrays need to have identical size");
}

// Exercises l2_squared over array(double): two numeric cases compared against
// the hand-expanded sum of squared differences, the empty/empty case (NaN),
// and the size-mismatch case (user error).
TEST_F(DistanceFunctionsTest, l2SquaredFunctionDoubleArray) {
  // Evaluates l2_squared on a single row holding the two given arrays and
  // unwraps the (expected non-null) result.
  const auto evalL2Squared = [&](const std::vector<double>& lhs,
                                 const std::vector<double>& rhs) {
    auto lhsVector = makeArrayVector<double>({lhs});
    auto rhsVector = makeArrayVector<double>({rhs});
    return evaluateOnce<double>(
               "l2_squared(c0,c1)", makeRowVector({lhsVector, rhsVector}))
        .value();
  };

  // Two-dimensional vectors.
  const double expectedTwoDims =
      (1.5 - 2.3) * (1.5 - 2.3) + (2.7 - 3.8) * (2.7 - 3.8);
  EXPECT_NEAR(expectedTwoDims, evalL2Squared({1.5, 2.7}, {2.3, 3.8}), 1e-6);

  // Three-dimensional vectors, including a zero and a negative component.
  const double expectedThreeDims =
      (1.1 - 4.2) * (1.1 - 4.2) + (2.5 * 2.5) + (-1.3 - 5.6) * (-1.3 - 5.6);
  EXPECT_NEAR(
      expectedThreeDims,
      evalL2Squared({1.1, 2.5, -1.3}, {4.2, 0.0, 5.6}),
      1e-5);

  // Two empty arrays yield NaN.
  EXPECT_TRUE(std::isnan(evalL2Squared({}, {})));

  // Arrays of different length are rejected.
  VELOX_ASSERT_THROW(
      evalL2Squared({1.0, 3.0}, {}),
      "Both arrays need to have identical size");
}

#endif // VELOX_ENABLE_FAISS

} // namespace
Expand Down
Loading