Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions velox/experimental/cudf/expression/ExpressionEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,52 @@ class CardinalityFunction : public CudfFunction {
}
};

/// cudf-backed implementation of the boolean `not` function.
///
/// The class holds no state: the constructor only validates the arity of the
/// expression it is created for, and `eval` forwards to cudf.
class NotFunction : public CudfFunction {
 public:
  /// @param expr Expression being compiled. It is inspected only to verify
  /// that it has exactly one input; it is not retained.
  explicit NotFunction(const std::shared_ptr<velox::exec::Expr>& expr) {
    VELOX_CHECK_EQ(expr->inputs().size(), 1, "not expects 1 input");
  }

  /// Applies cudf's unary NOT operator to the single input column.
  ///
  /// @param inputColumns Evaluated arguments; must hold exactly one entry.
  /// @param stream CUDA stream forwarded to cudf.
  /// @param mr Memory resource forwarded to cudf.
  /// @return The result of cudf::unary_operation on the input.
  ColumnOrView eval(
      std::vector<ColumnOrView>& inputColumns,
      rmm::cuda_stream_view stream,
      rmm::device_async_resource_ref mr) const override {
    VELOX_CHECK_EQ(inputColumns.size(), 1, "not expects 1 input");
    auto operand = asView(inputColumns.front());
    return cudf::unary_operation(
        operand, cudf::unary_operator::NOT, stream, mr);
  }
};

/// cudf-backed implementation of the `is_null` function.
///
/// Stateless: construction validates arity only, and evaluation delegates to
/// cudf::is_null.
class IsNullFunction : public CudfFunction {
 public:
  /// @param expr Expression being compiled. Used only to check that it has
  /// exactly one input; it is not stored.
  explicit IsNullFunction(const std::shared_ptr<velox::exec::Expr>& expr) {
    VELOX_CHECK_EQ(expr->inputs().size(), 1, "is_null expects 1 input");
  }

  /// Runs cudf::is_null over the single input column.
  ///
  /// @param inputColumns Evaluated arguments; must hold exactly one entry.
  /// @param stream CUDA stream forwarded to cudf.
  /// @param mr Memory resource forwarded to cudf.
  /// @return The result of cudf::is_null on the input.
  ColumnOrView eval(
      std::vector<ColumnOrView>& inputColumns,
      rmm::cuda_stream_view stream,
      rmm::device_async_resource_ref mr) const override {
    VELOX_CHECK_EQ(inputColumns.size(), 1, "is_null expects 1 input");
    auto operand = asView(inputColumns.front());
    return cudf::is_null(operand, stream, mr);
  }
};

/// cudf-backed implementation of the `isnotnull` function.
///
/// Stateless: construction validates arity only, and evaluation delegates to
/// cudf::is_valid.
class IsNotNullFunction : public CudfFunction {
 public:
  /// @param expr Expression being compiled. Used only to check that it has
  /// exactly one input; it is not stored.
  explicit IsNotNullFunction(const std::shared_ptr<velox::exec::Expr>& expr) {
    VELOX_CHECK_EQ(expr->inputs().size(), 1, "isnotnull expects 1 input");
  }

  /// Runs cudf::is_valid over the single input column.
  ///
  /// @param inputColumns Evaluated arguments; must hold exactly one entry.
  /// @param stream CUDA stream forwarded to cudf.
  /// @param mr Memory resource forwarded to cudf.
  /// @return The result of cudf::is_valid on the input.
  ColumnOrView eval(
      std::vector<ColumnOrView>& inputColumns,
      rmm::cuda_stream_view stream,
      rmm::device_async_resource_ref mr) const override {
    VELOX_CHECK_EQ(inputColumns.size(), 1, "isnotnull expects 1 input");
    auto operand = asView(inputColumns.front());
    return cudf::is_valid(operand, stream, mr);
  }
};

class RoundFunction : public CudfFunction {
public:
explicit RoundFunction(const std::shared_ptr<velox::exec::Expr>& expr) {
Expand Down Expand Up @@ -1452,6 +1498,38 @@ bool registerBuiltinFunctions(const std::string& prefix) {
.variableArity("T")
.build()});

registerCudfFunction(
"not",
[](const std::string&, const std::shared_ptr<velox::exec::Expr>& expr) {
return std::make_shared<NotFunction>(expr);
},
Comment on lines +1501 to +1505
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of making a NotFunction, you can make a UnaryFunction and during registration, provide a factory like so:

Suggested change
registerCudfFunction(
"not",
[](const std::string&, const std::shared_ptr<velox::exec::Expr>& expr) {
return std::make_shared<NotFunction>(expr);
},
registerCudfFunction(
"not",
[](const std::string&, const std::shared_ptr<velox::exec::Expr>& expr) {
return std::make_shared<UnaryFunction>(expr, cudf::unary_operator::NOT);
},

{FunctionSignatureBuilder()
.returnType("boolean")
.argumentType("boolean")
.build()});

// Register the cudf-backed `is_null` function. The factory ignores the
// function name it receives and builds an IsNullFunction from the expression.
// The declared signature takes one argument of any type T and returns boolean.
// NOTE(review): this name is registered without `prefix`, whereas the "round"
// registration further down uses `prefix + "round"` - confirm this is intended.
registerCudfFunction(
"is_null",
[](const std::string&, const std::shared_ptr<velox::exec::Expr>& expr) {
return std::make_shared<IsNullFunction>(expr);
},
{FunctionSignatureBuilder()
.typeVariable("T")
.returnType("boolean")
.argumentType("T")
.build()});

// Register the cudf-backed `isnotnull` function. The factory ignores the
// function name it receives and builds an IsNotNullFunction from the
// expression. The declared signature takes one argument of any type T and
// returns boolean.
// NOTE(review): this name is registered without `prefix`, whereas the "round"
// registration further down uses `prefix + "round"` - confirm this is intended.
registerCudfFunction(
"isnotnull",
[](const std::string&, const std::shared_ptr<velox::exec::Expr>& expr) {
return std::make_shared<IsNotNullFunction>(expr);
},
{FunctionSignatureBuilder()
.typeVariable("T")
.returnType("boolean")
.argumentType("T")
.build()});

registerCudfFunction(
prefix + "round",
[](const std::string&, const std::shared_ptr<velox::exec::Expr>& expr) {
Expand Down
19 changes: 19 additions & 0 deletions velox/experimental/cudf/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_executable(velox_cudf_decimal_expression_test Main.cpp DecimalExpressionTest
# One executable per cuDF test suite. Each is built from the shared Main.cpp
# plus the suite's own test source file.
add_executable(velox_cudf_filter_project_test Main.cpp FilterProjectTest.cpp)
add_executable(velox_cudf_hash_join_test HashJoinTest.cpp Main.cpp)
add_executable(velox_cudf_limit_test Main.cpp LimitTest.cpp)
add_executable(velox_cudf_logical_functions_test Main.cpp LogicalFunctionsTest.cpp)
add_executable(velox_cudf_local_partition_test Main.cpp LocalPartitionTest.cpp)
add_executable(velox_cudf_order_by_test Main.cpp OrderByTest.cpp)
if(VELOX_ENABLE_S3)
Expand Down Expand Up @@ -96,6 +97,12 @@ add_test(
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)

# Register the logical-functions suite with CTest. The test binary is run with
# this directory (CMAKE_CURRENT_SOURCE_DIR) as its working directory.
add_test(
NAME velox_cudf_logical_functions_test
COMMAND velox_cudf_logical_functions_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
)

add_test(
NAME velox_cudf_local_partition_test
COMMAND velox_cudf_local_partition_test
Expand Down Expand Up @@ -183,6 +190,7 @@ set_tests_properties(velox_cudf_decimal_expression_test PROPERTIES LABELS cuda_d
# Each of these suites is tagged with the cuda_driver label and given a
# TIMEOUT of 3000 (seconds, per CMake's TIMEOUT test property).
set_tests_properties(velox_cudf_filter_project_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
set_tests_properties(velox_cudf_hash_join_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
set_tests_properties(velox_cudf_limit_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
set_tests_properties(velox_cudf_logical_functions_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
set_tests_properties(velox_cudf_local_partition_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
set_tests_properties(velox_cudf_order_by_test PROPERTIES LABELS cuda_driver TIMEOUT 3000)
if(VELOX_ENABLE_S3)
Expand Down Expand Up @@ -294,6 +302,17 @@ target_link_libraries(
gtest_main
)

# Link dependencies for the logical-functions test: the cuDF exec and
# expression libraries, the Velox exec library and its test utilities, and
# GoogleTest.
target_link_libraries(
velox_cudf_logical_functions_test
velox_cudf_exec
velox_cudf_expression
velox_exec
velox_exec_test_lib
velox_test_util
gtest
gtest_main
)

target_link_libraries(
velox_cudf_local_partition_test
velox_cudf_exec
Expand Down
162 changes: 162 additions & 0 deletions velox/experimental/cudf/tests/LogicalFunctionsTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "velox/experimental/cudf/CudfConfig.h"
#include "velox/experimental/cudf/exec/ToCudf.h"
#include "velox/experimental/cudf/expression/AstExpression.h"
#include "velox/experimental/cudf/expression/ExpressionEvaluator.h"

#include "velox/common/file/FileSystems.h"
#include "velox/exec/tests/utils/OperatorTestBase.h"
#include "velox/exec/tests/utils/PlanBuilder.h"

using namespace facebook::velox;
using namespace facebook::velox::exec::test;

namespace {

// Fixture for the `not` / `is_null` / `isnotnull` CudfFunction classes added
// by this PR.
//
// All three operators are also supported natively by the AST evaluator, whose
// default priority (100) is higher than the Function evaluator's (50). Under
// the default configuration the CudfFunction classes would therefore never
// run for primitive types. To reach them, SetUp re-registers the AST evaluator
// at priority 0 so it becomes a last-resort fallback, and any expression that
// FunctionExpression can handle goes through the new classes instead.
//
// Only primitive types are used so the tests do not depend on the timestamp
// or decimal PRs.
class CudfLogicalFunctionsTest : public OperatorTestBase {
 protected:
  // Brings up cuDF with CPU fallback disabled, then demotes the AST evaluator.
  void SetUp() override {
    OperatorTestBase::SetUp();
    filesystems::registerLocalFileSystem();
    cudf_velox::CudfConfig::getInstance().allowCpuFallback = false;
    cudf_velox::registerCudf();

    // Same predicate and factory the AST evaluator uses; only the priority
    // changes.
    auto astCanEvaluate = [](std::shared_ptr<exec::Expr> expr) {
      return cudf_velox::ASTExpression::canEvaluate(expr);
    };
    auto astCreate = [](std::shared_ptr<exec::Expr> expr,
                        const RowTypePtr& row) {
      return std::make_shared<cudf_velox::ASTExpression>(std::move(expr), row);
    };

    // Replace the existing AST registration with one at priority 0.
    cudf_velox::registerCudfExpressionEvaluator(
        cudf_velox::kAstEvaluatorName,
        /*priority=*/0,
        astCanEvaluate,
        astCreate,
        /*overwrite=*/true);
  }

  // Unregisters cuDF before the base fixture tears down.
  void TearDown() override {
    cudf_velox::unregisterCudf();
    OperatorTestBase::TearDown();
  }

  // Loads `input` into the DuckDB reference table, runs a Values -> Project
  // plan with the single `projection` expression, and checks the plan's
  // output against the DuckDB query `sql`.
  void runProject(
      const std::vector<RowVectorPtr>& input,
      const std::string& projection,
      const std::string& sql) {
    createDuckDbTable(input);
    const auto projectPlan =
        PlanBuilder().values(input).project({projection}).planNode();
    assertQuery(projectPlan, sql);
  }
};

// NotFunction, simplest case: NOT applied directly to a boolean column with
// no nulls.
TEST_F(CudfLogicalFunctionsTest, notColumn) {
  auto flags = makeFlatVector<bool>({true, false, true, false});
  auto input = makeRowVector({"a"}, {flags});
  runProject({input}, "NOT a AS r", "SELECT NOT a AS r FROM tmp");
}

// NotFunction over a nested expression: the operand is the result of an
// equality comparison, which is itself produced by a nested
// FunctionExpression path.
TEST_F(CudfLogicalFunctionsTest, notComparison) {
  auto values = makeFlatVector<int32_t>({1, 2, 3, 4, 5});
  auto input = makeRowVector({"c0"}, {values});
  runProject(
      {input}, "NOT (c0 = 3) AS r", "SELECT NOT (c0 = 3) AS r FROM tmp");
}

// NotFunction with null rows in the input. `cudf::unary_operation` with `NOT`
// is expected to leave null rows null; the reference SQL result checks this.
TEST_F(CudfLogicalFunctionsTest, notWithNullRows) {
  auto flags = makeNullableFlatVector<bool>(
      {true, false, std::nullopt, true, std::nullopt});
  auto input = makeRowVector({"a"}, {flags});
  runProject({input}, "NOT a AS r", "SELECT NOT a AS r FROM tmp");
}

// IsNullFunction on a nullable INTEGER column that interleaves null and
// non-null rows.
TEST_F(CudfLogicalFunctionsTest, isNullInteger) {
  auto values = makeNullableFlatVector<int32_t>(
      {1, std::nullopt, 3, std::nullopt, 5});
  auto input = makeRowVector({"c0"}, {values});
  runProject({input}, "c0 IS NULL AS r", "SELECT c0 IS NULL AS r FROM tmp");
}

// IsNullFunction on a nullable VARCHAR column, covering string-typed input.
// The data includes an empty (but non-null) string alongside the null rows.
TEST_F(CudfLogicalFunctionsTest, isNullVarchar) {
  auto strings = makeNullableFlatVector<std::string>(
      {"x", std::nullopt, "y", std::nullopt, ""});
  auto input = makeRowVector({"c0"}, {strings});
  runProject({input}, "c0 IS NULL AS r", "SELECT c0 IS NULL AS r FROM tmp");
}

// IsNullFunction on a column that has no nulls at all; every output row is
// expected to be false.
TEST_F(CudfLogicalFunctionsTest, isNullNoNulls) {
  auto values = makeFlatVector<int32_t>({1, 2, 3, 4, 5});
  auto input = makeRowVector({"c0"}, {values});
  runProject({input}, "c0 IS NULL AS r", "SELECT c0 IS NULL AS r FROM tmp");
}

// IsNotNullFunction on a nullable INTEGER column; the mirror image of the
// IS NULL integer case.
TEST_F(CudfLogicalFunctionsTest, isNotNullInteger) {
  auto values = makeNullableFlatVector<int32_t>(
      {1, std::nullopt, 3, std::nullopt, 5});
  auto input = makeRowVector({"c0"}, {values});
  runProject(
      {input}, "c0 IS NOT NULL AS r", "SELECT c0 IS NOT NULL AS r FROM tmp");
}

// IsNotNullFunction on a nullable VARCHAR column, including an empty but
// non-null string.
TEST_F(CudfLogicalFunctionsTest, isNotNullVarchar) {
  auto strings = makeNullableFlatVector<std::string>(
      {"x", std::nullopt, "y", std::nullopt, ""});
  auto input = makeRowVector({"c0"}, {strings});
  runProject(
      {input}, "c0 IS NOT NULL AS r", "SELECT c0 IS NOT NULL AS r FROM tmp");
}

// Composition of the two functions: NOT applied to the result of IS NULL, so
// NotFunction consumes the column that IsNullFunction produced.
TEST_F(CudfLogicalFunctionsTest, notOfIsNull) {
  auto values = makeNullableFlatVector<int32_t>(
      {1, std::nullopt, 3, std::nullopt, 5});
  auto input = makeRowVector({"c0"}, {values});
  runProject(
      {input},
      "NOT (c0 IS NULL) AS r",
      "SELECT NOT (c0 IS NULL) AS r FROM tmp");
}

} // namespace
Loading