From 7830d16ef986421fb308aa7c0dbff6f7382f7d8a Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Mon, 30 May 2022 18:22:25 +0530 Subject: [PATCH 01/17] feat: compute functions mapping for substrait --- cpp/src/arrow/engine/CMakeLists.txt | 1 + .../arrow/engine/substrait/extension_set.h | 5 + .../engine/substrait/function_internal.cc | 215 ++++++++++++++++++ .../engine/substrait/function_internal.h | 45 ++++ 4 files changed, 266 insertions(+) create mode 100644 cpp/src/arrow/engine/substrait/function_internal.cc create mode 100644 cpp/src/arrow/engine/substrait/function_internal.h diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt index 8edd22900e6..3f735bdcf46 100644 --- a/cpp/src/arrow/engine/CMakeLists.txt +++ b/cpp/src/arrow/engine/CMakeLists.txt @@ -21,6 +21,7 @@ arrow_install_all_headers("arrow/engine/substrait") set(ARROW_SUBSTRAIT_SRCS substrait/expression_internal.cc + substrait/function_internal.cc substrait/extension_set.cc substrait/extension_types.cc substrait/serde.cc diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index de013015a72..fe061c508f6 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -22,12 +22,15 @@ #include #include +#include "arrow/compute/function.h" +#include "arrow/compute/exec/expression.h" #include "arrow/engine/substrait/visibility.h" #include "arrow/type_fwd.h" #include "arrow/util/optional.h" #include "arrow/util/string_view.h" #include "arrow/util/hash_util.h" +#include "substrait/expression.pb.h" // IWYU pragma: export namespace arrow { namespace engine { @@ -267,5 +270,7 @@ class ARROW_ENGINE_EXPORT ExtensionSet { Status AddUri(Id id); }; + + } // namespace engine } // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/function_internal.cc b/cpp/src/arrow/engine/substrait/function_internal.cc new file mode 100644 index 00000000000..9fd5409947d --- /dev/null +++ b/cpp/src/arrow/engine/substrait/function_internal.cc @@ -0,0 +1,215 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/engine/substrait/function_internal.h" + +#include "arrow/engine/substrait/extension_set.h" +#include "arrow/engine/substrait/expression_internal.h" +#include "arrow/compute/api_scalar.h" + + +namespace arrow{ +namespace engine{ + +SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(1); + auto value_2 = call.args(2); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + auto options = call.args(0); + if (options.has_enum_()) { + auto overflow_handling = options.enum_(); + if(overflow_handling.has_specified()){ + std::string overflow_type = overflow_handling.specified(); + if(overflow_type == "SILENT"){ + return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); + } else if (overflow_type == "SATURATE") { + return Status::Invalid("Arrow does not support a saturating add"); + } else { + return arrow::compute::call("add_checked", {expression_1,expression_2}, compute::ArithmeticOptions(true)); + } + } else { + return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); + } + } else { + return Status::Invalid("Substrait Function Options should be an enum"); + } +}; + +const ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); + substrait_call.set_function_reference(function_reference); + + substrait::Expression::Enum options; + std::string overflow_handling = "ERROR"; + options.set_specified(overflow_handling); + substrait_call.add_args()->set_allocated_enum_(&options); + + auto expression_1 = call.arguments[0]; + auto expression_2 = call.arguments[1]; + + ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); + + substrait_call.add_args()->CopyFrom(*value_1); + substrait_call.add_args()->CopyFrom(*value_2); + return &substrait_call; +}; +// ArrowToSubstrait arrow_unchecked_add_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { +// auto overflow_behavior = substrait::Expression::Enum; +// overflow_behavior.set_specified("SILENT"); +// auto substrait_call = substrait::FunctionSignature_Scalar; +// substrait_call.add_name("add"); + +// substrait_call.add_args(std::move(args)); +// substrait_call.add_args({overflow_behavior}); +// return substrait_call; +// }; + + +// Boolean Functions mapping +SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("invert", {expression_1}); +}; + +SubstraitToArrow substrait_or_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("or_kleene", {expression_1}); +}; + +SubstraitToArrow substrait_and_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("and_kleene", {expression_1}); +}; + +SubstraitToArrow substrait_xor_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("xor", {expression_1, expression_2}); +}; + +// ArrowToSubstrait arrow_invert_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { +// auto substrait_call = substrait::FunctionSignature_Scalar; +// substrait_call.add_name("not"); +// substrait_call.add_args(std::move(args)); +// return substrait_call; +// }; + +// ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { +// auto substrait_call = substrait::FunctionSignature_Scalar; +// substrait_call.add_name("or"); +// substrait_call.add_args(std::move(args)); +// return substrait_call; +// }; + + +// ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { +// auto substrait_call = substrait::FunctionSignature_Scalar; +// substrait_call.add_name("and"); +// substrait_call.add_args(std::move(args)); +// return substrait_call; +// }; + +// ArrowToSubstrait arrow_xor_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { +// auto substrait_call = substrait::FunctionSignature_Scalar; +// substrait_call.add_name("xor"); +// substrait_call.add_args(std::move(args)); +// return substrait_call; +// }; + +// Comparison Functions mapping +SubstraitToArrow substrait_lt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("less", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_gt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("greater", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_lte_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("less_equal", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_not_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("not_equal", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("equal", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_is_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("is_null", {expression_1}); +}; + +SubstraitToArrow substrait_is_not_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("is_valid", {expression_1}); +}; + +// SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { +// auto null_check = arrow::compute::call("is_null", call.args(2)); +// if(null_check){ +// return arrow::compute::call("not_equal", {null_check,null_check}); +// } +// return arrow::compute::all("not_equal", call.args(2)); +// }; +} +} diff --git a/cpp/src/arrow/engine/substrait/function_internal.h b/cpp/src/arrow/engine/substrait/function_internal.h new file mode 100644 index 00000000000..3b18d1d53c6 --- /dev/null +++ b/cpp/src/arrow/engine/substrait/function_internal.h @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This API is EXPERIMENTAL. + +#include "arrow/engine/substrait/extension_set.h" +#include "arrow/compute/function.h" +#include "arrow/compute/exec/expression.h" + +#include "substrait/function.pb.h" // IWYU pragma: export + + +namespace arrow{ +namespace engine{ + +using ArrowToSubstrait = const std::function(const arrow::compute::Expression&, ExtensionSet*)>; +using SubstraitToArrow = std::function(const substrait::Expression::ScalarFunction&)>; +class FunctionMapping { + + // Registration API + Status AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func); + Status AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func); + + // Usage API + Result> ToProto(const arrow::compute::Expression::Call& call, ExtensionSet* ext_set); + Result FromProto(const substrait::Expression::ScalarFunction& call); +}; + + +} // namespace engine +} // namespace arrow From 91740f82d136e2d339e2509d8af016c972450c4a Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Wed, 1 Jun 2022 10:07:04 +0530 Subject: [PATCH 02/17] feat: using function mappings in extension set --- cpp/src/arrow/engine/CMakeLists.txt | 1 - .../engine/substrait/expression_internal.cc | 16 +- .../arrow/engine/substrait/extension_set.cc | 254 ++++++++++++++++++ .../arrow/engine/substrait/extension_set.h | 23 +- .../engine/substrait/function_internal.cc | 215 --------------- .../engine/substrait/function_internal.h | 45 ---- 6 files changed, 279 insertions(+), 275 deletions(-) delete mode 100644 cpp/src/arrow/engine/substrait/function_internal.cc delete mode 100644 cpp/src/arrow/engine/substrait/function_internal.h diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt index 3f735bdcf46..8edd22900e6 100644 --- a/cpp/src/arrow/engine/CMakeLists.txt +++ b/cpp/src/arrow/engine/CMakeLists.txt @@ -21,7 +21,6 @@ arrow_install_all_headers("arrow/engine/substrait") set(ARROW_SUBSTRAIT_SRCS substrait/expression_internal.cc - substrait/function_internal.cc substrait/extension_set.cc substrait/extension_types.cc substrait/serde.cc diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 5d7d66225e1..7a785d2c7f6 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -159,21 +159,11 @@ Result FromProto(const substrait::Expression& expr, ARROW_ASSIGN_OR_RAISE(auto decoded_function, ext_set.DecodeFunction(scalar_fn.function_reference())); + + auto arrow_function = ext_set.functions_map.GetArrowFromSubstrait(static_cast(decoded_function.name)); - std::vector arguments(scalar_fn.args_size()); - for (int i = 0; i < scalar_fn.args_size(); ++i) { - ARROW_ASSIGN_OR_RAISE(arguments[i], FromProto(scalar_fn.args(i), ext_set)); - } + return arrow_function(scalar_fn); - auto func_name = decoded_function.name.to_string(); - if (func_name != "cast") { - return compute::call(func_name, std::move(arguments)); - } else { - ARROW_ASSIGN_OR_RAISE(auto output_type_desc, - FromProto(scalar_fn.output_type(), ext_set)); - auto cast_options = compute::CastOptions::Safe(std::move(output_type_desc.first)); - return compute::call(func_name, std::move(arguments), std::move(cast_options)); - } } default: diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index a30c740b181..0263c29182c 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -20,10 +20,13 @@ #include #include +#include "arrow/compute/api_scalar.h" #include "arrow/util/hash_util.h" #include "arrow/util/hashing.h" #include "arrow/util/string_view.h" +#include "arrow/engine/substrait/expression_internal.h" + namespace arrow { namespace engine { namespace { @@ -458,5 +461,256 @@ std::shared_ptr nested_extension_id_registry( return std::make_shared(parent); } +SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(1); + auto value_2 = call.args(2); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + auto options = call.args(0); + if (options.has_enum_()) { + auto overflow_handling = options.enum_(); + if(overflow_handling.has_specified()){ + std::string overflow_type = overflow_handling.specified(); + if(overflow_type == "SILENT"){ + return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); + } else if (overflow_type == "SATURATE") { + return Status::Invalid("Arrow does not support a saturating add"); + } else { + return arrow::compute::call("add_checked", {expression_1,expression_2}, compute::ArithmeticOptions(true)); + } + } else { + return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); + } + } else { + return Status::Invalid("Substrait Function Options should be an enum"); + } +}; + +ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); + substrait_call.set_function_reference(function_reference); + + substrait::Expression::Enum options; + std::string overflow_handling = "ERROR"; + options.set_specified(overflow_handling); + substrait_call.add_args()->set_allocated_enum_(&options); + + auto expression_1 = call.arguments[0]; + auto expression_2 = call.arguments[1]; + + ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); + + substrait_call.add_args()->CopyFrom(*value_1); + substrait_call.add_args()->CopyFrom(*value_2); + return std::move(substrait_call); +}; + +ArrowToSubstrait arrow_unchecked_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); + substrait_call.set_function_reference(function_reference); + + substrait::Expression::Enum options; + std::string overflow_handling = "SILENT"; + options.set_specified(overflow_handling); + substrait_call.add_args()->set_allocated_enum_(&options); + + auto expression_1 = call.arguments[0]; + auto expression_2 = call.arguments[1]; + + ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); + + substrait_call.add_args()->CopyFrom(*value_1); + substrait_call.add_args()->CopyFrom(*value_2); + return std::move(substrait_call); +}; + + +// Boolean Functions mapping +SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("invert", {expression_1}); +}; + +SubstraitToArrow substrait_or_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("or_kleene", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_and_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("and_kleene", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_xor_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("xor", {expression_1, expression_2}); +}; + +ArrowToSubstrait arrow_invert_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("not")); + substrait_call.set_function_reference(function_reference); + + auto expression_1 = call.arguments[0]; + auto expression_2 = call.arguments[1]; + + ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); + + substrait_call.add_args()->CopyFrom(*value_1); + substrait_call.add_args()->CopyFrom(*value_2); + return std::move(substrait_call); + +}; + +ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("or")); + substrait_call.set_function_reference(function_reference); + + auto expression_1 = call.arguments[0]; + auto expression_2 = call.arguments[1]; + + ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); + + substrait_call.add_args()->CopyFrom(*value_1); + substrait_call.add_args()->CopyFrom(*value_2); + return std::move(substrait_call); +}; + + +ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("and")); + substrait_call.set_function_reference(function_reference); + + auto expression_1 = call.arguments[0]; + auto expression_2 = call.arguments[1]; + + ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); + + substrait_call.add_args()->CopyFrom(*value_1); + substrait_call.add_args()->CopyFrom(*value_2); + + return std::move(substrait_call); +}; + +ArrowToSubstrait arrow_xor_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("xor")); + substrait_call.set_function_reference(function_reference); + + auto expression_1 = call.arguments[0]; + auto expression_2 = call.arguments[1]; + + ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); + + substrait_call.add_args()->CopyFrom(*value_1); + substrait_call.add_args()->CopyFrom(*value_2); + return std::move(substrait_call); +}; + +// Comparison Functions mapping +SubstraitToArrow substrait_lt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("less", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_gt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("greater", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_lte_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("less_equal", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_not_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("not_equal", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + return arrow::compute::call("equal", {expression_1, expression_2}); +}; + +SubstraitToArrow substrait_is_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("is_null", {expression_1}); +}; + +SubstraitToArrow substrait_is_not_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + return arrow::compute::call("is_valid", {expression_1}); +}; + +SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto value_1 = call.args(0); + auto value_2 = call.args(1); + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); + auto null_check_1 = arrow::compute::call("is_null", {expression_1}); + auto null_check_2 = arrow::compute::call("is_null", {expression_2}); + if(null_check_1.IsNullLiteral() && null_check_1.IsNullLiteral()){ + return arrow::compute::call("not_equal", {null_check_1, null_check_2}); + } + return arrow::compute::call("not_equal", {expression_1, expression_2}); +}; + } // namespace engine } // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index fe061c508f6..8e02b69e896 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -35,6 +35,25 @@ namespace arrow { namespace engine { +class ExtensionSet; +using ArrowToSubstrait = std::function(const arrow::compute::Expression::Call&, arrow::engine::ExtensionSet*)>; +using SubstraitToArrow = std::function(const substrait::Expression::ScalarFunction&)>; + +class FunctionMapping { + + std::unordered_map substrait_to_arrow; + std::unordered_map arrow_to_substrait; + + // Registration API + Status AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func); + Status AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func); + + public: + SubstraitToArrow GetArrowFromSubstrait(std::string name) const { return substrait_to_arrow.at(name);} + ArrowToSubstrait GetSubstraitFromArrow(std::string name) const { return arrow_to_substrait.at(name);} +}; + + /// Substrait identifies functions and custom data types using a (uri, name) pair. /// /// This registry is a bidirectional mapping between Substrait IDs and their corresponding @@ -247,6 +266,8 @@ class ARROW_ENGINE_EXPORT ExtensionSet { /// future; see ARROW-15583. std::size_t num_functions() const { return functions_.size(); } + arrow::engine::FunctionMapping functions_map; + private: const ExtensionIdRegistry* registry_; @@ -264,7 +285,7 @@ class ARROW_ENGINE_EXPORT ExtensionSet { // Map from function names to anchor values. Used during Arrow->Substrait // and built as the plan is created. std::unordered_map functions_map_; - + Status CheckHasUri(util::string_view uri); void AddUri(std::pair uri); Status AddUri(Id id); diff --git a/cpp/src/arrow/engine/substrait/function_internal.cc b/cpp/src/arrow/engine/substrait/function_internal.cc deleted file mode 100644 index 9fd5409947d..00000000000 --- a/cpp/src/arrow/engine/substrait/function_internal.cc +++ /dev/null @@ -1,215 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/engine/substrait/function_internal.h" - -#include "arrow/engine/substrait/extension_set.h" -#include "arrow/engine/substrait/expression_internal.h" -#include "arrow/compute/api_scalar.h" - - -namespace arrow{ -namespace engine{ - -SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(1); - auto value_2 = call.args(2); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - auto options = call.args(0); - if (options.has_enum_()) { - auto overflow_handling = options.enum_(); - if(overflow_handling.has_specified()){ - std::string overflow_type = overflow_handling.specified(); - if(overflow_type == "SILENT"){ - return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); - } else if (overflow_type == "SATURATE") { - return Status::Invalid("Arrow does not support a saturating add"); - } else { - return arrow::compute::call("add_checked", {expression_1,expression_2}, compute::ArithmeticOptions(true)); - } - } else { - return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); - } - } else { - return Status::Invalid("Substrait Function Options should be an enum"); - } -}; - -const ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { - substrait::Expression::ScalarFunction substrait_call; - - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); - substrait_call.set_function_reference(function_reference); - - substrait::Expression::Enum options; - std::string overflow_handling = "ERROR"; - options.set_specified(overflow_handling); - substrait_call.add_args()->set_allocated_enum_(&options); - - auto expression_1 = call.arguments[0]; - auto expression_2 = call.arguments[1]; - - ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); - - substrait_call.add_args()->CopyFrom(*value_1); - substrait_call.add_args()->CopyFrom(*value_2); - return &substrait_call; -}; -// ArrowToSubstrait arrow_unchecked_add_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { -// auto overflow_behavior = substrait::Expression::Enum; -// overflow_behavior.set_specified("SILENT"); -// auto substrait_call = substrait::FunctionSignature_Scalar; -// substrait_call.add_name("add"); - -// substrait_call.add_args(std::move(args)); -// substrait_call.add_args({overflow_behavior}); -// return substrait_call; -// }; - - -// Boolean Functions mapping -SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("invert", {expression_1}); -}; - -SubstraitToArrow substrait_or_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("or_kleene", {expression_1}); -}; - -SubstraitToArrow substrait_and_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("and_kleene", {expression_1}); -}; - -SubstraitToArrow substrait_xor_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("xor", {expression_1, expression_2}); -}; - -// ArrowToSubstrait arrow_invert_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { -// auto substrait_call = substrait::FunctionSignature_Scalar; -// substrait_call.add_name("not"); -// substrait_call.add_args(std::move(args)); -// return substrait_call; -// }; - -// ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { -// auto substrait_call = substrait::FunctionSignature_Scalar; -// substrait_call.add_name("or"); -// substrait_call.add_args(std::move(args)); -// return substrait_call; -// }; - - -// ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { -// auto substrait_call = substrait::FunctionSignature_Scalar; -// substrait_call.add_name("and"); -// substrait_call.add_args(std::move(args)); -// return substrait_call; -// }; - -// ArrowToSubstrait arrow_xor_to_substrait = [] (const arrow::compute::Expression::Call& call, std::vector args) { -// auto substrait_call = substrait::FunctionSignature_Scalar; -// substrait_call.add_name("xor"); -// substrait_call.add_args(std::move(args)); -// return substrait_call; -// }; - -// Comparison Functions mapping -SubstraitToArrow substrait_lt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("less", {expression_1, expression_2}); -}; - -SubstraitToArrow substrait_gt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("greater", {expression_1, expression_2}); -}; - -SubstraitToArrow substrait_lte_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("less_equal", {expression_1, expression_2}); -}; - -SubstraitToArrow substrait_not_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("not_equal", {expression_1, expression_2}); -}; - -SubstraitToArrow substrait_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("equal", {expression_1, expression_2}); -}; - -SubstraitToArrow substrait_is_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("is_null", {expression_1}); -}; - -SubstraitToArrow substrait_is_not_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("is_valid", {expression_1}); -}; - -// SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { -// auto null_check = arrow::compute::call("is_null", call.args(2)); -// if(null_check){ -// return arrow::compute::call("not_equal", {null_check,null_check}); -// } -// return arrow::compute::all("not_equal", call.args(2)); -// }; -} -} diff --git a/cpp/src/arrow/engine/substrait/function_internal.h b/cpp/src/arrow/engine/substrait/function_internal.h deleted file mode 100644 index 3b18d1d53c6..00000000000 --- a/cpp/src/arrow/engine/substrait/function_internal.h +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// This API is EXPERIMENTAL. - -#include "arrow/engine/substrait/extension_set.h" -#include "arrow/compute/function.h" -#include "arrow/compute/exec/expression.h" - -#include "substrait/function.pb.h" // IWYU pragma: export - - -namespace arrow{ -namespace engine{ - -using ArrowToSubstrait = const std::function(const arrow::compute::Expression&, ExtensionSet*)>; -using SubstraitToArrow = std::function(const substrait::Expression::ScalarFunction&)>; -class FunctionMapping { - - // Registration API - Status AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func); - Status AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func); - - // Usage API - Result> ToProto(const arrow::compute::Expression::Call& call, ExtensionSet* ext_set); - Result FromProto(const substrait::Expression::ScalarFunction& call); -}; - - -} // namespace engine -} // namespace arrow From 5cbdcf51fc3a891bc150caf19640e2da73496074 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 2 Jun 2022 10:21:12 +0530 Subject: [PATCH 03/17] feat: variadic boolean function mapping --- .../engine/substrait/expression_internal.cc | 2 +- .../arrow/engine/substrait/extension_set.cc | 80 ++++++++++++------- .../arrow/engine/substrait/extension_set.h | 28 ++++++- 3 files changed, 75 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 7a785d2c7f6..1c18b20a306 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -160,7 +160,7 @@ Result FromProto(const substrait::Expression& expr, ARROW_ASSIGN_OR_RAISE(auto decoded_function, ext_set.DecodeFunction(scalar_fn.function_reference())); - auto arrow_function = ext_set.functions_map.GetArrowFromSubstrait(static_cast(decoded_function.name)); + auto arrow_function = ext_set.GetFunctionMap().GetArrowFromSubstrait(decoded_function.name.to_string()); return arrow_function(scalar_fn); diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 0263c29182c..db1e9868d05 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -20,13 +20,10 @@ #include #include -#include "arrow/compute/api_scalar.h" #include "arrow/util/hash_util.h" #include "arrow/util/hashing.h" #include "arrow/util/string_view.h" -#include "arrow/engine/substrait/expression_internal.h" - namespace arrow { namespace engine { namespace { @@ -461,6 +458,21 @@ std::shared_ptr nested_extension_id_registry( return std::make_shared(parent); } +Status FunctionMapping::AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func){ + if (arrow_to_substrait.find(arrow_function_name) != arrow_to_substrait.end()){ + arrow_to_substrait[arrow_function_name] = conversion_func; + } + return Status::OK(); +} + +Status FunctionMapping::AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func){ + if (substrait_to_arrow.find(substrait_function_name) != substrait_to_arrow.end()){ + substrait_to_arrow[substrait_function_name] = conversion_func; + } + return Status::OK(); +>>>>>>> 77d2398a4 (feat: variadic boolean function mapping) +} + SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { auto value_1 = call.args(1); auto value_2 = call.args(2); @@ -541,21 +553,31 @@ SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::Scala }; SubstraitToArrow substrait_or_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); + int num_args = call.args_size(); // OR function has variadic arguments + substrait::Expression value; ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("or_kleene", {expression_1, expression_2}); + arrow::compute::Expression expression; + std::vector func_args; + for(int i=0; i Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); + int num_args = call.args_size(); // AND function has variadic arguments + substrait::Expression value; ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("and_kleene", {expression_1, expression_2}); + arrow::compute::Expression expression; + std::vector func_args; + for(int i=0; i Result { @@ -591,14 +613,13 @@ ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expres ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("or")); substrait_call.set_function_reference(function_reference); - auto expression_1 = call.arguments[0]; - auto expression_2 = call.arguments[1]; - - ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); - - substrait_call.add_args()->CopyFrom(*value_1); - substrait_call.add_args()->CopyFrom(*value_2); + arrow::compute::Expression expression; + std::unique_ptr value; + for(size_t i = 0; iCopyFrom(*value); + } return std::move(substrait_call); }; @@ -609,14 +630,13 @@ ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expre ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("and")); substrait_call.set_function_reference(function_reference); - auto expression_1 = call.arguments[0]; - auto expression_2 = call.arguments[1]; - - ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); - - substrait_call.add_args()->CopyFrom(*value_1); - substrait_call.add_args()->CopyFrom(*value_2); + arrow::compute::Expression expression; + std::unique_ptr value; + for(size_t i = 0; iCopyFrom(*value); + } return std::move(substrait_call); }; @@ -713,4 +733,4 @@ SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait:: }; } // namespace engine -} // namespace arrow +} // namespace arrow \ No newline at end of file diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index 8e02b69e896..45b14933aef 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -41,6 +41,23 @@ using SubstraitToArrow = std::function(const class FunctionMapping { + enum defined_functions { + add, + add_unchecked, + invert, + or_kleene, + and_kleene, + exclusive_or, + lt, + gt, + lte, + not_equal, + equal, + is_null, + is_not_null, + is_not_distict_from + }; + std::unordered_map substrait_to_arrow; std::unordered_map arrow_to_substrait; @@ -111,6 +128,7 @@ class ARROW_ENGINE_EXPORT ExtensionIdRegistry { Id id; const std::string& function_name; }; + arrow::engine::FunctionMapping functions_map; virtual util::optional GetFunction(Id) const = 0; virtual util::optional GetFunction( util::string_view arrow_function_name) const = 0; @@ -265,9 +283,7 @@ class ARROW_ENGINE_EXPORT ExtensionSet { /// value larger than the actual number of functions. This behavior may change in the /// future; see ARROW-15583. std::size_t num_functions() const { return functions_.size(); } - - arrow::engine::FunctionMapping functions_map; - + private: const ExtensionIdRegistry* registry_; @@ -288,7 +304,11 @@ class ARROW_ENGINE_EXPORT ExtensionSet { Status CheckHasUri(util::string_view uri); void AddUri(std::pair uri); - Status AddUri(Id id); + Status AddUri(Id id); + + public: + FunctionMapping GetFunctionMap() const { return registry_->functions_map;} + }; From ffc77d986494a29698e9233f0cb5cc0f5143eb4a Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 2 Jun 2022 20:10:48 +0530 Subject: [PATCH 04/17] remove: enum for all function mappings --- cpp/src/arrow/engine/substrait/extension_set.h | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index 45b14933aef..a478f485576 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -40,23 +40,6 @@ using ArrowToSubstrait = std::function(const substrait::Expression::ScalarFunction&)>; class FunctionMapping { - - enum defined_functions { - add, - add_unchecked, - invert, - or_kleene, - and_kleene, - exclusive_or, - lt, - gt, - lte, - not_equal, - equal, - is_null, - is_not_null, - is_not_distict_from - }; std::unordered_map substrait_to_arrow; std::unordered_map arrow_to_substrait; From 754b3a35aabe5c743cc2a47c48611caee41c7807 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 2 Jun 2022 20:25:39 +0530 Subject: [PATCH 05/17] feat: Get functions to return status invalid if function not found --- .../engine/substrait/expression_internal.cc | 4 +--- cpp/src/arrow/engine/substrait/extension_set.cc | 17 +++++++++++++++++ cpp/src/arrow/engine/substrait/extension_set.h | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 1c18b20a306..5992110c34f 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -160,10 +160,8 @@ Result FromProto(const substrait::Expression& expr, ARROW_ASSIGN_OR_RAISE(auto decoded_function, ext_set.DecodeFunction(scalar_fn.function_reference())); - auto arrow_function = ext_set.GetFunctionMap().GetArrowFromSubstrait(decoded_function.name.to_string()); - + ARROW_ASSIGN_OR_RAISE(auto arrow_function, ext_set.GetFunctionMap().GetArrowFromSubstrait(decoded_function.name.to_string())); return arrow_function(scalar_fn); - } default: diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index db1e9868d05..9c901ce1c37 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -473,6 +473,23 @@ Status FunctionMapping::AddSubstraitToArrow(std::string substrait_function_name, >>>>>>> 77d2398a4 (feat: variadic boolean function mapping) } +Result FunctionMapping::GetArrowFromSubstrait(std::string name) const { + if (FunctionMapping::substrait_to_arrow.find(name)!=FunctionMapping::substrait_to_arrow.end()){ + return FunctionMapping::substrait_to_arrow.at(name); + } else { + return Status::Invalid("Substrait function doesn't exist in the mapping registry"); + } +} + +Result FunctionMapping::GetSubstraitFromArrow(std::string name) const { + if (FunctionMapping::arrow_to_substrait.find(name)!=FunctionMapping::arrow_to_substrait.end()){ + return FunctionMapping::arrow_to_substrait.at(name); + } else { + return Status::Invalid("Arrow function doesn't exist in the mapping registry"); + } +} + + SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { auto value_1 = call.args(1); auto value_2 = call.args(2); diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index a478f485576..1519ac873db 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -49,8 +49,8 @@ class FunctionMapping { Status AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func); public: - SubstraitToArrow GetArrowFromSubstrait(std::string name) const { return substrait_to_arrow.at(name);} - ArrowToSubstrait GetSubstraitFromArrow(std::string name) const { return arrow_to_substrait.at(name);} + Result GetArrowFromSubstrait(std::string name) const; + Result GetSubstraitFromArrow(std::string name) const; }; From 23f37f50608ec31ced04625b1138c6f2e2e771b8 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Thu, 2 Jun 2022 20:34:53 +0530 Subject: [PATCH 06/17] feat: registration api to return invalid status if function already registered --- cpp/src/arrow/engine/substrait/extension_set.cc | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 9c901ce1c37..db1e9868d05 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -473,23 +473,6 @@ Status FunctionMapping::AddSubstraitToArrow(std::string substrait_function_name, >>>>>>> 77d2398a4 (feat: variadic boolean function mapping) } -Result FunctionMapping::GetArrowFromSubstrait(std::string name) const { - if (FunctionMapping::substrait_to_arrow.find(name)!=FunctionMapping::substrait_to_arrow.end()){ - return FunctionMapping::substrait_to_arrow.at(name); - } else { - return Status::Invalid("Substrait function doesn't exist in the mapping registry"); - } -} - -Result FunctionMapping::GetSubstraitFromArrow(std::string name) const { - if (FunctionMapping::arrow_to_substrait.find(name)!=FunctionMapping::arrow_to_substrait.end()){ - return FunctionMapping::arrow_to_substrait.at(name); - } else { - return Status::Invalid("Arrow function doesn't exist in the mapping registry"); - } -} - - SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { auto value_1 = call.args(1); auto value_2 = call.args(2); From b28e5b1e32d554e58a76ce233e629a13c1ae839a Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 3 Jun 2022 15:23:51 +0530 Subject: [PATCH 07/17] feat: helpers for conversion functions --- cpp/src/arrow/engine/substrait/expression_internal.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 5992110c34f..c36aba6beb1 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -164,6 +164,15 @@ Result FromProto(const substrait::Expression& expr, return arrow_function(scalar_fn); } + case substrait::Expression::kEnum: { + auto enum_expr = expr.enum_(); + if(enum_expr.has_specified()){ + return compute::literal(std::move(enum_expr.specified())); + } else { + return Status::Invalid("Substrait Enum value not specified"); + } + } + default: break; } From 3b939c1e79f1faf4367e08be7e7a58912f4c823d Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sat, 4 Jun 2022 01:04:59 +0530 Subject: [PATCH 08/17] feat: support for subtract and multiply functions --- .../arrow/engine/substrait/extension_set.cc | 306 ++++++++---------- 1 file changed, 129 insertions(+), 177 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index db1e9868d05..74d0111457a 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -20,6 +20,8 @@ #include #include +#include "arrow/compute/api_scalar.h" +#include "arrow/engine/substrait/expression_internal.h" #include "arrow/util/hash_util.h" #include "arrow/util/hashing.h" #include "arrow/util/string_view.h" @@ -461,275 +463,225 @@ std::shared_ptr nested_extension_id_registry( Status FunctionMapping::AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func){ if (arrow_to_substrait.find(arrow_function_name) != arrow_to_substrait.end()){ arrow_to_substrait[arrow_function_name] = conversion_func; + return Status::OK(); } - return Status::OK(); + return Status::AlreadyExists("Arrow function already exist in the conversion map"); } Status FunctionMapping::AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func){ if (substrait_to_arrow.find(substrait_function_name) != substrait_to_arrow.end()){ substrait_to_arrow[substrait_function_name] = conversion_func; + return Status::OK(); } - return Status::OK(); ->>>>>>> 77d2398a4 (feat: variadic boolean function mapping) + return Status::AlreadyExists("Substrait function already exist in the conversion map"); } -SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(1); - auto value_2 = call.args(2); +Result FunctionMapping::GetArrowFromSubstrait(std::string name) const { + if (FunctionMapping::substrait_to_arrow.find(name)!=FunctionMapping::substrait_to_arrow.end()){ + return FunctionMapping::substrait_to_arrow.at(name); + } else { + return Status::KeyError("Substrait function doesn't exist in the mapping registry"); + } + } + +Result FunctionMapping::GetSubstraitFromArrow(std::string name) const { + if (FunctionMapping::arrow_to_substrait.find(name)!=FunctionMapping::arrow_to_substrait.end()){ + return FunctionMapping::arrow_to_substrait.at(name); + } else { + return Status::KeyError("Arrow function doesn't exist in the mapping registry"); + } + } + +std::vector substrait_convert_arguments(const substrait::Expression::ScalarFunction& call){ + substrait::Expression value; ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - auto options = call.args(0); - if (options.has_enum_()) { - auto overflow_handling = options.enum_(); - if(overflow_handling.has_specified()){ - std::string overflow_type = overflow_handling.specified(); - if(overflow_type == "SILENT"){ - return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); - } else if (overflow_type == "SATURATE") { - return Status::Invalid("Arrow does not support a saturating add"); - } else { - return arrow::compute::call("add_checked", {expression_1,expression_2}, compute::ArithmeticOptions(true)); - } + arrow::compute::Expression expression; + std::vector func_args; + for(int i=0; i value; + for(size_t i = 0; iCopyFrom(*value); + } + return std::move(substrait_call); +} + +substrait::Expression::ScalarFunction arrow_convert_arithmetic_arguments(const arrow::compute::Expression::Call& call, substrait::Expression::ScalarFunction& substrait_call, ExtensionSet* ext_set_, std::string overflow_handling){ + substrait::Expression::Enum options; + options.set_specified(overflow_handling); + substrait_call.add_args()->set_allocated_enum_(&options); + return arrow_convert_arguments(call, substrait_call, ext_set_); +} + + +SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + if(func_args[0].ToString() == "SILENT"){ + return arrow::compute::call("add", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); + } else if (func_args[0].ToString() == "SATURATE") { + return Status::Invalid("Arrow does not support a saturating add"); + } else { + return arrow::compute::call("add_checked", {func_args[1], func_args[2]}, compute::ArithmeticOptions(true)); + } + }; + +SubstraitToArrow substrait_subtract_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + if(func_args[0].ToString() == "SILENT"){ + return arrow::compute::call("subtract", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); + } else if (func_args[0].ToString() == "SATURATE") { + return Status::Invalid("Arrow does not support a saturating subtract"); } else { - return arrow::compute::call("add", {expression_1,expression_2}, compute::ArithmeticOptions()); + return arrow::compute::call("subtract_checked", {func_args[1], func_args[2]}, compute::ArithmeticOptions(true)); } +}; + +SubstraitToArrow substrait_multiply_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + if(func_args[0].ToString() == "SILENT"){ + return arrow::compute::call("multiply", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); + } else if (func_args[0].ToString() == "SATURATE") { + return Status::Invalid("Arrow does not support a saturating multiply"); } else { - return Status::Invalid("Substrait Function Options should be an enum"); + return arrow::compute::call("mutiply_checked", {func_args[1], func_args[2]}, compute::ArithmeticOptions(true)); } }; -ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); substrait_call.set_function_reference(function_reference); - - substrait::Expression::Enum options; - std::string overflow_handling = "ERROR"; - options.set_specified(overflow_handling); - substrait_call.add_args()->set_allocated_enum_(&options); - - auto expression_1 = call.arguments[0]; - auto expression_2 = call.arguments[1]; - - ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); - - substrait_call.add_args()->CopyFrom(*value_1); - substrait_call.add_args()->CopyFrom(*value_2); - return std::move(substrait_call); -}; + return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "ERROR"); + }; ArrowToSubstrait arrow_unchecked_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); substrait_call.set_function_reference(function_reference); + return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "SILENT"); +}; - substrait::Expression::Enum options; - std::string overflow_handling = "SILENT"; - options.set_specified(overflow_handling); - substrait_call.add_args()->set_allocated_enum_(&options); - - auto expression_1 = call.arguments[0]; - auto expression_2 = call.arguments[1]; - - ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); - - substrait_call.add_args()->CopyFrom(*value_1); - substrait_call.add_args()->CopyFrom(*value_2); - return std::move(substrait_call); +ArrowToSubstrait arrow_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "ERROR"); +}; + +ArrowToSubstrait arrow_unchecked_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "SILENT") ; }; +ArrowToSubstrait arrow_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("multiply")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "ERROR"); +}; + +ArrowToSubstrait arrow_unchecked_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("multiply")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "SILENT"); +}; // Boolean Functions mapping SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("invert", {expression_1}); + return arrow::compute::call("invert", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_or_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - int num_args = call.args_size(); // OR function has variadic arguments - substrait::Expression value; - ExtensionSet ext_set_; - arrow::compute::Expression expression; - std::vector func_args; - for(int i=0; i Result { - int num_args = call.args_size(); // AND function has variadic arguments - substrait::Expression value; - ExtensionSet ext_set_; - arrow::compute::Expression expression; - std::vector func_args; - for(int i=0; i Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("xor", {expression_1, expression_2}); + return arrow::compute::call("xor", substrait_convert_arguments(call)); }; ArrowToSubstrait arrow_invert_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("not")); substrait_call.set_function_reference(function_reference); - - auto expression_1 = call.arguments[0]; - auto expression_2 = call.arguments[1]; - - ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); - - substrait_call.add_args()->CopyFrom(*value_1); - substrait_call.add_args()->CopyFrom(*value_2); - return std::move(substrait_call); - + return arrow_convert_arguments(call, substrait_call, ext_set_); }; ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("or")); substrait_call.set_function_reference(function_reference); - - arrow::compute::Expression expression; - std::unique_ptr value; - for(size_t i = 0; iCopyFrom(*value); - } - return std::move(substrait_call); + return arrow_convert_arguments(call, substrait_call, ext_set_); }; ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("and")); substrait_call.set_function_reference(function_reference); - - arrow::compute::Expression expression; - std::unique_ptr value; - for(size_t i = 0; iCopyFrom(*value); - } - - return std::move(substrait_call); + return arrow_convert_arguments(call, substrait_call, ext_set_); }; ArrowToSubstrait arrow_xor_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("xor")); substrait_call.set_function_reference(function_reference); - - auto expression_1 = call.arguments[0]; - auto expression_2 = call.arguments[1]; - - ARROW_ASSIGN_OR_RAISE(auto value_1, ToProto(expression_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto value_2, ToProto(expression_2, ext_set_)); - - substrait_call.add_args()->CopyFrom(*value_1); - substrait_call.add_args()->CopyFrom(*value_2); - return std::move(substrait_call); + return arrow_convert_arguments(call, substrait_call, ext_set_); }; // Comparison Functions mapping SubstraitToArrow substrait_lt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("less", {expression_1, expression_2}); + return arrow::compute::call("less", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_gt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("greater", {expression_1, expression_2}); + return arrow::compute::call("greater", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_lte_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("less_equal", {expression_1, expression_2}); + return arrow::compute::call("less_equal", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_not_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("not_equal", {expression_1, expression_2}); + return arrow::compute::call("greater", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - return arrow::compute::call("equal", {expression_1, expression_2}); + return arrow::compute::call("equal", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_is_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("is_null", {expression_1}); + return arrow::compute::call("is_null", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_is_not_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - return arrow::compute::call("is_valid", {expression_1}); + return arrow::compute::call("is_valid", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto value_1 = call.args(0); - auto value_2 = call.args(1); - ExtensionSet ext_set_; - ARROW_ASSIGN_OR_RAISE(auto expression_1, FromProto(value_1, ext_set_)); - ARROW_ASSIGN_OR_RAISE(auto expression_2, FromProto(value_2, ext_set_)); - auto null_check_1 = arrow::compute::call("is_null", {expression_1}); - auto null_check_2 = arrow::compute::call("is_null", {expression_2}); + std::vector func_args = substrait_convert_arguments(call); + auto null_check_1 = arrow::compute::call("is_null", {func_args[0]}); + auto null_check_2 = arrow::compute::call("is_null", {func_args[1]}); if(null_check_1.IsNullLiteral() && null_check_1.IsNullLiteral()){ return arrow::compute::call("not_equal", {null_check_1, null_check_2}); } - return arrow::compute::call("not_equal", {expression_1, expression_2}); + return arrow::compute::call("not_equal", func_args); }; } // namespace engine From 76891e6e4982b3d6093017304fdb0d0c20014591 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sat, 4 Jun 2022 01:13:55 +0530 Subject: [PATCH 09/17] feat: mapped modulus function --- cpp/src/arrow/engine/substrait/extension_set.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 74d0111457a..f5b607a299a 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -557,6 +557,10 @@ SubstraitToArrow substrait_multiply_to_arrow = [] (const substrait::Expression:: } }; +SubstraitToArrow substrait_modulus_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + return arrow::compute::call("abs", substrait_convert_arguments(call)); +}; + ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); @@ -599,6 +603,13 @@ ArrowToSubstrait arrow_unchecked_multiply_to_substrait = [] (const arrow::comput return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "SILENT"); }; +ArrowToSubstrait arrow_abs_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("modulus")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + // Boolean Functions mapping SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { return arrow::compute::call("invert", substrait_convert_arguments(call)); From 99cab946bdf904882113cc0e8eab61b7a5623f7f Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sat, 4 Jun 2022 02:30:14 +0530 Subject: [PATCH 10/17] feat: remaining comparison function mapping --- .../arrow/engine/substrait/extension_set.cc | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index f5b607a299a..1afef12ccbf 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -669,8 +669,12 @@ SubstraitToArrow substrait_lte_to_arrow = [] (const substrait::Expression::Scala return arrow::compute::call("less_equal", substrait_convert_arguments(call)); }; +SubstraitToArrow substrait_gte_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + return arrow::compute::call("greater_equal", substrait_convert_arguments(call)); +}; + SubstraitToArrow substrait_not_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("greater", substrait_convert_arguments(call)); + return arrow::compute::call("not_equal", substrait_convert_arguments(call)); }; SubstraitToArrow substrait_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { @@ -695,5 +699,61 @@ SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait:: return arrow::compute::call("not_equal", func_args); }; +ArrowToSubstrait arrow_less_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("lt")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_greater_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("gt")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_less_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("lte")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_greater_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("gte")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("equal")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_not_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("not_equal")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_is_null_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("is_null")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_is_valid_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("is_not_null")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + } // namespace engine } // namespace arrow \ No newline at end of file From a27e03f6bd8a38800532d387cf14f65155b07db9 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sun, 5 Jun 2022 11:20:52 +0530 Subject: [PATCH 11/17] feat: mappings for string functions --- .../arrow/engine/substrait/extension_set.cc | 86 ++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 1afef12ccbf..68fe2b29f86 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -755,5 +755,89 @@ ArrowToSubstrait arrow_is_valid_to_substrait = [] (const arrow::compute::Express return arrow_convert_arguments(call, substrait_call, ext_set_); }; +// Strings function mapping +SubstraitToArrow substrait_like_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + return arrow::compute::call("match_like", {func_args[0]}, compute::MatchSubstringOptions(func_args[1].ToString())); +}; + +SubstraitToArrow substrait_substring_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + auto start = func_args[1].literal()->scalar_as(); + auto stop = func_args[2].literal()->scalar_as(); + return arrow::compute::call("utf8_slice_codeunits", {func_args[0]}, compute::SliceOptions(static_cast(start.value), static_cast(stop.value))); +}; + +SubstraitToArrow substrait_concat_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + arrow::StringBuilder builder; + builder.Append(func_args[0].ToString()); + builder.Append(func_args[1].ToString()); + auto strings_datum = arrow::Datum(*builder.Finish()); + auto separator_datum = arrow::Datum(""); + return arrow::compute::call("binary_join", {arrow::compute::Expression(strings_datum), arrow::compute::Expression(separator_datum)}); +}; + +ArrowToSubstrait arrow_match_like_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("like")); + substrait_call.set_function_reference(function_reference); + + arrow::compute::Expression expression_1, expression_2; + std::unique_ptr string_1, string_2; + expression_1 = call.arguments[0]; + string_1 = ToProto(expression_1, ext_set_).ValueOrDie(); + substrait_call.add_args()->CopyFrom(*string_1); + + auto pattern_string = std::dynamic_pointer_cast(call.options)->pattern; + expression_2 = arrow::compute::Expression(arrow::Datum(pattern_string)); + string_2 = ToProto(expression_2, ext_set_).ValueOrDie(); + substrait_call.add_args()->CopyFrom(*string_2); + + return std::move(substrait_call); +}; + +ArrowToSubstrait arrow_utf8_slice_codeunits_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("substring")); + substrait_call.set_function_reference(function_reference); + arrow::compute::Expression expression_1, expression_2, expression_3; + std::unique_ptr string, start, stop; + expression_1 = call.arguments[0]; + string = ToProto(expression_1, ext_set_).ValueOrDie(); + substrait_call.add_args()->CopyFrom(*string); + + auto start_index = std::dynamic_pointer_cast(call.options)->start; + auto stop_index = std::dynamic_pointer_cast(call.options)->stop; + expression_2 = arrow::compute::Expression(arrow::Datum(start_index)); + expression_3 = arrow::compute::Expression(arrow::Datum(stop_index)); + start = ToProto(expression_2, ext_set_).ValueOrDie(); + stop = ToProto(expression_3, ext_set_).ValueOrDie(); + substrait_call.add_args()->CopyFrom(*start); + substrait_call.add_args()->CopyFrom(*stop); + + return std::move(substrait_call); +}; + +ArrowToSubstrait arrow_binary_join_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("concat")); + substrait_call.set_function_reference(function_reference); + arrow::compute::Expression expression_1, expression_2; + std::unique_ptr string_1, string_2; + + auto strings_list = call.arguments[0].literal()->make_array(); + expression_1 = arrow::compute::Expression(*(strings_list->GetScalar(0))); + expression_2 = arrow::compute::Expression(*(strings_list->GetScalar(1))); + + string_1 = ToProto(expression_1, ext_set_).ValueOrDie(); + string_2 = ToProto(expression_2, ext_set_).ValueOrDie(); + substrait_call.add_args()->CopyFrom(*string_1); + substrait_call.add_args()->CopyFrom(*string_2); + return std::move(substrait_call); +}; + + + } // namespace engine -} // namespace arrow \ No newline at end of file +} // namespace arrow From 93c16b57f9228baf532891f8272f5444cbea7975 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Sun, 5 Jun 2022 13:58:34 +0530 Subject: [PATCH 12/17] feat: mapping for cast function --- .../arrow/engine/substrait/extension_set.cc | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 68fe2b29f86..b46c1964517 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -21,7 +21,9 @@ #include #include "arrow/compute/api_scalar.h" +#include "arrow/compute/cast.h" #include "arrow/engine/substrait/expression_internal.h" +#include "arrow/engine/substrait/type_internal.h" #include "arrow/util/hash_util.h" #include "arrow/util/hashing.h" #include "arrow/util/string_view.h" @@ -837,7 +839,29 @@ ArrowToSubstrait arrow_binary_join_to_substrait = [] (const arrow::compute::Expr return std::move(substrait_call); }; +// Cast function mapping +SubstraitToArrow substrait_cast_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + ExtensionSet ext_set_; + ARROW_ASSIGN_OR_RAISE(auto output_type_desc, + FromProto(call.output_type(), ext_set_)); + auto cast_options = compute::CastOptions::Safe(std::move(output_type_desc.first)); + return compute::call("cast", {substrait_convert_arguments(call)[0]}, std::move(cast_options)); +}; +ArrowToSubstrait arrow_cast_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("cast")); + substrait_call.set_function_reference(function_reference); + + auto arrow_to_type = std::dynamic_pointer_cast(call.options)->to_type; + ARROW_ASSIGN_OR_RAISE(auto substrait_to_type, ToProto(*arrow_to_type, false, ext_set_)); + substrait_call.set_allocated_output_type(substrait_to_type.get()); + auto expression = call.arguments[0]; + ARROW_ASSIGN_OR_RAISE(auto value, ToProto(expression, ext_set_)); + substrait_call.add_args()->CopyFrom(*value); + + return substrait_call; +}; } // namespace engine } // namespace arrow From 7e27a7f82ab3592b76743c26b05f500f0c92192d Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 10 Jun 2022 02:40:23 +0530 Subject: [PATCH 13/17] feat: mapping for datetime function --- .../arrow/engine/substrait/extension_set.cc | 94 +++++++++++++++++-- 1 file changed, 87 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index b46c1964517..e189f74b681 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -518,7 +518,7 @@ substrait::Expression::ScalarFunction arrow_convert_arguments(const arrow::compu return std::move(substrait_call); } -substrait::Expression::ScalarFunction arrow_convert_arithmetic_arguments(const arrow::compute::Expression::Call& call, substrait::Expression::ScalarFunction& substrait_call, ExtensionSet* ext_set_, std::string overflow_handling){ +substrait::Expression::ScalarFunction arrow_convert_enum_arguments(const arrow::compute::Expression::Call& call, substrait::Expression::ScalarFunction& substrait_call, ExtensionSet* ext_set_, std::string overflow_handling){ substrait::Expression::Enum options; options.set_specified(overflow_handling); substrait_call.add_args()->set_allocated_enum_(&options); @@ -567,42 +567,42 @@ ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression:: substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "ERROR"); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); }; ArrowToSubstrait arrow_unchecked_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "SILENT"); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT"); }; ArrowToSubstrait arrow_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "ERROR"); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); }; ArrowToSubstrait arrow_unchecked_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "SILENT") ; + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT") ; }; ArrowToSubstrait arrow_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("multiply")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "ERROR"); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); }; ArrowToSubstrait arrow_unchecked_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("multiply")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arithmetic_arguments(call, substrait_call, ext_set_, "SILENT"); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT"); }; ArrowToSubstrait arrow_abs_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { @@ -863,5 +863,85 @@ ArrowToSubstrait arrow_cast_to_substrait = [] (const arrow::compute::Expression: return substrait_call; }; + +// Datetime functions mapping +SubstraitToArrow substrait_extract_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + if(func_args[0].ToString() == "YEAR"){ + return arrow::compute::call("year", {func_args[1]}); + } else if (func_args[0].ToString() == "MONTH") { + return arrow::compute::call("month", {func_args[1]}); + } else if (func_args[0].ToString() == "DAY") { + return arrow::compute::call("day", {func_args[1]}); + } else { + return arrow::compute::call("second", {func_args[1]}); + } +}; + +ArrowToSubstrait arrow_year_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "YEAR"); +}; + +ArrowToSubstrait arrow_month_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "MONTH"); +}; + +ArrowToSubstrait arrow_day_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "DAY"); +}; + +ArrowToSubstrait arrow_second_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SECOND"); +}; + +// Substrait Datetime add/subtract mappings should work for datetime intervals functions as well +SubstraitToArrow substrait_datetime_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + return arrow::compute::call("add", substrait_convert_arguments(call), compute::ArithmeticOptions()); + }; + +SubstraitToArrow substrait_datetime_subtract_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + return arrow::compute::call("subtract", substrait_convert_arguments(call), compute::ArithmeticOptions()); + }; + +ArrowToSubstrait arrow_datetime_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_datetime_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_datetime_add_intervals_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add_intervals")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + +ArrowToSubstrait arrow_datetime_subtract_intervals_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract_intervals")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_arguments(call, substrait_call, ext_set_); +}; + } // namespace engine } // namespace arrow From d0f8ff348a2d705a9b6cb19e5449e82612996a29 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Fri, 10 Jun 2022 02:48:59 +0530 Subject: [PATCH 14/17] feat: mappings for divide functions --- .../arrow/engine/substrait/extension_set.cc | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index e189f74b681..c49d360eb75 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -559,6 +559,17 @@ SubstraitToArrow substrait_multiply_to_arrow = [] (const substrait::Expression:: } }; +SubstraitToArrow substrait_divide_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + auto func_args = substrait_convert_arguments(call); + if(func_args[0].ToString() == "SILENT"){ + return arrow::compute::call("divide", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); + } else if (func_args[0].ToString() == "SATURATE") { + return Status::Invalid("Arrow does not support a saturating divide"); + } else { + return arrow::compute::call("divide_checked", {func_args[1], func_args[2]}, compute::ArithmeticOptions(true)); + } +}; + SubstraitToArrow substrait_modulus_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { return arrow::compute::call("abs", substrait_convert_arguments(call)); }; @@ -605,6 +616,21 @@ ArrowToSubstrait arrow_unchecked_multiply_to_substrait = [] (const arrow::comput return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT"); }; + +ArrowToSubstrait arrow_divide_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("divide")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); +}; + +ArrowToSubstrait arrow_unchecked_divide_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { + substrait::Expression::ScalarFunction substrait_call; + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("divide")); + substrait_call.set_function_reference(function_reference); + return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT"); +}; + ArrowToSubstrait arrow_abs_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("modulus")); From a716c3007edbd70de605853a5cc68fa4d25600e8 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 14 Jun 2022 00:31:28 +0530 Subject: [PATCH 15/17] feat: mappings for sum & avg aggregate functions --- cpp/src/arrow/engine/substrait/extension_set.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index c49d360eb75..1b5dc40b09f 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -20,6 +20,7 @@ #include #include +#include "arrow/compute/api_aggregate.h" #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" #include "arrow/engine/substrait/expression_internal.h" @@ -638,7 +639,7 @@ ArrowToSubstrait arrow_abs_to_substrait = [] (const arrow::compute::Expression:: return arrow_convert_arguments(call, substrait_call, ext_set_); }; -// Boolean Functions mapping +// Boolean Functions mappings SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { return arrow::compute::call("invert", substrait_convert_arguments(call)); }; @@ -669,7 +670,6 @@ ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expres return arrow_convert_arguments(call, substrait_call, ext_set_); }; - ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { substrait::Expression::ScalarFunction substrait_call; ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("and")); @@ -969,5 +969,14 @@ ArrowToSubstrait arrow_datetime_subtract_intervals_to_substrait = [] (const arro return arrow_convert_arguments(call, substrait_call, ext_set_); }; +// Aggregate functions mapping +SubstraitToArrow substrait_aggregate_sum_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + return arrow::compute::call("sum", {substrait_convert_arguments(call)[1]}, compute::ScalarAggregateOptions()); +}; + +SubstraitToArrow substrait_aggregate_avg_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { + return arrow::compute::call("avg", {substrait_convert_arguments(call)[1]}, compute::ScalarAggregateOptions()); +}; + } // namespace engine } // namespace arrow From 56cbfff2219d9d01ba6faf5632271dc066f31a12 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 14 Jun 2022 08:56:50 +0530 Subject: [PATCH 16/17] feat: register functions to use function mappings --- .../engine/substrait/expression_internal.cc | 2 +- .../arrow/engine/substrait/extension_set.cc | 541 ++++++++++-------- .../arrow/engine/substrait/extension_set.h | 3 +- 3 files changed, 290 insertions(+), 256 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index c36aba6beb1..45880bce507 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -31,6 +31,7 @@ #include "arrow/util/make_unique.h" #include "arrow/visit_scalar_inline.h" + namespace arrow { using internal::checked_cast; @@ -159,7 +160,6 @@ Result FromProto(const substrait::Expression& expr, ARROW_ASSIGN_OR_RAISE(auto decoded_function, ext_set.DecodeFunction(scalar_fn.function_reference())); - ARROW_ASSIGN_OR_RAISE(auto arrow_function, ext_set.GetFunctionMap().GetArrowFromSubstrait(decoded_function.name.to_string())); return arrow_function(scalar_fn); } diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index 1b5dc40b09f..e348d357dbe 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -209,260 +209,6 @@ const int* GetIndex(const KeyToIndex& key_to_index, const Key& key) { return &it->second; } -namespace { - -struct ExtensionIdRegistryImpl : ExtensionIdRegistry { - virtual ~ExtensionIdRegistryImpl() {} - - std::vector Uris() const override { - return {uris_.begin(), uris_.end()}; - } - - util::optional GetType(const DataType& type) const override { - if (auto index = GetIndex(type_to_index_, &type)) { - return TypeRecord{type_ids_[*index], types_[*index]}; - } - return {}; - } - - util::optional GetType(Id id) const override { - if (auto index = GetIndex(id_to_index_, id)) { - return TypeRecord{type_ids_[*index], types_[*index]}; - } - return {}; - } - - Status CanRegisterType(Id id, const std::shared_ptr& type) const override { - if (id_to_index_.find(id) != id_to_index_.end()) { - return Status::Invalid("Type id was already registered"); - } - if (type_to_index_.find(&*type) != type_to_index_.end()) { - return Status::Invalid("Type was already registered"); - } - return Status::OK(); - } - - Status RegisterType(Id id, std::shared_ptr type) override { - DCHECK_EQ(type_ids_.size(), types_.size()); - - Id copied_id{*uris_.emplace(id.uri.to_string()).first, - *names_.emplace(id.name.to_string()).first}; - - auto index = static_cast(type_ids_.size()); - - auto it_success = id_to_index_.emplace(copied_id, index); - - if (!it_success.second) { - return Status::Invalid("Type id was already registered"); - } - - if (!type_to_index_.emplace(type.get(), index).second) { - id_to_index_.erase(it_success.first); - return Status::Invalid("Type was already registered"); - } - - type_ids_.push_back(copied_id); - types_.push_back(std::move(type)); - return Status::OK(); - } - - util::optional GetFunction( - util::string_view arrow_function_name) const override { - if (auto index = GetIndex(function_name_to_index_, arrow_function_name)) { - return FunctionRecord{function_ids_[*index], *function_name_ptrs_[*index]}; - } - return {}; - } - - util::optional GetFunction(Id id) const override { - if (auto index = GetIndex(function_id_to_index_, id)) { - return FunctionRecord{function_ids_[*index], *function_name_ptrs_[*index]}; - } - return {}; - } - - Status CanRegisterFunction(Id id, - const std::string& arrow_function_name) const override { - if (function_id_to_index_.find(id) != function_id_to_index_.end()) { - return Status::Invalid("Function id was already registered"); - } - if (function_name_to_index_.find(arrow_function_name) != - function_name_to_index_.end()) { - return Status::Invalid("Function name was already registered"); - } - return Status::OK(); - } - - Status RegisterFunction(Id id, std::string arrow_function_name) override { - DCHECK_EQ(function_ids_.size(), function_name_ptrs_.size()); - - Id copied_id{*uris_.emplace(id.uri.to_string()).first, - *names_.emplace(id.name.to_string()).first}; - - const std::string& copied_function_name{ - *function_names_.emplace(std::move(arrow_function_name)).first}; - - auto index = static_cast(function_ids_.size()); - - auto it_success = function_id_to_index_.emplace(copied_id, index); - - if (!it_success.second) { - return Status::Invalid("Function id was already registered"); - } - - if (!function_name_to_index_.emplace(copied_function_name, index).second) { - function_id_to_index_.erase(it_success.first); - return Status::Invalid("Function name was already registered"); - } - - function_name_ptrs_.push_back(&copied_function_name); - function_ids_.push_back(copied_id); - return Status::OK(); - } - - // owning storage of uris, names, (arrow::)function_names, types - // note that storing strings like this is safe since references into an - // unordered_set are not invalidated on insertion - std::unordered_set uris_, names_, function_names_; - DataTypeVector types_; - - // non-owning lookup helpers - std::vector type_ids_, function_ids_; - std::unordered_map id_to_index_; - std::unordered_map type_to_index_; - - std::vector function_name_ptrs_; - std::unordered_map function_id_to_index_; - std::unordered_map - function_name_to_index_; -}; - -struct NestedExtensionIdRegistryImpl : ExtensionIdRegistryImpl { - explicit NestedExtensionIdRegistryImpl(const ExtensionIdRegistry* parent) - : parent_(parent) {} - - virtual ~NestedExtensionIdRegistryImpl() {} - - std::vector Uris() const override { - std::vector uris = parent_->Uris(); - std::unordered_set uri_set; - uri_set.insert(uris.begin(), uris.end()); - uri_set.insert(uris_.begin(), uris_.end()); - return std::vector(uris); - } - - util::optional GetType(const DataType& type) const override { - auto type_opt = ExtensionIdRegistryImpl::GetType(type); - if (type_opt) { - return type_opt; - } - return parent_->GetType(type); - } - - util::optional GetType(Id id) const override { - auto type_opt = ExtensionIdRegistryImpl::GetType(id); - if (type_opt) { - return type_opt; - } - return parent_->GetType(id); - } - - Status CanRegisterType(Id id, const std::shared_ptr& type) const override { - return parent_->CanRegisterType(id, type) & - ExtensionIdRegistryImpl::CanRegisterType(id, type); - } - - Status RegisterType(Id id, std::shared_ptr type) override { - return parent_->CanRegisterType(id, type) & - ExtensionIdRegistryImpl::RegisterType(id, type); - } - - util::optional GetFunction( - util::string_view arrow_function_name) const override { - auto func_opt = ExtensionIdRegistryImpl::GetFunction(arrow_function_name); - if (func_opt) { - return func_opt; - } - return parent_->GetFunction(arrow_function_name); - } - - util::optional GetFunction(Id id) const override { - auto func_opt = ExtensionIdRegistryImpl::GetFunction(id); - if (func_opt) { - return func_opt; - } - return parent_->GetFunction(id); - } - - Status CanRegisterFunction(Id id, - const std::string& arrow_function_name) const override { - return parent_->CanRegisterFunction(id, arrow_function_name) & - ExtensionIdRegistryImpl::CanRegisterFunction(id, arrow_function_name); - } - - Status RegisterFunction(Id id, std::string arrow_function_name) override { - return parent_->CanRegisterFunction(id, arrow_function_name) & - ExtensionIdRegistryImpl::RegisterFunction(id, arrow_function_name); - } - - const ExtensionIdRegistry* parent_; -}; - -struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { - DefaultExtensionIdRegistry() { - struct TypeName { - std::shared_ptr type; - util::string_view name; - }; - - // The type (variation) mappings listed below need to be kept in sync - // with the YAML at substrait/format/extension_types.yaml manually; - // see ARROW-15535. - for (TypeName e : { - TypeName{uint8(), "u8"}, - TypeName{uint16(), "u16"}, - TypeName{uint32(), "u32"}, - TypeName{uint64(), "u64"}, - TypeName{float16(), "fp16"}, - }) { - DCHECK_OK(RegisterType({kArrowExtTypesUri, e.name}, std::move(e.type))); - } - - for (TypeName e : { - TypeName{null(), "null"}, - TypeName{month_interval(), "interval_month"}, - TypeName{day_time_interval(), "interval_day_milli"}, - TypeName{month_day_nano_interval(), "interval_month_day_nano"}, - }) { - DCHECK_OK(RegisterType({kArrowExtTypesUri, e.name}, std::move(e.type))); - } - - // TODO: this is just a placeholder right now. We'll need a YAML file for - // all functions (and prototypes) that Arrow provides that are relevant - // for Substrait, and include mappings for all of them here. See - // ARROW-15535. - for (util::string_view name : { - "add", - "equal", - "is_not_distinct_from", - }) { - DCHECK_OK(RegisterFunction({kArrowExtTypesUri, name}, name.to_string())); - } - } -}; - -} // namespace - -ExtensionIdRegistry* default_extension_id_registry() { - static DefaultExtensionIdRegistry impl_; - return &impl_; -} - -std::shared_ptr nested_extension_id_registry( - const ExtensionIdRegistry* parent) { - return std::make_shared(parent); -} - Status FunctionMapping::AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func){ if (arrow_to_substrait.find(arrow_function_name) != arrow_to_substrait.end()){ arrow_to_substrait[arrow_function_name] = conversion_func; @@ -978,5 +724,292 @@ SubstraitToArrow substrait_aggregate_avg_to_arrow = [] (const substrait::Express return arrow::compute::call("avg", {substrait_convert_arguments(call)[1]}, compute::ScalarAggregateOptions()); }; +namespace { + +struct ExtensionIdRegistryImpl : ExtensionIdRegistry { + virtual ~ExtensionIdRegistryImpl() {} + + std::vector Uris() const override { + return {uris_.begin(), uris_.end()}; + } + + util::optional GetType(const DataType& type) const override { + if (auto index = GetIndex(type_to_index_, &type)) { + return TypeRecord{type_ids_[*index], types_[*index]}; + } + return {}; + } + + util::optional GetType(Id id) const override { + if (auto index = GetIndex(id_to_index_, id)) { + return TypeRecord{type_ids_[*index], types_[*index]}; + } + return {}; + } + + Status CanRegisterType(Id id, const std::shared_ptr& type) const override { + if (id_to_index_.find(id) != id_to_index_.end()) { + return Status::Invalid("Type id was already registered"); + } + if (type_to_index_.find(&*type) != type_to_index_.end()) { + return Status::Invalid("Type was already registered"); + } + return Status::OK(); + } + + Status RegisterType(Id id, std::shared_ptr type) override { + DCHECK_EQ(type_ids_.size(), types_.size()); + + Id copied_id{*uris_.emplace(id.uri.to_string()).first, + *names_.emplace(id.name.to_string()).first}; + + auto index = static_cast(type_ids_.size()); + + auto it_success = id_to_index_.emplace(copied_id, index); + + if (!it_success.second) { + return Status::Invalid("Type id was already registered"); + } + + if (!type_to_index_.emplace(type.get(), index).second) { + id_to_index_.erase(it_success.first); + return Status::Invalid("Type was already registered"); + } + + type_ids_.push_back(copied_id); + types_.push_back(std::move(type)); + return Status::OK(); + } + + util::optional GetFunction( + util::string_view arrow_function_name) const override { + if (auto index = GetIndex(function_name_to_index_, arrow_function_name)) { + return FunctionRecord{function_ids_[*index], *function_name_ptrs_[*index]}; + } + return {}; + } + + util::optional GetFunction(Id id) const override { + if (auto index = GetIndex(function_id_to_index_, id)) { + return FunctionRecord{function_ids_[*index], *function_name_ptrs_[*index]}; + } + return {}; + } + + Status CanRegisterFunction(Id id, + const std::string& arrow_function_name) const override { + if (function_id_to_index_.find(id) != function_id_to_index_.end()) { + return Status::Invalid("Function id was already registered"); + } + if (function_name_to_index_.find(arrow_function_name) != + function_name_to_index_.end()) { + return Status::Invalid("Function name was already registered"); + } + return Status::OK(); + } + + Status RegisterFunctionMapping(Id id, SubstraitToArrow conversion_func) override { + DCHECK_OK(functions_map.AddSubstraitToArrow(id.name.to_string(), conversion_func)); + return RegisterFunction(id, id.name.to_string()); + } + + Status RegisterFunction(Id id, std::string arrow_function_name) override { + DCHECK_EQ(function_ids_.size(), function_name_ptrs_.size()); + + Id copied_id{*uris_.emplace(id.uri.to_string()).first, + *names_.emplace(id.name.to_string()).first}; + + const std::string& copied_function_name{ + *function_names_.emplace(std::move(arrow_function_name)).first}; + + auto index = static_cast(function_ids_.size()); + + auto it_success = function_id_to_index_.emplace(copied_id, index); + + if (!it_success.second) { + return Status::Invalid("Function id was already registered"); + } + + if (!function_name_to_index_.emplace(copied_function_name, index).second) { + function_id_to_index_.erase(it_success.first); + return Status::Invalid("Function name was already registered"); + } + + function_name_ptrs_.push_back(&copied_function_name); + function_ids_.push_back(copied_id); + return Status::OK(); + } + + // owning storage of uris, names, (arrow::)function_names, types + // note that storing strings like this is safe since references into an + // unordered_set are not invalidated on insertion + std::unordered_set uris_, names_, function_names_; + DataTypeVector types_; + + // non-owning lookup helpers + std::vector type_ids_, function_ids_; + std::unordered_map id_to_index_; + std::unordered_map type_to_index_; + + std::vector function_name_ptrs_; + std::unordered_map function_id_to_index_; + std::unordered_map + function_name_to_index_; +}; + +struct NestedExtensionIdRegistryImpl : ExtensionIdRegistryImpl { + explicit NestedExtensionIdRegistryImpl(const ExtensionIdRegistry* parent) + : parent_(parent) {} + + virtual ~NestedExtensionIdRegistryImpl() {} + + std::vector Uris() const override { + std::vector uris = parent_->Uris(); + std::unordered_set uri_set; + uri_set.insert(uris.begin(), uris.end()); + uri_set.insert(uris_.begin(), uris_.end()); + return std::vector(uris); + } + + util::optional GetType(const DataType& type) const override { + auto type_opt = ExtensionIdRegistryImpl::GetType(type); + if (type_opt) { + return type_opt; + } + return parent_->GetType(type); + } + + util::optional GetType(Id id) const override { + auto type_opt = ExtensionIdRegistryImpl::GetType(id); + if (type_opt) { + return type_opt; + } + return parent_->GetType(id); + } + + Status CanRegisterType(Id id, const std::shared_ptr& type) const override { + return parent_->CanRegisterType(id, type) & + ExtensionIdRegistryImpl::CanRegisterType(id, type); + } + + Status RegisterType(Id id, std::shared_ptr type) override { + return parent_->CanRegisterType(id, type) & + ExtensionIdRegistryImpl::RegisterType(id, type); + } + + util::optional GetFunction( + util::string_view arrow_function_name) const override { + auto func_opt = ExtensionIdRegistryImpl::GetFunction(arrow_function_name); + if (func_opt) { + return func_opt; + } + return parent_->GetFunction(arrow_function_name); + } + + util::optional GetFunction(Id id) const override { + auto func_opt = ExtensionIdRegistryImpl::GetFunction(id); + if (func_opt) { + return func_opt; + } + return parent_->GetFunction(id); + } + + Status CanRegisterFunction(Id id, + const std::string& arrow_function_name) const override { + return parent_->CanRegisterFunction(id, arrow_function_name) & + ExtensionIdRegistryImpl::CanRegisterFunction(id, arrow_function_name); + } + + Status RegisterFunction(Id id, std::string arrow_function_name) override { + return parent_->CanRegisterFunction(id, arrow_function_name) & + ExtensionIdRegistryImpl::RegisterFunction(id, arrow_function_name); + } + + const ExtensionIdRegistry* parent_; +}; + +struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { + DefaultExtensionIdRegistry() { + struct TypeName { + std::shared_ptr type; + util::string_view name; + }; + + // The type (variation) mappings listed below need to be kept in sync + // with the YAML at substrait/format/extension_types.yaml manually; + // see ARROW-15535. + for (TypeName e : { + TypeName{uint8(), "u8"}, + TypeName{uint16(), "u16"}, + TypeName{uint32(), "u32"}, + TypeName{uint64(), "u64"}, + TypeName{float16(), "fp16"}, + }) { + DCHECK_OK(RegisterType({kArrowExtTypesUri, e.name}, std::move(e.type))); + } + + for (TypeName e : { + TypeName{null(), "null"}, + TypeName{month_interval(), "interval_month"}, + TypeName{day_time_interval(), "interval_day_milli"}, + TypeName{month_day_nano_interval(), "interval_month_day_nano"}, + }) { + DCHECK_OK(RegisterType({kArrowExtTypesUri, e.name}, std::move(e.type))); + } + + // registering arithmetic function mappings + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "add"}, substrait_add_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "subtract"}, substrait_subtract_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "multiply"}, substrait_multiply_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "divide"}, substrait_divide_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "modulus"}, substrait_modulus_to_arrow)); + + // registering boolean function mappings + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "not"}, substrait_not_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "and"}, substrait_and_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "or"}, substrait_or_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "xor"}, substrait_xor_to_arrow)); + + // registering comparison function mappings + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "lt"}, substrait_lt_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "gt"}, substrait_gt_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "lte"}, substrait_lte_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "gte"}, substrait_gte_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "equal"}, substrait_equal_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "not_equal"}, substrait_not_equal_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "is_null"}, substrait_is_null_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "is_not_null"}, substrait_is_not_null_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "is_not_distinct_from"}, substrait_is_not_distinct_from_to_arrow)); + + // registering string function mappings + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "like"}, substrait_like_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "substring"}, substrait_substring_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "concat"}, substrait_concat_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "substring"}, substrait_substring_to_arrow)); + + // registering cast function mapping + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "cast"}, substrait_cast_to_arrow)); + + // registering datetime function mappings + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "extract"}, substrait_extract_to_arrow)); + + // registering aggregate function mappings + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "sum"}, substrait_aggregate_sum_to_arrow)); + DCHECK_OK(RegisterFunctionMapping({kArrowExtTypesUri, "avg"}, substrait_aggregate_avg_to_arrow)); + } +}; + +} // namespace + +ExtensionIdRegistry* default_extension_id_registry() { + static DefaultExtensionIdRegistry impl_; + return &impl_; +} + +std::shared_ptr nested_extension_id_registry( + const ExtensionIdRegistry* parent) { + return std::make_shared(parent); +} + } // namespace engine } // namespace arrow diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index 1519ac873db..d438249cfd5 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -44,11 +44,11 @@ class FunctionMapping { std::unordered_map substrait_to_arrow; std::unordered_map arrow_to_substrait; + public: // Registration API Status AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func); Status AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func); - public: Result GetArrowFromSubstrait(std::string name) const; Result GetSubstraitFromArrow(std::string name) const; }; @@ -117,6 +117,7 @@ class ARROW_ENGINE_EXPORT ExtensionIdRegistry { util::string_view arrow_function_name) const = 0; virtual Status CanRegisterFunction(Id, const std::string& arrow_function_name) const = 0; + virtual Status RegisterFunctionMapping(Id id, SubstraitToArrow conversion_func) = 0; virtual Status RegisterFunction(Id, std::string arrow_function_name) = 0; }; From 3f39eae495b6aed36db58ee659a0a403b0bf3647 Mon Sep 17 00:00:00 2001 From: Sanjiban Sengupta Date: Tue, 28 Jun 2022 20:52:24 +0530 Subject: [PATCH 17/17] review changes --- .../arrow/engine/substrait/extension_set.cc | 292 +++++++++--------- 1 file changed, 146 insertions(+), 146 deletions(-) diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index e348d357dbe..5406d80f3f0 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -210,7 +210,7 @@ const int* GetIndex(const KeyToIndex& key_to_index, const Key& key) { } Status FunctionMapping::AddArrowToSubstrait(std::string arrow_function_name, ArrowToSubstrait conversion_func){ - if (arrow_to_substrait.find(arrow_function_name) != arrow_to_substrait.end()){ + if (arrow_to_substrait.find(arrow_function_name) == arrow_to_substrait.end()){ arrow_to_substrait[arrow_function_name] = conversion_func; return Status::OK(); } @@ -218,7 +218,7 @@ Status FunctionMapping::AddArrowToSubstrait(std::string arrow_function_name, Arr } Status FunctionMapping::AddSubstraitToArrow(std::string substrait_function_name, SubstraitToArrow conversion_func){ - if (substrait_to_arrow.find(substrait_function_name) != substrait_to_arrow.end()){ + if (substrait_to_arrow.find(substrait_function_name) == substrait_to_arrow.end()){ substrait_to_arrow[substrait_function_name] = conversion_func; return Status::OK(); } @@ -241,40 +241,40 @@ Result FunctionMapping::GetSubstraitFromArrow(std::string name } } -std::vector substrait_convert_arguments(const substrait::Expression::ScalarFunction& call){ +std::vector ConvertSubstraitArguments(const substrait::Expression::ScalarFunction& call){ substrait::Expression value; - ExtensionSet ext_set_; + ExtensionSet ext_set; arrow::compute::Expression expression; std::vector func_args; for(int i=0; i value; for(size_t i = 0; iCopyFrom(*value); + value = ToProto(expression, ext_set).ValueOrDie(); + substrait_call->add_args()->CopyFrom(*value); } - return std::move(substrait_call); + return *substrait_call; } -substrait::Expression::ScalarFunction arrow_convert_enum_arguments(const arrow::compute::Expression::Call& call, substrait::Expression::ScalarFunction& substrait_call, ExtensionSet* ext_set_, std::string overflow_handling){ +substrait::Expression::ScalarFunction ConvertArrowEnumArguments(const arrow::compute::Expression::Call& call, substrait::Expression::ScalarFunction* substrait_call, ExtensionSet* ext_set, std::string enum_value){ substrait::Expression::Enum options; - options.set_specified(overflow_handling); - substrait_call.add_args()->set_allocated_enum_(&options); - return arrow_convert_arguments(call, substrait_call, ext_set_); + options.set_specified(enum_value); + substrait_call->add_args()->set_allocated_enum_(&options); + return ConvertArrowArguments(call, substrait_call, ext_set); } SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); if(func_args[0].ToString() == "SILENT"){ return arrow::compute::call("add", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); } else if (func_args[0].ToString() == "SATURATE") { @@ -285,7 +285,7 @@ SubstraitToArrow substrait_add_to_arrow = [] (const substrait::Expression::Scala }; SubstraitToArrow substrait_subtract_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); if(func_args[0].ToString() == "SILENT"){ return arrow::compute::call("subtract", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); } else if (func_args[0].ToString() == "SATURATE") { @@ -296,7 +296,7 @@ SubstraitToArrow substrait_subtract_to_arrow = [] (const substrait::Expression:: }; SubstraitToArrow substrait_multiply_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); if(func_args[0].ToString() == "SILENT"){ return arrow::compute::call("multiply", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); } else if (func_args[0].ToString() == "SATURATE") { @@ -307,7 +307,7 @@ SubstraitToArrow substrait_multiply_to_arrow = [] (const substrait::Expression:: }; SubstraitToArrow substrait_divide_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); if(func_args[0].ToString() == "SILENT"){ return arrow::compute::call("divide", {func_args[1], func_args[2]}, compute::ArithmeticOptions()); } else if (func_args[0].ToString() == "SATURATE") { @@ -318,153 +318,153 @@ SubstraitToArrow substrait_divide_to_arrow = [] (const substrait::Expression::Sc }; SubstraitToArrow substrait_modulus_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("abs", substrait_convert_arguments(call)); + return arrow::compute::call("abs", ConvertSubstraitArguments(call)); }; -ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("add")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "ERROR"); }; -ArrowToSubstrait arrow_unchecked_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_unchecked_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("add")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "SILENT"); }; -ArrowToSubstrait arrow_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("subtract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "ERROR"); }; -ArrowToSubstrait arrow_unchecked_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_unchecked_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("subtract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT") ; + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "SILENT") ; }; -ArrowToSubstrait arrow_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("multiply")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("multiply")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "ERROR"); }; -ArrowToSubstrait arrow_unchecked_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_unchecked_multiply_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("multiply")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("multiply")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "SILENT"); }; -ArrowToSubstrait arrow_divide_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_divide_to_substrait = [] (const arrow::compute::Expression::Call& call, arrow::engine::ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("divide")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("divide")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "ERROR"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "ERROR"); }; -ArrowToSubstrait arrow_unchecked_divide_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_unchecked_divide_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("divide")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("divide")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SILENT"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "SILENT"); }; -ArrowToSubstrait arrow_abs_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_abs_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("modulus")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("modulus")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; // Boolean Functions mappings SubstraitToArrow substrait_not_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("invert", substrait_convert_arguments(call)); + return arrow::compute::call("invert", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_or_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("or_kleene", substrait_convert_arguments(call)); + return arrow::compute::call("or_kleene", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_and_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("and_kleene", substrait_convert_arguments(call)); + return arrow::compute::call("and_kleene", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_xor_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("xor", substrait_convert_arguments(call)); + return arrow::compute::call("xor", ConvertSubstraitArguments(call)); }; -ArrowToSubstrait arrow_invert_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_invert_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("not")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("not")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_or_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("or")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("or")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_and_kleene_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("and")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("and")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_xor_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_xor_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("xor")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("xor")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; // Comparison Functions mapping SubstraitToArrow substrait_lt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("less", substrait_convert_arguments(call)); + return arrow::compute::call("less", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_gt_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("greater", substrait_convert_arguments(call)); + return arrow::compute::call("greater", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_lte_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("less_equal", substrait_convert_arguments(call)); + return arrow::compute::call("less_equal", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_gte_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("greater_equal", substrait_convert_arguments(call)); + return arrow::compute::call("greater_equal", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_not_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("not_equal", substrait_convert_arguments(call)); + return arrow::compute::call("not_equal", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_equal_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("equal", substrait_convert_arguments(call)); + return arrow::compute::call("equal", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_is_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("is_null", substrait_convert_arguments(call)); + return arrow::compute::call("is_null", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_is_not_null_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("is_valid", substrait_convert_arguments(call)); + return arrow::compute::call("is_valid", ConvertSubstraitArguments(call)); }; SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - std::vector func_args = substrait_convert_arguments(call); + std::vector func_args = ConvertSubstraitArguments(call); auto null_check_1 = arrow::compute::call("is_null", {func_args[0]}); auto null_check_2 = arrow::compute::call("is_null", {func_args[1]}); if(null_check_1.IsNullLiteral() && null_check_1.IsNullLiteral()){ @@ -473,77 +473,77 @@ SubstraitToArrow substrait_is_not_distinct_from_to_arrow = [] (const substrait:: return arrow::compute::call("not_equal", func_args); }; -ArrowToSubstrait arrow_less_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_less_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("lt")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("lt")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_greater_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_greater_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("gt")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("gt")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_less_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_less_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("lte")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("lte")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_greater_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_greater_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("gte")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("gte")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("equal")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("equal")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_not_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_not_equal_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("not_equal")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("not_equal")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_is_null_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_is_null_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("is_null")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("is_null")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_is_valid_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_is_valid_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("is_not_null")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("is_not_null")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; // Strings function mapping SubstraitToArrow substrait_like_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); return arrow::compute::call("match_like", {func_args[0]}, compute::MatchSubstringOptions(func_args[1].ToString())); }; SubstraitToArrow substrait_substring_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); auto start = func_args[1].literal()->scalar_as(); auto stop = func_args[2].literal()->scalar_as(); return arrow::compute::call("utf8_slice_codeunits", {func_args[0]}, compute::SliceOptions(static_cast(start.value), static_cast(stop.value))); }; SubstraitToArrow substrait_concat_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); arrow::StringBuilder builder; builder.Append(func_args[0].ToString()); builder.Append(func_args[1].ToString()); @@ -552,50 +552,50 @@ SubstraitToArrow substrait_concat_to_arrow = [] (const substrait::Expression::Sc return arrow::compute::call("binary_join", {arrow::compute::Expression(strings_datum), arrow::compute::Expression(separator_datum)}); }; -ArrowToSubstrait arrow_match_like_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_match_like_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("like")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("like")); substrait_call.set_function_reference(function_reference); arrow::compute::Expression expression_1, expression_2; std::unique_ptr string_1, string_2; expression_1 = call.arguments[0]; - string_1 = ToProto(expression_1, ext_set_).ValueOrDie(); + string_1 = ToProto(expression_1, ext_set).ValueOrDie(); substrait_call.add_args()->CopyFrom(*string_1); auto pattern_string = std::dynamic_pointer_cast(call.options)->pattern; expression_2 = arrow::compute::Expression(arrow::Datum(pattern_string)); - string_2 = ToProto(expression_2, ext_set_).ValueOrDie(); + string_2 = ToProto(expression_2, ext_set).ValueOrDie(); substrait_call.add_args()->CopyFrom(*string_2); return std::move(substrait_call); }; -ArrowToSubstrait arrow_utf8_slice_codeunits_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_utf8_slice_codeunits_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("substring")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("substring")); substrait_call.set_function_reference(function_reference); arrow::compute::Expression expression_1, expression_2, expression_3; std::unique_ptr string, start, stop; expression_1 = call.arguments[0]; - string = ToProto(expression_1, ext_set_).ValueOrDie(); + string = ToProto(expression_1, ext_set).ValueOrDie(); substrait_call.add_args()->CopyFrom(*string); auto start_index = std::dynamic_pointer_cast(call.options)->start; auto stop_index = std::dynamic_pointer_cast(call.options)->stop; expression_2 = arrow::compute::Expression(arrow::Datum(start_index)); expression_3 = arrow::compute::Expression(arrow::Datum(stop_index)); - start = ToProto(expression_2, ext_set_).ValueOrDie(); - stop = ToProto(expression_3, ext_set_).ValueOrDie(); + start = ToProto(expression_2, ext_set).ValueOrDie(); + stop = ToProto(expression_3, ext_set).ValueOrDie(); substrait_call.add_args()->CopyFrom(*start); substrait_call.add_args()->CopyFrom(*stop); return std::move(substrait_call); }; -ArrowToSubstrait arrow_binary_join_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_binary_join_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("concat")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("concat")); substrait_call.set_function_reference(function_reference); arrow::compute::Expression expression_1, expression_2; std::unique_ptr string_1, string_2; @@ -604,8 +604,8 @@ ArrowToSubstrait arrow_binary_join_to_substrait = [] (const arrow::compute::Expr expression_1 = arrow::compute::Expression(*(strings_list->GetScalar(0))); expression_2 = arrow::compute::Expression(*(strings_list->GetScalar(1))); - string_1 = ToProto(expression_1, ext_set_).ValueOrDie(); - string_2 = ToProto(expression_2, ext_set_).ValueOrDie(); + string_1 = ToProto(expression_1, ext_set).ValueOrDie(); + string_2 = ToProto(expression_2, ext_set).ValueOrDie(); substrait_call.add_args()->CopyFrom(*string_1); substrait_call.add_args()->CopyFrom(*string_2); return std::move(substrait_call); @@ -613,24 +613,24 @@ ArrowToSubstrait arrow_binary_join_to_substrait = [] (const arrow::compute::Expr // Cast function mapping SubstraitToArrow substrait_cast_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - ExtensionSet ext_set_; + ExtensionSet ext_set; ARROW_ASSIGN_OR_RAISE(auto output_type_desc, - FromProto(call.output_type(), ext_set_)); + FromProto(call.output_type(), ext_set)); auto cast_options = compute::CastOptions::Safe(std::move(output_type_desc.first)); - return compute::call("cast", {substrait_convert_arguments(call)[0]}, std::move(cast_options)); + return compute::call("cast", {ConvertSubstraitArguments(call)[0]}, std::move(cast_options)); }; -ArrowToSubstrait arrow_cast_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_cast_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("cast")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("cast")); substrait_call.set_function_reference(function_reference); auto arrow_to_type = std::dynamic_pointer_cast(call.options)->to_type; - ARROW_ASSIGN_OR_RAISE(auto substrait_to_type, ToProto(*arrow_to_type, false, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto substrait_to_type, ToProto(*arrow_to_type, false, ext_set)); substrait_call.set_allocated_output_type(substrait_to_type.get()); auto expression = call.arguments[0]; - ARROW_ASSIGN_OR_RAISE(auto value, ToProto(expression, ext_set_)); + ARROW_ASSIGN_OR_RAISE(auto value, ToProto(expression, ext_set)); substrait_call.add_args()->CopyFrom(*value); return substrait_call; @@ -638,7 +638,7 @@ ArrowToSubstrait arrow_cast_to_substrait = [] (const arrow::compute::Expression: // Datetime functions mapping SubstraitToArrow substrait_extract_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - auto func_args = substrait_convert_arguments(call); + auto func_args = ConvertSubstraitArguments(call); if(func_args[0].ToString() == "YEAR"){ return arrow::compute::call("year", {func_args[1]}); } else if (func_args[0].ToString() == "MONTH") { @@ -650,78 +650,78 @@ SubstraitToArrow substrait_extract_to_arrow = [] (const substrait::Expression::S } }; -ArrowToSubstrait arrow_year_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_year_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("extract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "YEAR"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "YEAR"); }; -ArrowToSubstrait arrow_month_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_month_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("extract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "MONTH"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "MONTH"); }; -ArrowToSubstrait arrow_day_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_day_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("extract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "DAY"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "DAY"); }; -ArrowToSubstrait arrow_second_to_arrow = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_second_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("extract")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("extract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_enum_arguments(call, substrait_call, ext_set_, "SECOND"); + return ConvertArrowEnumArguments(call, &substrait_call, ext_set, "SECOND"); }; // Substrait Datetime add/subtract mappings should work for datetime intervals functions as well SubstraitToArrow substrait_datetime_add_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("add", substrait_convert_arguments(call), compute::ArithmeticOptions()); + return arrow::compute::call("add", ConvertSubstraitArguments(call), compute::ArithmeticOptions()); }; SubstraitToArrow substrait_datetime_subtract_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("subtract", substrait_convert_arguments(call), compute::ArithmeticOptions()); + return arrow::compute::call("subtract", ConvertSubstraitArguments(call), compute::ArithmeticOptions()); }; -ArrowToSubstrait arrow_datetime_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_datetime_add_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("add")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_datetime_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_datetime_subtract_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("subtract")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_datetime_add_intervals_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_datetime_add_intervals_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("add_intervals")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("add_intervals")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; -ArrowToSubstrait arrow_datetime_subtract_intervals_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set_) -> Result { +ArrowToSubstrait arrow_datetime_subtract_intervals_to_substrait = [] (const arrow::compute::Expression::Call& call, ExtensionSet* ext_set) -> Result { substrait::Expression::ScalarFunction substrait_call; - ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set_->EncodeFunction("subtract_intervals")); + ARROW_ASSIGN_OR_RAISE(auto function_reference, ext_set->EncodeFunction("subtract_intervals")); substrait_call.set_function_reference(function_reference); - return arrow_convert_arguments(call, substrait_call, ext_set_); + return ConvertArrowArguments(call, &substrait_call, ext_set); }; // Aggregate functions mapping SubstraitToArrow substrait_aggregate_sum_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("sum", {substrait_convert_arguments(call)[1]}, compute::ScalarAggregateOptions()); + return arrow::compute::call("sum", {ConvertSubstraitArguments(call)[1]}, compute::ScalarAggregateOptions()); }; SubstraitToArrow substrait_aggregate_avg_to_arrow = [] (const substrait::Expression::ScalarFunction& call) -> Result { - return arrow::compute::call("avg", {substrait_convert_arguments(call)[1]}, compute::ScalarAggregateOptions()); + return arrow::compute::call("avg", {ConvertSubstraitArguments(call)[1]}, compute::ScalarAggregateOptions()); }; namespace {