-
Notifications
You must be signed in to change notification settings - Fork 1.5k
[Velox To Substrait] Remove hard-coded function references #2496
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a21ba57
2f11218
3b6a5d0
9db857b
5eb24fb
569e001
adde313
07a46ed
10e8dc6
88552e5
ad2ae36
dd51cbe
d6dac50
c6583f7
c0570f6
2a542f9
1a8ce2b
4d456e6
137f3cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| /* | ||
| * Copyright (c) Facebook, Inc. and its affiliates. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #include "velox/substrait/SubstraitExtensionCollector.h" | ||
|
|
||
| namespace facebook::velox::substrait { | ||
|
|
||
| /// Returns the reference number of the scalar function identified by its | ||
| /// name and argument types, assigning a new number on first use. | ||
| int SubstraitExtensionCollector::getReferenceNumber( | ||
| const std::string& functionName, | ||
| const std::vector<TypePtr>& arguments) { | ||
| // TODO: Currently all velox registry based function signatures are treated | ||
| // as custom substrait extensions, so there is no uri link and the uri part | ||
| // of the id stays empty. | ||
| ExtensionFunctionId functionId; | ||
| functionId.signature = | ||
| VeloxSubstraitSignature::toSubstraitSignature(functionName, arguments); | ||
| return getReferenceNumber(functionId); | ||
| } | ||
|
|
||
| /// Returns the reference number of the aggregate function identified by its | ||
| /// name and argument types. | ||
| int SubstraitExtensionCollector::getReferenceNumber( | ||
| const std::string& functionName, | ||
| const std::vector<TypePtr>& arguments, | ||
| const core::AggregationNode::Step aggregationStep) { | ||
| // TODO: aggregationStep is ignored for now, so the lookup is the same as for | ||
| // a scalar function; refactor when the velox registry is introduced for | ||
| // function signature binding. | ||
| const int referenceNumber = getReferenceNumber(functionName, arguments); | ||
| return referenceNumber; | ||
| } | ||
|
|
||
| /// Registers the (key, value) pair in both directions, but only when neither | ||
| /// the key nor the value is already present. Inserting into both maps | ||
| /// together, or into neither, keeps forwardMap_ and reverseMap_ exact | ||
| /// inverses of each other. | ||
| template <typename T> | ||
| void SubstraitExtensionCollector::BiDirectionHashMap<T>::putIfAbsent( | ||
| const int& key, | ||
| const T& value) { | ||
| if (forwardMap_.count(key) != 0 || reverseMap_.count(value) != 0) {
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we check both maps first, then insert a (key, value) only if forwardMap_ doesn't have key and reverseMap_ doesn't have value? |
||
| // One side of the pair is already mapped; adding only the other side would | ||
| // leave the two maps out of sync, so nothing is inserted. | ||
| return; | ||
| } | ||
| forwardMap_.emplace(key, value); | ||
| reverseMap_.emplace(value, key); | ||
| } | ||
|
|
||
| /// Writes one extension URI and one extension function declaration per | ||
| /// collected function into the given Substrait plan. | ||
| void SubstraitExtensionCollector::addExtensionsToPlan( | ||
| ::substrait::Plan* plan) const { | ||
| // No substrait extension YAML files are introduced yet, so the plan always | ||
| // carries exactly one URI entry, registered under anchor 1. | ||
| ::substrait::extensions::SimpleExtensionURI* uri = | ||
| plan->add_extension_uris(); | ||
| uri->set_extension_uri_anchor(1); | ||
|
|
||
| // Every collected function is declared against that single URI; its | ||
| // reference number becomes the function anchor and its signature the name. | ||
| for (const auto& entry : extensionFunctions_->forwardMap()) { | ||
| auto* declaration = plan->add_extensions()->mutable_extension_function(); | ||
| declaration->set_extension_uri_reference(uri->extension_uri_anchor()); | ||
| declaration->set_function_anchor(entry.first); | ||
| declaration->set_name(entry.second.signature); | ||
| } | ||
| } | ||
|
|
||
| /// Creates a collector whose bi-directional function map starts out empty. | ||
| SubstraitExtensionCollector::SubstraitExtensionCollector() | ||
| : extensionFunctions_( | ||
| std::make_shared<BiDirectionHashMap<ExtensionFunctionId>>()) {} | ||
|
|
||
| /// Returns the reference number already assigned to the given function id, | ||
| /// or assigns and returns the next free number when the id is new. | ||
| int SubstraitExtensionCollector::getReferenceNumber( | ||
| const ExtensionFunctionId& extensionFunctionId) { | ||
| const auto& knownFunctions = extensionFunctions_->reverseMap(); | ||
| const auto existing = knownFunctions.find(extensionFunctionId); | ||
| if (existing == knownFunctions.end()) { | ||
| // First occurrence of this function id: advance the counter and record the | ||
| // new association. | ||
| ++functionReferenceNumber; | ||
| extensionFunctions_->putIfAbsent( | ||
| functionReferenceNumber, extensionFunctionId); | ||
| return functionReferenceNumber; | ||
| } | ||
| return existing->second; | ||
| } | ||
|
|
||
| } // namespace facebook::velox::substrait | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,118 @@ | ||
| /* | ||
| * Copyright (c) Facebook, Inc. and its affiliates. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <optional> | ||
| #include "velox/core/Expressions.h" | ||
| #include "velox/core/PlanNode.h" | ||
| #include "velox/substrait/VeloxSubstraitSignature.h" | ||
| #include "velox/substrait/proto/substrait/algebra.pb.h" | ||
| #include "velox/substrait/proto/substrait/plan.pb.h" | ||
| #include "velox/type/Type.h" | ||
|
|
||
| namespace facebook::velox::substrait { | ||
|
|
||
| /// Identifies a Substrait extension function by the uri of its extension | ||
| /// file and its function signature. | ||
| struct ExtensionFunctionId { | ||
| /// Uri of the Substrait extension YAML file. | ||
| std::string uri; | ||
|
|
||
| /// Substrait signature used in the function extension declaration. It | ||
| /// combines the function name with the list of input argument types in the | ||
| /// format | ||
| /// <function name>:<short_arg_type0>_<short_arg_type1>_..._<short_arg_typeN>. | ||
| /// For details about the short argument type names please refer to | ||
| /// https://substrait.io/extensions/#function-signature-compound-names. | ||
| std::string signature;
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please, document the format of this field. It seems that for functions with primitive type arguments, "signature" describes the actual signature exactly. However, for the complex type argument, "signature" is not-exact e.g. it specifies the argument type as "map" or "array", but not as "map(integer, bool)" or "array(bigint)". Is this intentional? Why is this sufficient?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
|
|
||
| /// Two ids are equal when both the uri and the signature match. | ||
| bool operator==(const ExtensionFunctionId& other) const { | ||
| return uri == other.uri && signature == other.signature; | ||
| } | ||
| }; | ||
|
|
||
| /// Assigns unique IDs to function signatures using ExtensionFunctionId. | ||
| class SubstraitExtensionCollector { | ||
| public: | ||
| /// Creates an empty collector. | ||
| SubstraitExtensionCollector();
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This constructor is not needed. Member variables can be initialized inline.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I try to remove it, but it report the bi-direction map not initialized properly |
||
|
|
||
| /// Given a scalar function name and argument types, return the functionId | ||
| /// using ExtensionFunctionId. | ||
| int getReferenceNumber( | ||
| const std::string& functionName, | ||
| const std::vector<TypePtr>& arguments); | ||
|
|
||
| /// Given an aggregate function name and argument types and aggregation Step, | ||
| /// return the functionId using ExtensionFunctionId. | ||
| int getReferenceNumber( | ||
| const std::string& functionName, | ||
| const std::vector<TypePtr>& arguments, | ||
| core::AggregationNode::Step aggregationStep); | ||
|
|
||
| /// Add extension functions to Substrait plan. | ||
| void addExtensionsToPlan(::substrait::Plan* plan) const; | ||
|
|
||
| private: | ||
| /// A bi-direction hash map to keep the relation between reference number and | ||
| /// either function or type signature. | ||
| /// @tparam T Type of the value associated with a reference number, e.g. | ||
| /// ExtensionFunctionId. It is used as a hash map key, so it needs a std::hash | ||
| /// specialization and operator==. | ||
| template <class T> | ||
| class BiDirectionHashMap { | ||
| public: | ||
| /// Records the association between the given key and value. An entry that | ||
| /// already exists in either map is never overwritten. | ||
| void putIfAbsent(const int& key, const T& value); | ||
|
|
||
| /// Read-only view of the reference number -> value direction. Returned by | ||
| /// reference so that callers iterating over it do not copy the whole map. | ||
| const std::unordered_map<int, T>& forwardMap() const { | ||
| return forwardMap_; | ||
| } | ||
|
|
||
| /// Read-only view of the value -> reference number direction. | ||
| const std::unordered_map<T, int>& reverseMap() const { | ||
| return reverseMap_; | ||
| } | ||
|
|
||
| private: | ||
| std::unordered_map<int, T> forwardMap_; | ||
| std::unordered_map<T, int> reverseMap_; | ||
| }; | ||
|
|
||
| /// Assigns unique IDs to function signatures using ExtensionFunctionId. | ||
| int getReferenceNumber(const ExtensionFunctionId& extensionFunctionId); | ||
|
|
||
| /// Last reference number handed out. It starts at -1 and is incremented | ||
| /// before each assignment, so the first function gets number 0. | ||
| int functionReferenceNumber = -1; | ||
| /// Relation between reference numbers and the collected extension functions. | ||
| std::shared_ptr<BiDirectionHashMap<ExtensionFunctionId>> extensionFunctions_; | ||
| }; | ||
|
|
||
| using SubstraitExtensionCollectorPtr = | ||
| std::shared_ptr<SubstraitExtensionCollector>; | ||
|
|
||
| } // namespace facebook::velox::substrait | ||
|
|
||
| namespace std { | ||
|
|
||
| /// Hash function of facebook::velox::substrait::ExtensionFunctionId. Combines | ||
| /// the hashes of the uri and the signature, the same two fields that | ||
| /// ExtensionFunctionId::operator== compares. | ||
| template <> | ||
| struct hash<facebook::velox::substrait::ExtensionFunctionId> { | ||
| size_t operator()( | ||
| const facebook::velox::substrait::ExtensionFunctionId& k) const { | ||
| size_t val = hash<std::string>()(k.uri); | ||
| val = val * 31 + hash<std::string>()(k.signature); | ||
| return val; | ||
| } | ||
| }; | ||
|
|
||
| } // namespace std | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| /* | ||
| * Copyright (c) Facebook, Inc. and its affiliates. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #include "velox/substrait/VeloxSubstraitSignature.h" | ||
| #include "velox/functions/FunctionRegistry.h" | ||
|
|
||
| namespace facebook::velox::substrait { | ||
|
|
||
| /// Maps a velox type kind to the short type name used in Substrait function | ||
| /// signatures. Raises an unsupported error for any kind without a mapping. | ||
| std::string VeloxSubstraitSignature::toSubstraitSignature( | ||
| const TypeKind typeKind) { | ||
| switch (typeKind) { | ||
| // Boolean and integer kinds. | ||
| case TypeKind::BOOLEAN: | ||
| return "bool"; | ||
| case TypeKind::TINYINT: | ||
| return "i8"; | ||
| case TypeKind::SMALLINT: | ||
| return "i16"; | ||
| case TypeKind::INTEGER: | ||
| return "i32"; | ||
| case TypeKind::BIGINT: | ||
| return "i64"; | ||
| // Floating point kinds. | ||
| case TypeKind::REAL: | ||
| return "fp32"; | ||
| case TypeKind::DOUBLE: | ||
| return "fp64"; | ||
| // String and binary kinds. | ||
| case TypeKind::VARCHAR: | ||
| return "str"; | ||
| case TypeKind::VARBINARY: | ||
| return "vbin"; | ||
| // Temporal kinds. | ||
| case TypeKind::TIMESTAMP: | ||
| return "ts"; | ||
| case TypeKind::DATE: | ||
| return "date"; | ||
| // Both decimal kinds share the same short name. | ||
| case TypeKind::SHORT_DECIMAL: | ||
| case TypeKind::LONG_DECIMAL: | ||
| return "dec"; | ||
| // Complex kinds are reported without their nested types. | ||
| case TypeKind::ARRAY: | ||
| return "list"; | ||
| case TypeKind::MAP: | ||
| return "map"; | ||
| case TypeKind::ROW: | ||
| return "struct"; | ||
| case TypeKind::UNKNOWN: | ||
| return "u!name"; | ||
| default: | ||
| VELOX_UNSUPPORTED( | ||
| "Substrait type signature conversion not supported for type {}.", | ||
| mapTypeKindToName(typeKind)); | ||
| } | ||
| } | ||
|
|
||
| /// Builds the Substrait function signature <name>:<type0>_<type1>_..._<typeN> | ||
| /// from a function name and its argument types. A function without arguments | ||
| /// is represented by its bare name. | ||
| std::string VeloxSubstraitSignature::toSubstraitSignature( | ||
| const std::string& functionName, | ||
| const std::vector<TypePtr>& arguments) { | ||
| std::string signature = functionName; | ||
| // The first short type name follows a ':', every later one follows a '_'. | ||
| char separator = ':'; | ||
| for (const auto& argument : arguments) { | ||
| signature += separator; | ||
| signature += toSubstraitSignature(argument->kind()); | ||
| separator = '_'; | ||
| } | ||
| return signature; | ||
| } | ||
|
|
||
| } // namespace facebook::velox::substrait |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| /* | ||
| * Copyright (c) Facebook, Inc. and its affiliates. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "velox/type/Type.h" | ||
|
|
||
| namespace facebook::velox::substrait { | ||
|
|
||
| /// Converts velox types and function names into Substrait signatures. | ||
| class VeloxSubstraitSignature { | ||
| public: | ||
| /// Given a velox type kind, return the short Substrait type name used inside | ||
| /// function signatures, throw if no match found.
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most of this comment seems to belong to the other method. Please, update. |
||
| static std::string toSubstraitSignature(const TypeKind typeKind); | ||
|
|
||
| /// Given a velox scalar function name and argument types, return the | ||
| /// substrait function signature. The Substrait signature used in the | ||
| /// function extension declaration is a combination of the name of the | ||
| /// function along with a list of input argument types. The format is as | ||
| /// follows: | ||
| /// <function name>:<short_arg_type0>_<short_arg_type1>_..._<short_arg_typeN>. | ||
| /// A function without arguments is represented by its name only. For more | ||
| /// detailed information about the argument types please refer to | ||
| /// https://substrait.io/extensions/#function-signature-compound-names. | ||
| static std::string toSubstraitSignature( | ||
| const std::string& functionName, | ||
| const std::vector<TypePtr>& arguments); | ||
| }; | ||
|
|
||
| } // namespace facebook::velox::substrait | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is it OK to ignore the step? There is already a registry of aggregate functions. What's missing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because we agreed to move the Velox registry integration into a separate PR.