Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion velox/substrait/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ set(SRCS
SubstraitToVeloxExpr.cpp
SubstraitToVeloxPlan.cpp
TypeUtils.cpp
SubstraitExtensionCollector.cpp
VeloxToSubstraitExpr.cpp
VeloxToSubstraitPlan.cpp
VeloxToSubstraitType.cpp)
VeloxToSubstraitType.cpp
VeloxSubstraitSignature.cpp)

add_library(velox_substrait_plan_converter ${SRCS})
target_include_directories(velox_substrait_plan_converter
Expand Down
89 changes: 89 additions & 0 deletions velox/substrait/SubstraitExtensionCollector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/substrait/SubstraitExtensionCollector.h"

namespace facebook::velox::substrait {

int SubstraitExtensionCollector::getReferenceNumber(
const std::string& functionName,
const std::vector<TypePtr>& arguments) {
const auto& substraitFunctionSignature =
VeloxSubstraitSignature::toSubstraitSignature(functionName, arguments);
// TODO: Currently we treat all velox registry based function signatures as
// custom substrait extension, so no uri link and leave it as empty.
return getReferenceNumber({"", substraitFunctionSignature});
}

/// Returns the reference number for an aggregate function. The aggregation
/// step does not take part in the lookup yet: the call is forwarded to the
/// scalar overload with the function name and argument types only.
int SubstraitExtensionCollector::getReferenceNumber(
const std::string& functionName,
const std::vector<TypePtr>& arguments,
const core::AggregationNode::Step aggregationStep) {
// TODO: Ignore aggregationStep for now, will refactor when introduce velox
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it OK to ignore the step? There is already a registry of aggregate functions. What's missing?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we make an agreement that separate the velox registry to a new PR.

// registry for function signature binding
return getReferenceNumber(functionName, arguments);
}

/// Records the key -> value and the value -> key association. Each direction
/// is checked independently: the forward entry is added only when the key is
/// absent from forwardMap_, and the reverse entry only when the value is
/// absent from reverseMap_. Existing entries are never overwritten.
/// NOTE(review): because the two checks are independent, a call with a new key
/// and an already registered value (or the other way round) updates only one
/// of the two maps -- confirm callers never do this.
template <typename T>
void SubstraitExtensionCollector::BiDirectionHashMap<T>::putIfAbsent(
const int& key,
const T& value) {
if (forwardMap_.find(key) == forwardMap_.end()) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we check both maps first, then insert a (key, value) only if forwardMap_ doesn't have key and reverseMap_ doesn't have value?

forwardMap_[key] = value;
}
if (reverseMap_.find(value) == reverseMap_.end()) {
reverseMap_[value] = key;
}
}

/// Writes every collected extension function into the given Substrait plan,
/// all of them referencing a single extension URI entry.
void SubstraitExtensionCollector::addExtensionsToPlan(
    ::substrait::Plan* plan) const {
  // Currently we don't introduce any substrait extension YAML files, so always
  // only have one URI.
  auto* uriEntry = plan->add_extension_uris();
  uriEntry->set_extension_uri_anchor(1);
  const auto uriAnchor = uriEntry->extension_uri_anchor();

  // One extension declaration per (reference number, function id) pair.
  for (const auto& entry : extensionFunctions_->forwardMap()) {
    auto* declaration = plan->add_extensions()->mutable_extension_function();
    declaration->set_extension_uri_reference(uriAnchor);
    declaration->set_function_anchor(entry.first);
    declaration->set_name(entry.second.signature);
  }
}

/// Creates a collector with an empty function map.
SubstraitExtensionCollector::SubstraitExtensionCollector()
    : extensionFunctions_(
          std::make_shared<BiDirectionHashMap<ExtensionFunctionId>>()) {}

/// Returns the reference number already registered for the given function id,
/// or registers the id under the next unused number and returns that number.
int SubstraitExtensionCollector::getReferenceNumber(
    const ExtensionFunctionId& extensionFunctionId) {
  const auto& knownFunctions = extensionFunctions_->reverseMap();
  const auto known = knownFunctions.find(extensionFunctionId);
  if (known == knownFunctions.end()) {
    // Not seen before: advance the counter and record the new association.
    ++functionReferenceNumber;
    extensionFunctions_->putIfAbsent(
        functionReferenceNumber, extensionFunctionId);
    return functionReferenceNumber;
  }
  return known->second;
}

} // namespace facebook::velox::substrait
118 changes: 118 additions & 0 deletions velox/substrait/SubstraitExtensionCollector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <optional>
#include "velox/core/Expressions.h"
#include "velox/core/PlanNode.h"
#include "velox/substrait/VeloxSubstraitSignature.h"
#include "velox/substrait/proto/substrait/algebra.pb.h"
#include "velox/substrait/proto/substrait/plan.pb.h"
#include "velox/type/Type.h"

namespace facebook::velox::substrait {

/// Identifies a Substrait extension function by the extension file it is
/// declared in and by its signature. Provides equality so it can be used as
/// a hash map key together with the std::hash specialization in this file.
struct ExtensionFunctionId {
/// Substrait extension YAML file uri.
std::string uri;

/// Substrait signature used in the function extension declaration: a
/// combination of the name of the function and the list of its input
/// argument types. The format is
/// <function name>:<short_arg_type0>_<short_arg_type1>_..._<short_arg_typeN>.
/// For more details about the argument type names, refer to
/// https://substrait.io/extensions/#function-signature-compound-names.
std::string signature;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please, document the format of this field. It seems that for functions with primitive type arguments, "signature" describes the actual signature exactly. However, for the complex type argument, "signature" is not-exact e.g. it specifies the argument type as "map" or "array", but not as "map(integer, bool)" or "array(bigint)". Is this intentional? Why is this sufficient?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated


bool operator==(const ExtensionFunctionId& other) const {
return (uri == other.uri && signature == other.signature);
}
};

/// Assigns unique IDs to function signatures using ExtensionFunctionId.
class SubstraitExtensionCollector {
public:
SubstraitExtensionCollector();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This constructor is not needed. Member variables can be initialized inline.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to remove it, but then it reports that the bi-directional map is not initialized properly.


/// Given a scalar function name and argument types, return the reference
/// number (function anchor) assigned to the matching ExtensionFunctionId.
int getReferenceNumber(
const std::string& functionName,
const std::vector<TypePtr>& arguments);

/// Given an aggregate function name, argument types and aggregation step,
/// return the reference number (function anchor) assigned to the matching
/// ExtensionFunctionId.
int getReferenceNumber(
const std::string& functionName,
const std::vector<TypePtr>& arguments,
core::AggregationNode::Step aggregationStep);

/// Add extension functions to Substrait plan.
void addExtensionsToPlan(::substrait::Plan* plan) const;

private:
/// A bi-directional hash map that keeps the relation between a reference
/// number and either a function or a type signature, so that the relation can
/// be looked up from both sides.
/// @tparam T Type of the value stored against each reference number, e.g.
/// ExtensionFunctionId. It is used as an unordered_map key, so it must be
/// hashable with std::hash and equality comparable.
template <class T>
class BiDirectionHashMap {
 public:
  /// For forwardMap_, if the specified key is not already associated with a
  /// value, associates it with the given value, else does nothing.
  /// For reverseMap_, if the specified value is not already associated with a
  /// key, associates it with the given key, else does nothing.
  void putIfAbsent(const int& key, const T& value);

  /// Returns the reference number -> value map. The map is returned by
  /// reference, so the result is only valid while this object is alive.
  const std::unordered_map<int, T>& forwardMap() const {
    return forwardMap_;
  }

  /// Returns the value -> reference number map. The map is returned by
  /// reference, so the result is only valid while this object is alive.
  const std::unordered_map<T, int>& reverseMap() const {
    return reverseMap_;
  }

 private:
  std::unordered_map<int, T> forwardMap_;
  std::unordered_map<T, int> reverseMap_;
};

/// Returns the reference number registered for extensionFunctionId, assigning
/// the next unused number if the id has not been seen before.
int getReferenceNumber(const ExtensionFunctionId& extensionFunctionId);

int functionReferenceNumber = -1;
std::shared_ptr<BiDirectionHashMap<ExtensionFunctionId>> extensionFunctions_;
};

using SubstraitExtensionCollectorPtr =
std::shared_ptr<SubstraitExtensionCollector>;

} // namespace facebook::velox::substrait

namespace std {

/// std::hash specialization for
/// facebook::velox::substrait::ExtensionFunctionId; combines the hashes of the
/// uri and the signature.
template <>
struct hash<facebook::velox::substrait::ExtensionFunctionId> {
  size_t operator()(
      const facebook::velox::substrait::ExtensionFunctionId& id) const {
    const hash<std::string> stringHasher{};
    const size_t uriHash = stringHasher(id.uri);
    const size_t signatureHash = stringHasher(id.signature);
    return uriHash * 31 + signatureHash;
  }
};

}; // namespace std
80 changes: 80 additions & 0 deletions velox/substrait/VeloxSubstraitSignature.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/substrait/VeloxSubstraitSignature.h"
#include "velox/functions/FunctionRegistry.h"

namespace facebook::velox::substrait {

/// Maps a Velox type kind to the short Substrait type name used inside
/// function signatures. Throws an unsupported error for kinds that have no
/// mapping.
std::string VeloxSubstraitSignature::toSubstraitSignature(
    const TypeKind typeKind) {
  switch (typeKind) {
    // Boolean and integer kinds.
    case TypeKind::BOOLEAN:
      return "bool";
    case TypeKind::TINYINT:
      return "i8";
    case TypeKind::SMALLINT:
      return "i16";
    case TypeKind::INTEGER:
      return "i32";
    case TypeKind::BIGINT:
      return "i64";

    // Floating point kinds.
    case TypeKind::REAL:
      return "fp32";
    case TypeKind::DOUBLE:
      return "fp64";

    // String and binary kinds.
    case TypeKind::VARCHAR:
      return "str";
    case TypeKind::VARBINARY:
      return "vbin";

    // Temporal kinds.
    case TypeKind::TIMESTAMP:
      return "ts";
    case TypeKind::DATE:
      return "date";

    // Both decimal widths share one Substrait name.
    case TypeKind::SHORT_DECIMAL:
    case TypeKind::LONG_DECIMAL:
      return "dec";

    // Container kinds are reported without their element types.
    case TypeKind::ARRAY:
      return "list";
    case TypeKind::MAP:
      return "map";
    case TypeKind::ROW:
      return "struct";

    case TypeKind::UNKNOWN:
      return "u!name";

    default:
      break;
  }
  VELOX_UNSUPPORTED(
      "Substrait type signature conversion not supported for type {}.",
      mapTypeKindToName(typeKind));
}

/// Builds the Substrait function signature
/// <function name>:<short_arg_type0>_..._<short_arg_typeN> from a function
/// name and its argument types. A function without arguments is identified by
/// its bare name.
std::string VeloxSubstraitSignature::toSubstraitSignature(
    const std::string& functionName,
    const std::vector<TypePtr>& arguments) {
  if (arguments.empty()) {
    return functionName;
  }

  // Translate every argument type into its short Substrait name.
  std::vector<std::string> shortTypeNames;
  shortTypeNames.reserve(arguments.size());
  for (const auto& argumentType : arguments) {
    shortTypeNames.push_back(toSubstraitSignature(argumentType->kind()));
  }

  std::string signature = functionName;
  signature += ":";
  signature += folly::join("_", shortTypeNames);
  return signature;
}

} // namespace facebook::velox::substrait
41 changes: 41 additions & 0 deletions velox/substrait/VeloxSubstraitSignature.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "velox/type/Type.h"

namespace facebook::velox::substrait {

/// Static helpers that build Substrait signature strings from Velox type
/// kinds and function names.
class VeloxSubstraitSignature {
public:
/// Given a Velox type kind, return the short Substrait type signature for it,
/// e.g. "i32" for INTEGER or "str" for VARCHAR.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of this comment seems to belong to the other method. Please, update.

/// Container kinds are reported by their bare name ("list", "map", "struct")
/// without element types. Throws if the type kind has no matching Substrait
/// type signature.
static std::string toSubstraitSignature(const TypeKind typeKind);

/// Given a Velox scalar function name and argument types, return the
/// Substrait function signature. The signature used in the function extension
/// declaration is a combination of the name of the function and the list of
/// its input argument types. The format is
/// <function name>:<short_arg_type0>_<short_arg_type1>_..._<short_arg_typeN>.
/// When there are no arguments, the signature is the function name alone.
/// For more details about the argument type names, refer to
/// https://substrait.io/extensions/#function-signature-compound-names.
static std::string toSubstraitSignature(
const std::string& functionName,
const std::vector<TypePtr>& arguments);
};

} // namespace facebook::velox::substrait
10 changes: 8 additions & 2 deletions velox/substrait/VeloxToSubstraitExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,14 @@ const ::substrait::Expression& VeloxToSubstraitExprConvertor::toSubstraitExpr(
::substrait::Expression_ScalarFunction* scalarExpr =
substraitExpr->mutable_scalar_function();

// TODO need to change yaml file to register function, now is dummy.
scalarExpr->set_function_reference(functionMap_[functionName]);
std::vector<TypePtr> types;
types.reserve(callTypeExpr->inputs().size());
for (auto& typedExpr : callTypeExpr->inputs()) {
types.emplace_back(typedExpr->type());
}

scalarExpr->set_function_reference(
extensionCollector_->getReferenceNumber(functionName, types));

for (auto& arg : inputs) {
scalarExpr->add_arguments()->mutable_value()->MergeFrom(
Expand Down
Loading