diff --git a/velox/connectors/hive/HiveConnector.cpp b/velox/connectors/hive/HiveConnector.cpp index 08ec62e94369..950a84d00112 100644 --- a/velox/connectors/hive/HiveConnector.cpp +++ b/velox/connectors/hive/HiveConnector.cpp @@ -21,7 +21,6 @@ #include "velox/connectors/hive/HiveDataSource.h" #include "velox/connectors/hive/HivePartitionFunction.h" #include "velox/connectors/hive/iceberg/IcebergDataSink.h" -#include "velox/functions/iceberg/Register.h" #include #include @@ -43,7 +42,6 @@ HiveConnector::HiveConnector( : nullptr, std::make_unique(hiveConfig_->config())), ioExecutor_(ioExecutor) { - iceberg::registerIcebergFunctions(); if (hiveConfig_->isFileHandleCacheEnabled()) { LOG(INFO) << "Hive connector " << connectorId() << " created with maximum of " diff --git a/velox/connectors/hive/iceberg/CMakeLists.txt b/velox/connectors/hive/iceberg/CMakeLists.txt index 67d1a95c0f9e..5822cb7b4d2f 100644 --- a/velox/connectors/hive/iceberg/CMakeLists.txt +++ b/velox/connectors/hive/iceberg/CMakeLists.txt @@ -20,15 +20,6 @@ velox_add_library( IcebergSplitReader.cpp PartitionSpec.cpp PositionalDeleteFileReader.cpp - TransformEvaluator.cpp - TransformExprBuilder.cpp -) - -velox_link_libraries( - velox_hive_iceberg_splitreader - velox_connector - velox_functions_iceberg - Folly::folly ) velox_link_libraries(velox_hive_iceberg_splitreader velox_connector Folly::folly) diff --git a/velox/connectors/hive/iceberg/PartitionSpec.cpp b/velox/connectors/hive/iceberg/PartitionSpec.cpp index 4497c378a93e..4c9fae472ed0 100644 --- a/velox/connectors/hive/iceberg/PartitionSpec.cpp +++ b/velox/connectors/hive/iceberg/PartitionSpec.cpp @@ -16,7 +16,6 @@ #include "velox/connectors/hive/iceberg/PartitionSpec.h" -#include "velox/functions/iceberg/Register.h" #include "velox/functions/prestosql/types/TimestampWithTimeZoneType.h" namespace facebook::velox::connector::hive::iceberg { @@ -103,14 +102,6 @@ VELOX_DEFINE_ENUM_NAME(TransformType, transformTypeNames); VELOX_DEFINE_ENUM_NAME(TransformCategory, transformCategoryNames); -void registerIcebergFunctions() { - static std::once_flag registerFlag; - - std::call_once(registerFlag, []() { - functions::iceberg::registerFunctions(kIcebergFunctionPrefix); - }); -} - void IcebergPartitionSpec::checkCompatibility() const { folly::F14FastMap> columnTransforms; diff --git a/velox/connectors/hive/iceberg/PartitionSpec.h b/velox/connectors/hive/iceberg/PartitionSpec.h index 050b2bc22c43..10d489fd61cc 100644 --- a/velox/connectors/hive/iceberg/PartitionSpec.h +++ b/velox/connectors/hive/iceberg/PartitionSpec.h @@ -20,25 +20,6 @@ namespace facebook::velox::connector::hive::iceberg { -inline constexpr char const* kIcebergFunctionPrefix{"iceberg_"}; - -inline const std::string kBucketFunction = - std::string(kIcebergFunctionPrefix) + "bucket"; -inline const std::string kTruncateFunction = - std::string(kIcebergFunctionPrefix) + "truncate"; -inline const std::string kYearFunction = - std::string(kIcebergFunctionPrefix) + "years"; -inline const std::string kMonthFunction = - std::string(kIcebergFunctionPrefix) + "months"; -inline const std::string kDayFunction = - std::string(kIcebergFunctionPrefix) + "days"; -inline const std::string kHourFunction = - std::string(kIcebergFunctionPrefix) + "hours"; - -/// Registers Iceberg partition transform functions with prefix -/// kIcebergFunctionPrefix. -void registerIcebergFunctions(); - /// Partition transform types. /// Defines how source column values are converted into partition keys. /// See https://iceberg.apache.org/spec/#partition-transforms. @@ -132,7 +113,6 @@ struct IcebergPartitionSpec { case TransformType::kTruncate: return type; } - VELOX_UNREACHABLE("Unknown transform type"); } }; diff --git a/velox/connectors/hive/iceberg/TransformEvaluator.cpp b/velox/connectors/hive/iceberg/TransformEvaluator.cpp deleted file mode 100644 index 2744bddafa75..000000000000 --- a/velox/connectors/hive/iceberg/TransformEvaluator.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/connectors/hive/iceberg/TransformEvaluator.h" - -#include "velox/expression/Expr.h" - -namespace facebook::velox::connector::hive::iceberg { - -TransformEvaluator::TransformEvaluator( - const std::vector& expressions, - const ConnectorQueryCtx* connectorQueryCtx) - : connectorQueryCtx_(connectorQueryCtx) { - VELOX_CHECK_NOT_NULL(connectorQueryCtx_); - exprSet_ = connectorQueryCtx_->expressionEvaluator()->compile(expressions); - VELOX_CHECK_NOT_NULL(exprSet_); -} - -std::vector TransformEvaluator::evaluate( - const RowVectorPtr& input) const { - const auto numRows = input->size(); - const auto numExpressions = exprSet_->exprs().size(); - - std::vector results(numExpressions); - SelectivityVector rows(numRows); - - // Evaluate all expressions in one pass. - connectorQueryCtx_->expressionEvaluator()->evaluate( - exprSet_.get(), rows, *input, results); - - return results; -} - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformEvaluator.h b/velox/connectors/hive/iceberg/TransformEvaluator.h deleted file mode 100644 index ee7b26f7db8c..000000000000 --- a/velox/connectors/hive/iceberg/TransformEvaluator.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "velox/connectors/Connector.h" -#include "velox/core/QueryCtx.h" -#include "velox/expression/Expr.h" - -namespace facebook::velox::connector::hive::iceberg { - -/// Evaluates multiple expressions efficiently using batch evaluation. -/// Expressions are compiled once in the constructor and reused across multiple -/// input batches. -class TransformEvaluator { - public: - /// Creates an evaluator with the given expressions and connector query - /// context. Compiles the expressions once for reuse across multiple - /// evaluations. - /// - /// @param expressions Vector of typed expressions to evaluate. These are - /// typically built using TransformExprBuilder::toExpressions() for Iceberg - /// partition transforms, but can be any valid Velox expressions. The - /// expressions are compiled once during construction. - /// @param connectorQueryCtx Connector query context providing access to the - /// expression evaluator (for compilation and evaluation) and memory pool. - /// Must remain valid for the lifetime of this TransformEvaluator. - TransformEvaluator( - const std::vector& expressions, - const ConnectorQueryCtx* connectorQueryCtx); - - /// Evaluates all expressions on the input data in a single pass. - /// Uses the pre-compiled ExprSet from the constructor for efficiency. - /// - /// The input RowType must match the RowType used when building the - /// expressions (passed to TransformExprBuilder::toExpressions). The column - /// positions, names and types must align. Create new TransformEvaluator for - /// input that has different RowType with the one when building the - /// expressions. - /// - /// @param input Input row vector containing the source data. Must have the - /// same RowType (column positions, names and types) as used when building the - /// expressions in the constructor. - /// @return Vector of result columns, one for each expression, in the same - /// order as the expressions provided to the constructor. - std::vector evaluate(const RowVectorPtr& input) const; - - private: - const ConnectorQueryCtx* connectorQueryCtx_; - std::unique_ptr exprSet_; -}; - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformExprBuilder.cpp b/velox/connectors/hive/iceberg/TransformExprBuilder.cpp deleted file mode 100644 index faf0acf3369a..000000000000 --- a/velox/connectors/hive/iceberg/TransformExprBuilder.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "velox/connectors/hive/iceberg/TransformExprBuilder.h" -#include "velox/core/Expressions.h" - -namespace facebook::velox::connector::hive::iceberg { - -namespace { - -/// Converts a single partition field to a typed expression. -/// -/// Builds an expression tree for one partition transform. Identity transforms -/// become FieldAccessTypedExpr, while other transforms (bucket, truncate, -/// year, month, day, hour) become CallTypedExpr with appropriate function -/// names and parameters. -/// -/// @param field Partition field containing transform type, source column -/// type, and optional parameter (e.g., bucket count, truncate width). -/// @param inputFieldName Name of the source column in the input RowVector. -/// @return Typed expression representing the transform. -core::TypedExprPtr toExpression( - const IcebergPartitionSpec::Field& field, - const std::string& inputFieldName) { - // For identity transform, just return a field access expression. - if (field.transformType == TransformType::kIdentity) { - return std::make_shared( - field.type, inputFieldName); - } - - // For other transforms, build a CallTypedExpr with the appropriate function. - std::string functionName; - switch (field.transformType) { - case TransformType::kBucket: - functionName = kBucketFunction; - break; - case TransformType::kTruncate: - functionName = kTruncateFunction; - break; - case TransformType::kYear: - functionName = kYearFunction; - break; - case TransformType::kMonth: - functionName = kMonthFunction; - break; - case TransformType::kDay: - functionName = kDayFunction; - break; - case TransformType::kHour: - functionName = kHourFunction; - break; - case TransformType::kIdentity: - break; - } - - // Build the expression arguments. - std::vector exprArgs; - if (field.parameter.has_value()) { - exprArgs.emplace_back( - std::make_shared( - INTEGER(), Variant(field.parameter.value()))); - } - exprArgs.emplace_back( - std::make_shared(field.type, inputFieldName)); - - return std::make_shared( - field.resultType(), std::move(exprArgs), functionName); -} - -} // namespace - -std::vector TransformExprBuilder::toExpressions( - const IcebergPartitionSpecPtr& partitionSpec, - const std::vector& partitionChannels, - const RowTypePtr& inputType) { - VELOX_CHECK_EQ( - partitionSpec->fields.size(), - partitionChannels.size(), - "Number of partition fields must match number of partition channels"); - - const auto numTransforms = partitionChannels.size(); - std::vector transformExprs; - transformExprs.reserve(numTransforms); - - for (auto i = 0; i < numTransforms; i++) { - const auto channel = partitionChannels[i]; - transformExprs.emplace_back( - toExpression(partitionSpec->fields.at(i), inputType->nameOf(channel))); - } - - return transformExprs; -} - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/TransformExprBuilder.h b/velox/connectors/hive/iceberg/TransformExprBuilder.h deleted file mode 100644 index 29631348d134..000000000000 --- a/velox/connectors/hive/iceberg/TransformExprBuilder.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "velox/connectors/hive/iceberg/PartitionSpec.h" -#include "velox/expression/Expr.h" - -namespace facebook::velox::connector::hive::iceberg { - -/// Converts Iceberg partition specification to Velox expressions. -class TransformExprBuilder { - public: - /// Converts partition specification to a list of typed expressions. - /// - /// @param partitionSpec Iceberg partition specification containing transform - /// definitions for each partition field. - /// @param partitionChannels Column indices (0-based) in the input RowVector - /// that correspond to each partition field. Must have the same size as - /// partitionSpec->fields. Provides the positional mapping from partition spec - /// fields to input RowVector columns. - /// @param inputType The row type of the input data. This is necessary for - /// building expressions because the column names in partitionSpec reference - /// table schema names, which might not match the column names in inputType - /// (e.g., inputType may use generated names like c0, c1, c2). The - /// FieldAccessTypedExpr must be built using the actual column names from - /// inputType that will be present at runtime. The partitionChannels provide - /// the positional mapping to locate the correct columns. - /// @return Vector of typed expressions, one for each partition field. - static std::vector toExpressions( - const IcebergPartitionSpecPtr& partitionSpec, - const std::vector& partitionChannels, - const RowTypePtr& inputType); -}; - -} // namespace facebook::velox::connector::hive::iceberg diff --git a/velox/connectors/hive/iceberg/tests/CMakeLists.txt b/velox/connectors/hive/iceberg/tests/CMakeLists.txt index 9121fe4c6b85..bed84d138c1b 100644 --- a/velox/connectors/hive/iceberg/tests/CMakeLists.txt +++ b/velox/connectors/hive/iceberg/tests/CMakeLists.txt @@ -64,7 +64,6 @@ if(NOT VELOX_DISABLE_GOOGLETEST) IcebergTestBase.cpp Main.cpp PartitionSpecTest.cpp - TransformTest.cpp ) add_test(velox_hive_iceberg_insert_test velox_hive_iceberg_insert_test) diff --git a/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp b/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp index 232f10c6b81e..c7c7a581c46f 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp +++ b/velox/connectors/hive/iceberg/tests/IcebergTestBase.cpp @@ -20,7 +20,6 @@ #include "velox/connectors/hive/iceberg/IcebergSplit.h" #include "velox/connectors/hive/iceberg/PartitionSpec.h" -#include "velox/expression/Expr.h" namespace facebook::velox::connector::hive::iceberg::test { @@ -52,7 +51,6 @@ void IcebergTestBase::TearDown() { connectorPool_.reset(); opPool_.reset(); root_.reset(); - queryCtx_.reset(); HiveConnectorTestBase::TearDown(); } @@ -61,7 +59,6 @@ void IcebergTestBase::setupMemoryPools() { opPool_.reset(); connectorPool_.reset(); connectorQueryCtx_.reset(); - queryCtx_.reset(); root_ = memory::memoryManager()->addRootPool( "IcebergTest", 1L << 30, exec::MemoryReclaimer::create()); @@ -69,17 +66,13 @@ void IcebergTestBase::setupMemoryPools() { connectorPool_ = root_->addAggregateChild("connector", exec::MemoryReclaimer::create()); - queryCtx_ = core::QueryCtx::create(nullptr, core::QueryConfig({})); - auto expressionEvaluator = std::make_unique( - queryCtx_.get(), opPool_.get()); - - connectorQueryCtx_ = std::make_unique( + connectorQueryCtx_ = std::make_unique( opPool_.get(), connectorPool_.get(), connectorSessionProperties_.get(), nullptr, common::PrefixSortConfig(), - std::move(expressionEvaluator), + nullptr, nullptr, "query.IcebergTest", "task.IcebergTest", @@ -110,8 +103,8 @@ std::vector IcebergTestBase::createTestData( } std::shared_ptr IcebergTestBase::createPartitionSpec( - const RowTypePtr& rowType, - const std::vector& partitionFields) { + const std::vector& partitionFields, + const RowTypePtr& rowType) { std::vector fields; for (const auto& partitionField : partitionFields) { fields.push_back( @@ -161,7 +154,7 @@ IcebergInsertTableHandlePtr IcebergTestBase::createInsertTableHandle( outputDirectoryPath, LocationHandle::TableType::kNew); - auto partitionSpec = createPartitionSpec(rowType, partitionFields); + auto partitionSpec = createPartitionSpec(partitionFields, rowType); return std::make_shared( /*inputColumns=*/columnHandles, diff --git a/velox/connectors/hive/iceberg/tests/IcebergTestBase.h b/velox/connectors/hive/iceberg/tests/IcebergTestBase.h index 5a1bf146839a..ffe0a59c09af 100644 --- a/velox/connectors/hive/iceberg/tests/IcebergTestBase.h +++ b/velox/connectors/hive/iceberg/tests/IcebergTestBase.h @@ -65,12 +65,10 @@ class IcebergTestBase : public exec::test::HiveConnectorTestBase { std::vector listFiles(const std::string& dirPath); std::shared_ptr createPartitionSpec( - const RowTypePtr& rowType, - const std::vector& partitionFields); + const std::vector& partitionFields, + const RowTypePtr& rowType); dwio::common::FileFormat fileFormat_{dwio::common::FileFormat::DWRF}; - std::shared_ptr opPool_; - std::unique_ptr connectorQueryCtx_; private: IcebergInsertTableHandlePtr createInsertTableHandle( @@ -84,12 +82,13 @@ class IcebergTestBase : public exec::test::HiveConnectorTestBase { void setupMemoryPools(); std::shared_ptr root_; + std::shared_ptr opPool_; std::shared_ptr connectorPool_; std::shared_ptr connectorSessionProperties_; std::shared_ptr connectorConfig_; + std::unique_ptr connectorQueryCtx_; VectorFuzzer::Options fuzzerOptions_; std::unique_ptr fuzzer_; - std::shared_ptr queryCtx_; }; } // namespace facebook::velox::connector::hive::iceberg::test diff --git a/velox/connectors/hive/iceberg/tests/TransformTest.cpp b/velox/connectors/hive/iceberg/tests/TransformTest.cpp deleted file mode 100644 index c90ac600b8b9..000000000000 --- a/velox/connectors/hive/iceberg/tests/TransformTest.cpp +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) Facebook, Inc. and its affiliates. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "velox/common/encode/Base64.h" -#include "velox/connectors/hive/iceberg/PartitionSpec.h" -#include "velox/connectors/hive/iceberg/TransformEvaluator.h" -#include "velox/connectors/hive/iceberg/TransformExprBuilder.h" -#include "velox/connectors/hive/iceberg/tests/IcebergTestBase.h" - -namespace facebook::velox::connector::hive::iceberg { - -namespace { - -class TransformTest : public test::IcebergTestBase { - protected: - void testTransform( - const IcebergPartitionSpecPtr& spec, - const RowVectorPtr& input, - const RowVectorPtr& expected) const { - std::vector partitionChannels; - for (const auto& field : spec->fields) { - partitionChannels.push_back(input->rowType()->getChildIdx(field.name)); - } - // Build and evaluate transform expressions. - auto transformExprs = TransformExprBuilder::toExpressions( - spec, partitionChannels, input->rowType()); - auto transformEvaluator = std::make_unique( - transformExprs, connectorQueryCtx_.get()); - auto result = transformEvaluator->evaluate(input); - - ASSERT_EQ(result.size(), expected->childrenSize()); - for (auto i = 0; i < result.size(); ++i) { - velox::test::assertEqualVectors(expected->childAt(i), result[i]); - } - } -}; - -TEST_F(TransformTest, identity) { - const auto& rowType = - ROW({"c0", "c1", "c2", "c3", "c4"}, - {INTEGER(), BIGINT(), VARCHAR(), VARBINARY(), TIMESTAMP()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kIdentity, std::nullopt}, - {1, TransformType::kIdentity, std::nullopt}, - {2, TransformType::kIdentity, std::nullopt}, - {3, TransformType::kIdentity, std::nullopt}, - {4, TransformType::kIdentity, std::nullopt}, - }); - - const std::vector input = { - makeFlatVector({1, -1}), - makeFlatVector({1L, -1L}), - makeFlatVector({("test data"), ("")}), - makeFlatVector({("\x01\x02\x03"), ("")}, VARBINARY()), - makeFlatVector({Timestamp(0, 0), Timestamp(1609459200, 0)}), - }; - - testTransform(partitionSpec, makeRowVector(input), makeRowVector(input)); -} - -TEST_F(TransformTest, nulls) { - const auto& rowType = - ROW({"c0", "c1", "c2", "c3", "c4", "c5", "c6"}, - {INTEGER(), - VARCHAR(), - VARBINARY(), - DATE(), - TIMESTAMP(), - TIMESTAMP(), - TIMESTAMP()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kIdentity, std::nullopt}, - {1, TransformType::kBucket, 8}, - {2, TransformType::kTruncate, 16}, - {3, TransformType::kYear, std::nullopt}, - {4, TransformType::kMonth, std::nullopt}, - {5, TransformType::kDay, std::nullopt}, - {6, TransformType::kHour, std::nullopt}, - }); - testTransform( - partitionSpec, - makeRowVector({ - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}, VARBINARY()), - makeNullableFlatVector({std::nullopt}, DATE()), - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}), - }), - makeRowVector({ - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}, VARBINARY()), - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}), - makeNullableFlatVector({std::nullopt}, DATE()), - makeNullableFlatVector({std::nullopt}), - })); -} - -TEST_F(TransformTest, bucket) { - const auto& rowType = - ROW({"c0", "c1", "c2", "c3", "c4", "c5"}, - {INTEGER(), BIGINT(), VARCHAR(), VARBINARY(), DATE(), TIMESTAMP()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kBucket, 4}, - {1, TransformType::kBucket, 8}, - {2, TransformType::kBucket, 16}, - {3, TransformType::kBucket, 32}, - {4, TransformType::kBucket, 10}, - {5, TransformType::kBucket, 8}, - }); - - testTransform( - partitionSpec, - makeRowVector({ - makeFlatVector({8, 34, 0}), - makeFlatVector({34L, 0L, -34L}), - makeFlatVector({"abcdefg", "测试", ""}), - makeFlatVector( - {"\x61\x62\x64\x00\x00", "\x01\x02\x03\x04", "\x00"}, - VARBINARY()), - makeFlatVector({0, 365, 18'262}), - makeFlatVector( - {Timestamp(0, 0), - Timestamp(-31536000, 0), - Timestamp(1612224000, 0)}), - }), - makeRowVector({ - makeFlatVector({3, 3, 0}), - makeFlatVector({3, 4, 5}), - makeFlatVector({6, 8, 0}), - makeFlatVector({26, 5, 0}), - makeFlatVector({6, 1, 3}), - makeFlatVector({4, 3, 5}), - })); -} - -TEST_F(TransformTest, year) { - const auto& rowType = ROW({"c0", "c1"}, {DATE(), TIMESTAMP()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kYear, std::nullopt}, - {1, TransformType::kYear, std::nullopt}, - }); - - testTransform( - partitionSpec, - makeRowVector({ - makeFlatVector({0, 18'262, -365}), - makeFlatVector( - {Timestamp(0, 0), - Timestamp(31536000, 0), - Timestamp(-31536000, 0)}), - }), - makeRowVector({ - makeFlatVector({0, 50, -1}), - makeFlatVector({0, 1, -1}), - })); -} - -TEST_F(TransformTest, month) { - const auto& rowType = ROW({"c0", "c1"}, {DATE(), TIMESTAMP()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kMonth, std::nullopt}, - {1, TransformType::kMonth, std::nullopt}, - }); - - testTransform( - partitionSpec, - makeRowVector({ - makeFlatVector({0, 18'262, -365}), - makeFlatVector( - {Timestamp(0, 0), - Timestamp(31536000, 0), - Timestamp(-2678400, 0)}), - }), - makeRowVector({ - makeFlatVector({0, 600, -12}), - makeFlatVector({0, 12, -1}), - })); -} - -TEST_F(TransformTest, day) { - const auto& rowType = ROW({"c0", "c1"}, {DATE(), TIMESTAMP()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kDay, std::nullopt}, - {1, TransformType::kDay, std::nullopt}, - }); - - testTransform( - partitionSpec, - makeRowVector({ - makeFlatVector({0, 17532, -1}, DATE()), - makeFlatVector( - {Timestamp(0, 0), - Timestamp(1514764800, 0), - Timestamp(-86400, 0)}), - }), - makeRowVector({ - makeFlatVector({0, 17532, -1}, DATE()), - makeFlatVector({0, 17532, -1}, DATE()), - })); -} - -TEST_F(TransformTest, hour) { - const auto& partitionSpec = createPartitionSpec( - ROW({"c0"}, {TIMESTAMP()}), {{0, TransformType::kHour, std::nullopt}}); - - testTransform( - partitionSpec, - makeRowVector({makeFlatVector({ - Timestamp(0, 0), - Timestamp(3600, 0), - Timestamp(-3600, 0), - })}), - makeRowVector({makeFlatVector({0, 1, -1})})); -} - -TEST_F(TransformTest, truncate) { - const auto& rowType = ROW( - {"c0", "c1", "c2", "c3"}, {INTEGER(), BIGINT(), VARCHAR(), VARBINARY()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kTruncate, 10}, - {1, TransformType::kTruncate, 100}, - {2, TransformType::kTruncate, 5}, - {3, TransformType::kTruncate, 3}, - }); - - testTransform( - partitionSpec, - makeRowVector({ - makeFlatVector({11, -11, 5}), - makeFlatVector({123L, -123L, 50L}), - makeFlatVector({"abcdefg", "测试data", "x"}), - makeFlatVector( - {"abcdefg", "\x01\x02\x03\x04", "\x05"}, VARBINARY()), - }), - makeRowVector({ - makeFlatVector({10, -20, 0}), - makeFlatVector({100L, -200L, 0L}), - makeFlatVector({"abcde", "测试dat", "x"}), - makeFlatVector( - {"abc", "\x01\x02\x03", "\x05"}, VARBINARY()), - })); -} - -TEST_F(TransformTest, multipleTransforms) { - const auto& rowType = ROW({"c0", "c1", "c2"}, {INTEGER(), DATE(), VARCHAR()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kBucket, 4}, - {1, TransformType::kYear, std::nullopt}, - {2, TransformType::kTruncate, 3}, - }); - - testTransform( - partitionSpec, - makeRowVector({ - makeFlatVector({8, 34}), - makeFlatVector({0, 17532}), - makeFlatVector({"abcdefg", "ab c"}), - }), - makeRowVector({ - makeFlatVector({3, 3}), - makeFlatVector({0, 48}), - makeFlatVector({"abc", "ab "}), - })); -} - -TEST_F(TransformTest, multipleTransformsOnSameColumn) { - const auto& rowType = ROW({"c0", "c1"}, {DATE(), VARCHAR()}); - const auto& partitionSpec = createPartitionSpec( - rowType, - { - {0, TransformType::kYear, std::nullopt}, - {0, TransformType::kBucket, 10}, - {1, TransformType::kTruncate, 5}, - {1, TransformType::kBucket, 8}, - }); - - testTransform( - partitionSpec, - makeRowVector( - rowType->names(), - { - makeFlatVector({0, 17532}), - makeFlatVector({"abcdefg", "test"}), - }), - makeRowVector({ - makeFlatVector({0, 48}), - makeFlatVector({6, 7}), - makeFlatVector({"abcde", "test"}), - makeFlatVector({6, 3}), - })); -} - -} // namespace - -} // namespace facebook::velox::connector::hive::iceberg