Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions velox/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ if(${VELOX_BUILD_TEST_UTILS})
velox_connector
velox_exec
velox_exec_test_lib
velox_dwio_common
velox_dwio_registered_readers
velox_dwio_common_exception
velox_dwio_parquet_reader
velox_dwio_common_test_utils
velox_exception
velox_memory
Expand Down
6 changes: 2 additions & 4 deletions velox/benchmarks/QueryBenchmarkBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
#include "velox/common/base/SuccinctPrinter.h"
#include "velox/common/file/FileSystems.h"
#include "velox/connectors/hive/HiveConnector.h"
#include "velox/dwio/dwrf/RegisterDwrfReader.h"
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/dwio/RegisterReaders.h"
#include "velox/exec/Split.h"
#include "velox/exec/tests/utils/HiveConnectorTestBase.h"
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
Expand Down Expand Up @@ -219,8 +218,7 @@ void QueryBenchmarkBase::initialize() {
auto hiveConnector =
factory.newConnector(kHiveConnectorId, properties, ioExecutor_.get());
connector::registerConnector(hiveConnector);
parquet::registerParquetReaderFactory();
dwrf::registerDwrfReaderFactory();
dwio::registerReaderFactories();
}

std::vector<std::shared_ptr<connector::ConnectorSplit>>
Expand Down
3 changes: 1 addition & 2 deletions velox/benchmarks/tpch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,8 @@ target_link_libraries(
velox_aggregates
velox_exec
velox_exec_test_lib
velox_dwio_common
velox_dwio_registered_readers
velox_dwio_common_exception
velox_dwio_parquet_reader
velox_dwio_common_test_utils
velox_hive_connector
velox_exception
Expand Down
2 changes: 2 additions & 0 deletions velox/common/memory/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ target_link_libraries(
PRIVATE
velox_caching
velox_common_base
velox_dwio_registered_readers
velox_dwio_registered_writers
velox_exception
velox_exec
velox_exec_test_lib
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include "velox/connectors/hive/iceberg/IcebergMetadataColumns.h"
#include "velox/connectors/hive/iceberg/IcebergSplit.h"
#include "velox/connectors/hive/iceberg/IcebergSplitReader.h"
#include "velox/dwio/RegisterReaders.h"
#include "velox/dwio/common/tests/utils/DataSetBuilder.h"
#include "velox/dwio/dwrf/RegisterDwrfReader.h"
#include "velox/dwio/dwrf/writer/Writer.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"
#include "velox/vector/tests/utils/VectorTestBase.h"
Expand All @@ -46,7 +46,7 @@ class IcebergSplitReaderBenchmark {
dataSetBuilder_ =
std::make_unique<facebook::velox::test::DataSetBuilder>(*leafPool_, 0);
filesystems::registerLocalFileSystem();
dwrf::registerDwrfReaderFactory();
dwio::registerReaderFactories();
}

~IcebergSplitReaderBenchmark() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ target_link_libraries(
velox_gcs_file_test
velox_core
velox_dwio_common_exception
velox_dwio_registered_readers
velox_dwio_registered_writers
velox_exec
velox_exec_test_lib
velox_file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
#include "velox/connectors/hive/HiveConnectorSplit.h"
#include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h"
#include "velox/connectors/hive/storage_adapters/gcs/tests/GcsEmulator.h"
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/dwio/parquet/RegisterParquetWriter.h"
#include "velox/dwio/RegisterReaders.h"
#include "velox/dwio/RegisterWriters.h"
#include "velox/exec/TableWriter.h"
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/PlanBuilder.h"
Expand Down Expand Up @@ -52,8 +52,8 @@ class GcsMultipleEndpointsTest : public testing::Test,
gcsEmulatorTwo_ = std::make_unique<GcsEmulator>();
gcsEmulatorTwo_->bootstrap();

parquet::registerParquetReaderFactory();
parquet::registerParquetWriterFactory();
dwio::registerReaderFactories();
dwio::registerWriterFactories();
}

void registerConnectors(
Expand All @@ -75,8 +75,8 @@ class GcsMultipleEndpointsTest : public testing::Test,
}

void TearDown() override {
parquet::unregisterParquetReaderFactory();
parquet::unregisterParquetWriterFactory();
dwio::unregisterReaderFactories();
dwio::unregisterWriterFactories();
}

folly::dynamic writeData(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ target_link_libraries(
velox_s3fs
velox_core
velox_exec_test_lib
velox_dwio_parquet_reader
velox_dwio_registered_readers
velox_dwio_common_exception
velox_exec
GTest::gtest
Expand All @@ -70,8 +70,8 @@ target_link_libraries(
velox_s3fs
velox_core
velox_exec_test_lib
velox_dwio_parquet_writer
velox_dwio_parquet_reader
velox_dwio_registered_readers
velox_dwio_registered_writers
velox_dwio_common_exception
velox_exec
GTest::gtest
Expand All @@ -90,7 +90,7 @@ target_link_libraries(
velox_s3fs
velox_core
velox_exec_test_lib
velox_dwio_parquet_reader
velox_dwio_registered_readers
velox_dwio_common_exception
velox_exec
GTest::gtest
Expand All @@ -115,8 +115,8 @@ target_link_libraries(
velox_s3fs
velox_core
velox_exec_test_lib
velox_dwio_parquet_reader
velox_dwio_parquet_writer
velox_dwio_registered_readers
velox_dwio_registered_writers
velox_dwio_common_exception
velox_exec
GTest::gtest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h"
#include "velox/connectors/hive/storage_adapters/s3fs/S3Util.h"
#include "velox/connectors/hive/storage_adapters/s3fs/tests/S3Test.h"
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/dwio/parquet/RegisterParquetWriter.h"
#include "velox/dwio/RegisterReaders.h"
#include "velox/dwio/RegisterWriters.h"
#include "velox/exec/TableWriter.h"
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/PlanBuilder.h"
Expand Down Expand Up @@ -53,8 +53,8 @@ class S3MultipleEndpoints : public S3Test, public ::test::VectorTestBase {
minioSecondServer_->addBucket(kBucketName.data());

filesystems::registerS3FileSystem();
parquet::registerParquetReaderFactory();
parquet::registerParquetWriterFactory();
dwio::registerReaderFactories();
dwio::registerWriterFactories();
}

void registerConnectors(
Expand All @@ -76,8 +76,8 @@ class S3MultipleEndpoints : public S3Test, public ::test::VectorTestBase {
}

void TearDown() override {
parquet::unregisterParquetReaderFactory();
parquet::unregisterParquetWriterFactory();
dwio::unregisterReaderFactories();
dwio::unregisterWriterFactories();
S3Test::TearDown();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include "velox/connectors/hive/HiveConnector.h"
#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h"
#include "velox/connectors/hive/storage_adapters/s3fs/tests/S3Test.h"
#include "velox/dwio/RegisterReaders.h"
#include "velox/dwio/common/tests/utils/DataFiles.h"
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/PlanBuilder.h"

Expand All @@ -44,11 +44,11 @@ class S3ReadTest : public S3Test, public ::test::VectorTestBase {
auto hiveConnector =
factory.newConnector(kHiveConnectorId, minioServer_->hiveConfig());
connector::registerConnector(hiveConnector);
parquet::registerParquetReaderFactory();
dwio::registerReaderFactories();
}

void TearDown() override {
parquet::unregisterParquetReaderFactory();
dwio::unregisterReaderFactories();
filesystems::finalizeS3FileSystem();
connector::unregisterConnector(kHiveConnectorId);
S3Test::TearDown();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include "velox/common/memory/Memory.h"
#include "velox/connectors/hive/HiveConnector.h"
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/dwio/parquet/RegisterParquetWriter.h"
#include "velox/dwio/RegisterReaders.h"
#include "velox/dwio/RegisterWriters.h"
#include "velox/exec/TableWriter.h"
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/HiveConnectorTestBase.h"
Expand All @@ -39,14 +39,13 @@ class InsertTest : public velox::test::VectorTestBase {
exec::test::kHiveConnectorId, hiveConfig, ioExecutor);
connector::registerConnector(hiveConnector);

parquet::registerParquetReaderFactory();
parquet::registerParquetWriterFactory();
dwio::registerReaderFactories();
dwio::registerWriterFactories();
}

void TearDown() {
parquet::unregisterParquetReaderFactory();
parquet::unregisterParquetWriterFactory();

dwio::unregisterReaderFactories();
dwio::unregisterWriterFactories();
connector::unregisterConnector(exec::test::kHiveConnectorId);
}

Expand Down
8 changes: 3 additions & 5 deletions velox/connectors/hive/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ target_link_libraries(
velox_hive_connector
velox_hive_partition_function
velox_dwio_common_exception
velox_dwio_registered_readers
velox_dwio_registered_writers
velox_vector_fuzzer
velox_vector_test_lib
velox_exec
Expand All @@ -42,9 +44,5 @@ target_link_libraries(

if(VELOX_ENABLE_PARQUET)
target_include_directories(velox_hive_connector_test PUBLIC ${ARROW_PREFIX}/install/include)
target_link_libraries(
velox_hive_connector_test
velox_dwio_parquet_writer
velox_dwio_parquet_reader
)
target_link_libraries(velox_hive_connector_test velox_dwio_registered_readers)
endif()
11 changes: 4 additions & 7 deletions velox/connectors/hive/tests/HiveDataSinkTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,14 @@
#include "velox/common/base/tests/GTestUtils.h"
#include "velox/common/testutil/TestValue.h"
#include "velox/connectors/hive/HiveConnector.h"
#include "velox/dwio/RegisterReaders.h"
#include "velox/dwio/RegisterWriters.h"
#include "velox/dwio/common/BufferedInput.h"
#include "velox/dwio/common/Options.h"
#include "velox/dwio/dwrf/reader/DwrfReader.h"
#include "velox/dwio/dwrf/writer/FlushPolicy.h"
#include "velox/dwio/dwrf/writer/Writer.h"

#ifdef VELOX_ENABLE_PARQUET
#include "velox/dwio/parquet/RegisterParquetReader.h"
#include "velox/dwio/parquet/RegisterParquetWriter.h"
#include "velox/dwio/parquet/reader/ParquetReader.h"
#include "velox/dwio/parquet/writer/Writer.h"
#endif
Expand All @@ -52,10 +51,8 @@ class HiveDataSinkTest : public exec::test::HiveConnectorTestBase {
protected:
void SetUp() override {
HiveConnectorTestBase::SetUp();
#ifdef VELOX_ENABLE_PARQUET
parquet::registerParquetReaderFactory();
parquet::registerParquetWriterFactory();
#endif
dwio::registerReaderFactories();
dwio::registerWriterFactories();
Type::registerSerDe();
HiveSortingColumn::registerSerDe();
HiveBucketProperty::registerSerDe();
Expand Down
26 changes: 25 additions & 1 deletion velox/dwio/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,29 @@ add_subdirectory(common)
add_subdirectory(catalog)
add_subdirectory(dwrf)
add_subdirectory(orc)
add_subdirectory(parquet)
add_subdirectory(text)

if(VELOX_ENABLE_PARQUET)
add_subdirectory(parquet)
endif()

# Library built from RegisterReaders.cpp, explicitly declared STATIC.
# NOTE(review): the review thread on this line asks for velox_add_library
# without a library type keyword instead - confirm before merging.
add_library(velox_dwio_registered_readers STATIC RegisterReaders.cpp)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you force it to be static?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@assignUser Thanks for reviewing! As I recall, it forces CMake to build libvelox_dwio_registered_readers.a, which helps the Linux build link correctly. It is not an umbrella target (purely transitive deps, no actual objects), which is what "INTERFACE" is usually for. It actually does have code in RegisterReaders.cpp, so it has to be either STATIC, OBJECT or SHARED. Since we want outside users to link with it, it cannot be "OBJECT" here. It is manual registration, and it is never supposed to be loaded dynamically on its own, so it won't be "SHARED" either. I believe we are not doing dynamic loading anyway, so by default it is still "STATIC"; I just specified it explicitly to make that clear.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, not sure how I missed this before but this should be part of the main library as it's not a utility for tests or fuzzers, so please use our wrapper functions:

  • velox_add_library without any library type keyword
  • velox_link_libraries instead of target_link_libraries.

Sorry, this isn't well documented yet (#14941).

# Publicly link the per-format reader libraries behind the aggregate readers
# target. The Parquet reader is added only when VELOX_ENABLE_PARQUET evaluates
# to true (generator expression below).
# NOTE(review): the review thread asks for velox_link_libraries instead of
# target_link_libraries here - confirm before merging.
target_link_libraries(
velox_dwio_registered_readers
PUBLIC
velox_dwio_dwrf_reader
velox_dwio_orc_reader
velox_dwio_text_reader_register
$<$<BOOL:${VELOX_ENABLE_PARQUET}>:velox_dwio_parquet_reader>
velox_dwio_common
)

# Library built from RegisterWriters.cpp. It is declared through the project
# wrapper without a library type keyword, so the library type follows the
# build configuration rather than being forced to STATIC.
velox_add_library(velox_dwio_registered_writers RegisterWriters.cpp)

# Publicly link the per-format writer libraries. The Parquet writer is added
# only when VELOX_ENABLE_PARQUET evaluates to true (generator expression).
velox_link_libraries(
  velox_dwio_registered_writers
  PUBLIC
    velox_dwio_dwrf_writer
    velox_dwio_text_writer_register
    $<$<BOOL:${VELOX_ENABLE_PARQUET}>:velox_dwio_parquet_writer>
    velox_dwio_common
)
46 changes: 46 additions & 0 deletions velox/dwio/RegisterReaders.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/dwio/RegisterReaders.h"

#include "velox/dwio/dwrf/RegisterDwrfReader.h"
#include "velox/dwio/orc/reader/OrcReader.h"
#include "velox/dwio/text/RegisterTextReader.h"
#ifdef VELOX_ENABLE_PARQUET
#include "velox/dwio/parquet/RegisterParquetReader.h"
#endif

namespace facebook::velox::dwio {

/// Registers the reader factories for the file formats handled by this
/// translation unit: DWRF, ORC and text unconditionally, and Parquet only when
/// the build defines VELOX_ENABLE_PARQUET.
void registerReaderFactories() {
  dwrf::registerDwrfReaderFactory();
  orc::registerOrcReaderFactory();
  text::registerTextReaderFactory();
#if defined(VELOX_ENABLE_PARQUET)
  // Parquet support is optional at build time.
  parquet::registerParquetReaderFactory();
#endif // VELOX_ENABLE_PARQUET
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you mind sharing the specific issue you're experiencing with the separate registrations? I assume Spark SQL typically does not register the DWRF readers and writers by default, and prior to this change, it was possible to register only the necessary components as needed.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rui-mo Thanks for reviewing! There are a few motivations:

  1. We wanted the tests to cover both DWRF and Parquet in a more elegant way, e.g. TableScanTest, TableWriterTest, HiveDataSinkTest, etc. Today these tests only work on a specific format, mostly DWRF. Because they live in exec or core, which are core engine components, they should stay file-format agnostic, and we should be able to reuse them to test Parquet or other file formats. I believe this is a well-known design principle, and it is why the ReaderFactory and WriterFactory were introduced in the past. However, many tests do not follow this paradigm, which makes reusing them for other file formats hard. With the changes in this PR, we can parameterize the tests with the dwio::common::FileFormat enum, like this:
class OperatorTestBase : public ::testing::TestWithParam<dwio::common::FileFormat>,
                         public velox::test::VectorTestBase

That way we don't have to introduce new tests specifically for Parquet or any new file format; we just turn the tests into TEST_P tests that automatically iterate through the collection of file formats.

  2. It will also help to add more file readers in a modular way. We will need to add AVRO sometime in the future. If the new AVRO reader follows the DWIO interface, it can simply be plugged in and the engine works automatically because it is file-format agnostic.

  3. This will help us make the Velox Parquet reader shareable with other communities, such as the Iceberg community.

I added a paragraph in the design doc and the PR message to explain why this is a good thing: #14514

Copy link
Copy Markdown
Contributor Author

@yingsu00 yingsu00 Sep 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rui-mo Also, the single file format registry is still supported. So there is no backward compatibility issue for Gluten.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for explaining the benefits. Regarding the first point, I see that TableScanTest is in velox/exec, and I agree with making it format agnostic.


/// Unregisters the reader factories for DWRF, ORC and text unconditionally,
/// and for Parquet only when the build defines VELOX_ENABLE_PARQUET.
void unregisterReaderFactories() {
  dwrf::unregisterDwrfReaderFactory();
  orc::unregisterOrcReaderFactory();
  text::unregisterTextReaderFactory();
#if defined(VELOX_ENABLE_PARQUET)
  // Parquet support is optional at build time.
  parquet::unregisterParquetReaderFactory();
#endif // VELOX_ENABLE_PARQUET
}

} // namespace facebook::velox::dwio
25 changes: 25 additions & 0 deletions velox/dwio/RegisterReaders.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

namespace facebook::velox::dwio {

/// Registers the reader factories for DWRF, ORC and text, plus Parquet when
/// the build defines VELOX_ENABLE_PARQUET. Defined in RegisterReaders.cpp.
void registerReaderFactories();

/// Unregisters the same set of reader factories (DWRF, ORC, text, and Parquet
/// when VELOX_ENABLE_PARQUET is defined). Defined in RegisterReaders.cpp.
void unregisterReaderFactories();

} // namespace facebook::velox::dwio
Loading
Loading